1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2021 MediaTek Inc.
4  * Author: George Sun <george.sun@mediatek.com>
5  */
6 
7 #include <linux/module.h>
8 #include <linux/slab.h>
9 #include <media/videobuf2-dma-contig.h>
10 #include <media/v4l2-vp9.h>
11 
12 #include "../mtk_vcodec_util.h"
13 #include "../mtk_vcodec_dec.h"
14 #include "../mtk_vcodec_intr.h"
15 #include "../vdec_drv_base.h"
16 #include "../vdec_drv_if.h"
17 #include "../vdec_vpu_if.h"
18 
/*
 * reset_frame_context values as defined in the VP9 spec.
 * vdec_vp9_slice_setup_hdr() translates the V4L2 enum into these values
 * before they are stored in the uncompressed header.
 */
#define VP9_RESET_FRAME_CONTEXT_NONE0 0
#define VP9_RESET_FRAME_CONTEXT_NONE1 1
#define VP9_RESET_FRAME_CONTEXT_SPEC 2
#define VP9_RESET_FRAME_CONTEXT_ALL 3

/* sizes, in bytes, of the tile, prob table and counts table working buffers */
#define VP9_TILE_BUF_SIZE 4096
#define VP9_PROB_BUF_SIZE 2560
#define VP9_COUNTS_BUF_SIZE 16384

/*
 * Flag test helpers, each evaluates to 0 or 1.
 * They do not take the object as an argument: the caller must have a
 * pointer named hdr, lf or seg (respectively) in scope.
 */
#define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x))
#define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x))
#define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x))
/* evaluates to 3 for band 0 and to 6 for every other band */
#define VP9_BAND_6(band) ((band) == 0 ? 3 : 6)
33 
/*
 * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint
 *
 * Every member is a u8 array (or a struct of u8), so the compiler inserts
 * no implicit padding; the explicit padding members are part of the layout.
 * The members add up to 2560 bytes, the value of VP9_PROB_BUF_SIZE.
 *
 * The structure is duplicated as a whole from the firmware's default frame
 * context (vdec_vp9_slice_init_default_frame_ctx()) and copied as a whole
 * into the prob working buffer (vdec_vp9_slice_setup_prob_buffer()), so the
 * member order and sizes must not be changed.
 */
struct vdec_vp9_slice_frame_ctx {
	/* 6 x 3 probabilities, padded to 20 bytes per entry */
	struct {
		u8 probs[6][3];
		u8 padding[2];
	} coef_probs[4][2][2][6];

	u8 y_mode_prob[4][16];
	u8 switch_interp_prob[4][16];
	u8 seg[32];  /* ignore */
	u8 comp_inter_prob[16];
	u8 comp_ref_prob[16];
	u8 single_ref_prob[5][2];
	u8 single_ref_prob_padding[6];

	/*
	 * Each of the following four groups occupies 16 bytes per entry.
	 * NOTE(review): presumably the motion vector probabilities (joint,
	 * sign/classes, class0/bits, fractional and high precision bits),
	 * judging by the member names - confirm against the firmware layout.
	 */
	u8 joint[3];
	u8 joint_padding[13];
	struct {
		u8 sign;
		u8 classes[10];
		u8 padding[5];
	} sign_classes[2];
	struct {
		u8 class0[1];
		u8 bits[10];
		u8 padding[5];
	} class0_bits[2];
	struct {
		u8 class0_fp[2][3];
		u8 fp[3];
		u8 class0_hp;
		u8 hp;
		u8 padding[5];
	} class0_fp_hp[2];

	/* 10 used rows followed by 2 rows of padding, 16 bytes per row */
	u8 uv_mode_prob[10][16];
	u8 uv_mode_prob_padding[2][16];

	u8 partition_prob[16][4];

	u8 inter_mode_probs[7][4];
	u8 skip_probs[4];

	u8 tx_p8x8[2][4];
	u8 tx_p16x16[2][4];
	u8 tx_p32x32[2][4];
	u8 intra_inter_prob[8];
};
84 
/*
 * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint
 *
 * Every member is built from u32 only, so there is no implicit padding.
 * The members add up to 4096 words, i.e. 16384 bytes, which is the value
 * of VP9_COUNTS_BUF_SIZE.
 * NOTE(review): this is presumably the layout of the counts working
 * buffer written by the hardware; the code using it is not visible in
 * this part of the file - confirm.
 */
struct vdec_vp9_slice_frame_counts {
	/*
	 * The eob_branch entries take 36 words each (16 entries, 576 words);
	 * the union reserves 256 * 4 words for them regardless.
	 */
	union {
		struct {
			u32 band_0[3];
			u32 padding0[1];
			u32 band_1_5[5][6];
			u32 padding1[2];
		} eob_branch[4][2][2];
		u32 eob_branch_space[256 * 4];
	};

	/* band 0 has 3 entries, bands 1..5 have 6 entries (see VP9_BAND_6) */
	struct {
		u32 band_0[3][4];
		u32 band_1_5[5][6][4];
	} coef_probs[4][2][2];

	u32 intra_inter[4][2];
	u32 comp_inter[5][2];
	u32 comp_inter_padding[2];
	u32 comp_ref[5][2];
	u32 comp_ref_padding[2];
	u32 single_ref[5][2][2];
	u32 inter_mode[7][4];
	u32 y_mode[4][12];
	u32 uv_mode[10][10];
	u32 partition[16][4];
	u32 switchable_interp[4][4];

	u32 tx_p8x8[2][2];
	u32 tx_p16x16[2][4];
	u32 tx_p32x32[2][4];

	u32 skip[3][4];

	u32 joint[4];

	/* two entries of 56 words each */
	struct {
		u32 sign[2];
		u32 class0[2];
		u32 classes[12];
		u32 bits[10][2];
		u32 padding[4];
		u32 class0_fp[2][4];
		u32 fp[4];
		u32 class0_hp[2];
		u32 hp[2];
	} mvcomp[2];

	/* fills the structure up to 4096 words */
	u32 reserved[126][4];
};
138 
/**
 * struct vdec_vp9_slice_counts_map - vp9 counts tables to map
 *                                    v4l2_vp9_frame_symbol_counts
 * @skip:	skip counts.
 * @y_mode:	Y prediction mode counts.
 * @filter:	interpolation filter counts.
 * @sign:	motion vector sign counts.
 * @classes:	motion vector class counts.
 * @class0:	motion vector class0 bit counts.
 * @bits:	motion vector bits counts.
 * @class0_fp:	motion vector class0 fractional bit counts.
 * @fp:	motion vector fractional bit counts.
 * @class0_hp:	motion vector class0 high precision fractional bit counts.
 * @hp:	motion vector high precision fractional bit counts.
 *
 * Several arrays here are narrower than the members of the same name in
 * struct vdec_vp9_slice_frame_counts (e.g. skip[3][2] here, skip[3][4]
 * there).
 * NOTE(review): the previous kernel-doc also listed an @mv_joint member,
 * which does not exist in this structure; the entry was dropped.
 */
struct vdec_vp9_slice_counts_map {
	u32 skip[3][2];
	u32 y_mode[4][10];
	u32 filter[4][3];
	u32 sign[2][2];
	u32 classes[2][11];
	u32 class0[2][2];
	u32 bits[2][10][2];
	u32 class0_fp[2][2][4];
	u32 fp[2][4];
	u32 class0_hp[2][2];
	u32 hp[2][2];
};
168 
/*
 * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax
 *                                             used for decoding
 *
 * Filled from the V4L2 VP9 frame control by vdec_vp9_slice_setup_hdr(),
 * vdec_vp9_slice_setup_loop_filter(), vdec_vp9_slice_setup_quantization()
 * and vdec_vp9_slice_setup_segmentation(). It is embedded in the vsi that
 * is copied to the firmware, so the explicit padding members belong to the
 * layout and members must not be reordered.
 */
struct vdec_vp9_slice_uncompressed_header {
	u8 profile;
	/* last_* members describe the previously decoded picture */
	u8 last_frame_type;
	/* 0 for a key frame, 1 otherwise */
	u8 frame_type;

	u8 last_show_frame;
	u8 show_frame;
	u8 error_resilient_mode;

	u8 bit_depth;
	u8 padding0[1];
	u16 last_frame_width;
	u16 last_frame_height;
	u16 frame_width;
	u16 frame_height;

	u8 intra_only;
	/* one of VP9_RESET_FRAME_CONTEXT_* */
	u8 reset_frame_context;
	/* index 0 is always 0; 1..3 come from the frame control's bitmask */
	u8 ref_frame_sign_bias[4];
	u8 allow_high_precision_mv;
	u8 interpolation_filter;

	u8 refresh_frame_context;
	u8 frame_parallel_decoding_mode;
	u8 frame_context_idx;

	/* loop_filter_params */
	u8 loop_filter_level;
	u8 loop_filter_sharpness;
	u8 loop_filter_delta_enabled;
	s8 loop_filter_ref_deltas[4];
	s8 loop_filter_mode_deltas[2];

	/* quantization_params */
	u8 base_q_idx;
	s8 delta_q_y_dc;
	s8 delta_q_uv_dc;
	s8 delta_q_uv_ac;

	/* segmentation_params */
	u8 segmentation_enabled;
	u8 segmentation_update_map;
	u8 segmentation_tree_probs[7];
	u8 padding1[1];
	/*
	 * NOTE(review): "udpate" is a misspelling of "update"; the member is
	 * referenced by this name elsewhere, so it is left unchanged here.
	 */
	u8 segmentation_temporal_udpate;
	u8 segmentation_pred_prob[3];
	u8 segmentation_update_data;
	u8 segmentation_abs_or_delta_update;
	u8 feature_enabled[8];
	s16 feature_value[8][4];

	/* tile_info */
	u8 tile_cols_log2;
	u8 tile_rows_log2;
	u8 padding2[2];

	u16 uncompressed_header_size;
	/* receives the compressed header size from the frame control */
	u16 header_size_in_bytes;

	/* LAT OUT, CORE IN */
	u32 dequant[8][4];
};
235 
/*
 * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax
 *                                           used for decoding.
 *
 * The driver does not fill this structure in the visible code; it is
 * copied back from the remote vsi by vdec_vp9_slice_vsi_from_remote().
 * The trailing padding makes the structure 8 bytes long.
 */
struct vdec_vp9_slice_compressed_header {
	u8 tx_mode;
	u8 ref_mode;
	u8 comp_fixed_ref;
	u8 comp_var_ref[2];
	u8 padding[3];
};
247 
/*
 * struct vdec_vp9_slice_tiles - vp9 tile syntax
 *
 * The array bounds (4 rows, 64 columns) are the limits enforced by
 * vdec_vp9_slice_setup_tile(), which fills mi_rows, mi_cols and
 * actual_rows.
 */
struct vdec_vp9_slice_tiles {
	/* per-tile size, indexed [row][col]; not written in the code visible here */
	u32 size[4][64];
	/* extent of each tile row: its span in mi units, rounded up and divided by 8 */
	u32 mi_rows[4];
	/* extent of each tile column, computed the same way as mi_rows */
	u32 mi_cols[64];
	/* number of tile rows whose mi_rows entry is non-zero */
	u8 actual_rows;
	u8 padding[7];
};
258 
/*
 * struct vdec_vp9_slice_reference - vp9 reference frame information
 *
 * Dimensions and sample format of one reference frame. All members are
 * explicitly sized and the trailing padding makes the structure 8 bytes
 * long; it is embedded in struct vdec_vp9_slice_frame.
 */
struct vdec_vp9_slice_reference {
	u16 frame_width;
	u16 frame_height;
	u8 bit_depth;
	u8 subsampling_x;
	u8 subsampling_y;
	u8 padding;
};
270 
/*
 * struct vdec_vp9_slice_frame - vp9 syntax used for decoding
 *
 * Groups everything describing one frame: the uncompressed header, the
 * compressed header, the tile layout and the three reference frames.
 */
struct vdec_vp9_slice_frame {
	struct vdec_vp9_slice_uncompressed_header uh;
	struct vdec_vp9_slice_compressed_header ch;
	struct vdec_vp9_slice_tiles tiles;
	/* one entry per reference frame */
	struct vdec_vp9_slice_reference ref[3];
};
280 
/*
 * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance
 *
 * Shares storage with the decoding vsi pointer in
 * struct vdec_vp9_slice_instance (anonymous union).
 */
struct vdec_vp9_slice_init_vsi {
	unsigned int architecture;
	unsigned int reserved;
	u64 core_vsi;
	/*
	 * default frame context's position in MicroP; the value is truncated
	 * to 32 bits and mapped with mtk_vcodec_fw_map_dm_addr() in
	 * vdec_vp9_slice_init_default_frame_ctx()
	 */
	u64 default_frame_ctx;
};
291 
/*
 * struct vdec_vp9_slice_mem - memory address and size
 *
 * Two unions, each containing a u64 member, so each is at least 8 bytes
 * wide whatever the width of dma_addr_t and size_t.
 */
struct vdec_vp9_slice_mem {
	/* start of the region, either as a raw 64-bit value or a DMA address */
	union {
		u64 buf;
		dma_addr_t dma_addr;
	};
	/*
	 * normally the size of the region; for the trans buffer the same
	 * storage holds the end address instead (dma_addr_end)
	 */
	union {
		size_t size;
		dma_addr_t dma_addr_end;
		u64 padding;
	};
};
306 
/*
 * struct vdec_vp9_slice_bs - input buffer for decoding
 *
 * vdec_vp9_slice_setup_lat_buffer() sets both members to the address and
 * size of the same bitstream buffer.
 */
struct vdec_vp9_slice_bs {
	struct vdec_vp9_slice_mem buf;
	struct vdec_vp9_slice_mem frame;
};
314 
/*
 * struct vdec_vp9_slice_fb - frame buffer for decoding
 *
 * NOTE(review): y and c are presumably the luma and chroma planes of the
 * frame buffer; the code filling them is not visible here - confirm.
 */
struct vdec_vp9_slice_fb {
	struct vdec_vp9_slice_mem y;
	struct vdec_vp9_slice_mem c;
};
322 
/*
 * struct vdec_vp9_slice_state - decoding state
 *
 * Cleared by vdec_vp9_slice_setup_state() before decoding and copied back
 * from the remote vsi by vdec_vp9_slice_vsi_from_remote().
 * NOTE(review): the meaning of the individual members is defined by the
 * firmware and is not visible in this part of the file.
 */
struct vdec_vp9_slice_state {
	int err;
	unsigned int full;
	unsigned int timeout;
	unsigned int perf;

	unsigned int crc[12];
};
334 
/**
 * struct vdec_vp9_slice_vsi - exchange decoding information
 *                             between Main CPU and MicroP
 *
 * @bs:	input buffer
 * @fb:	output buffer
 * @ref:	3 reference buffers
 * @mv:	mv working buffer
 * @seg:	segmentation working buffer
 * @tile:	tile buffer
 * @prob:	prob table buffer, used to set/update prob table
 * @counts:	counts table buffer, used to update prob table
 * @ube:	general buffer
 * @trans:	trans buffer position in general buffer
 * @err_map:	error buffer
 * @row_info:	row info buffer
 * @frame:	decoding syntax
 * @state:	decoding state
 *
 * The local copy is written to the remote vsi as one block by
 * vdec_vp9_slice_vsi_to_remote(); only @frame.ch, @frame.uh.dequant,
 * @trans and @state are read back by vdec_vp9_slice_vsi_from_remote().
 */
struct vdec_vp9_slice_vsi {
	/* used in LAT stage */
	struct vdec_vp9_slice_bs bs;
	/* used in Core stage */
	struct vdec_vp9_slice_fb fb;
	struct vdec_vp9_slice_fb ref[3];

	struct vdec_vp9_slice_mem mv[2];
	struct vdec_vp9_slice_mem seg[2];
	struct vdec_vp9_slice_mem tile;
	struct vdec_vp9_slice_mem prob;
	struct vdec_vp9_slice_mem counts;

	/* LAT stage's output, Core stage's input */
	struct vdec_vp9_slice_mem ube;
	struct vdec_vp9_slice_mem trans;
	struct vdec_vp9_slice_mem err_map;
	struct vdec_vp9_slice_mem row_info;

	/* decoding parameters */
	struct vdec_vp9_slice_frame frame;

	struct vdec_vp9_slice_state state;
};
378 
/**
 * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi.
 *                             pass it from lat to core
 *
 * @vsi:	local vsi. copy to/from remote vsi before/after decoding
 * @ref_idx:	identifies the 3 reference buffers; filled with the
 *		last/golden/alt frame timestamps of the frame control by
 *		vdec_vp9_slice_setup_ref_idx()
 * @seq:	picture sequence, taken from the instance's global counter
 * @state:	decoding state, two entries (LAT/Core)
 */
struct vdec_vp9_slice_pfc {
	struct vdec_vp9_slice_vsi vsi;

	u64 ref_idx[3];

	int seq;

	/* LAT/Core CRC */
	struct vdec_vp9_slice_state state[2];
};
398 
/*
 * enum vdec_vp9_slice_resolution_level - resolution class the working
 *                                        buffers are currently sized for
 *
 * VP9_RES_NONE means that no (valid) working buffers are allocated.
 * vdec_vp9_slice_alloc_working_buffer() only selects VP9_RES_FHD or
 * VP9_RES_4K; VP9_RES_8K is not used in the code visible here.
 */
enum vdec_vp9_slice_resolution_level {
	VP9_RES_NONE,
	VP9_RES_FHD,
	VP9_RES_4K,
	VP9_RES_8K,
};
408 
/*
 * struct vdec_vp9_slice_ref - width and height of a decoded picture, kept
 *                             so the picture can later be used as a
 *                             reference picture
 */
struct vdec_vp9_slice_ref {
	unsigned int width;
	unsigned int height;
};
417 
/**
 * struct vdec_vp9_slice_instance - represent one vp9 instance
 *
 * @ctx:		pointer to codec's context
 * @vpu:		VPU instance
 * @seq:		global picture sequence
 * @level:		level of current resolution
 * @width:		width of last picture
 * @height:		height of last picture
 * @frame_type:	frame_type of last picture
 * @irq:		irq to Main CPU or MicroP
 * @show_frame:	show_frame of last picture
 * @dpb:		picture information (width/height) for reference
 * @mv:		mv working buffer
 * @seg:		segmentation working buffer
 * @tile:		tile buffer
 * @prob:		prob table buffer, used to set/update prob table
 * @counts:		counts table buffer, used to update prob table
 * @frame_ctx:		4 frame context according to VP9 Spec
 * @frame_ctx_helper:	one frame context in the V4L2 layout
 *			(struct v4l2_vp9_frame_context)
 * @dirty:		state of each frame context; when an entry is 0 the
 *			shared default frame context is used instead of
 *			@frame_ctx
 * @init_vsi:		vsi used for initialized VP9 instance
 * @vsi:		vsi used for decoding/flush ...
 * @core_vsi:		vsi used for Core stage
 *
 * @sc_pfc:		per frame context single core
 * @counts_map:	used map to counts_helper
 * @counts_helper:	counts table according to newest kernel spec
 */
struct vdec_vp9_slice_instance {
	struct mtk_vcodec_ctx *ctx;
	struct vdec_vpu_inst vpu;

	int seq;

	enum vdec_vp9_slice_resolution_level level;

	/* for resolution change and get_pic_info */
	unsigned int width;
	unsigned int height;

	/* for last_frame_type */
	unsigned int frame_type;
	unsigned int irq;

	unsigned int show_frame;

	/* maintain vp9 reference frame state */
	struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME];

	/*
	 * normal working buffers
	 * mv[0]/seg[0]/tile/prob/counts is used for LAT
	 * mv[1]/seg[1] is used for CORE
	 */
	struct mtk_vcodec_mem mv[2];
	struct mtk_vcodec_mem seg[2];
	struct mtk_vcodec_mem tile;
	struct mtk_vcodec_mem prob;
	struct mtk_vcodec_mem counts;

	/* 4 prob tables */
	struct vdec_vp9_slice_frame_ctx frame_ctx[4];
	/* a single helper table in the V4L2 layout */
	struct v4l2_vp9_frame_context frame_ctx_helper;
	unsigned char dirty[4];

	/* MicroP vsi; both pointers share the same storage */
	union {
		struct vdec_vp9_slice_init_vsi *init_vsi;
		struct vdec_vp9_slice_vsi *vsi;
	};
	struct vdec_vp9_slice_vsi *core_vsi;

	struct vdec_vp9_slice_pfc sc_pfc;
	struct vdec_vp9_slice_counts_map counts_map;
	struct v4l2_vp9_frame_symbol_counts counts_helper;
};
496 
/*
 * All VP9 instances share this default frame context. It is allocated once,
 * on first use, by vdec_vp9_slice_init_default_frame_ctx(); the mutex
 * serializes that one-time initialization.
 */
static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx;
static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock);

/* forward declaration; the definition is further down in the file */
static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf);
504 
vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance * instance)505 static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance)
506 {
507 	struct vdec_vp9_slice_frame_ctx *remote_frame_ctx;
508 	struct vdec_vp9_slice_frame_ctx *frame_ctx;
509 	struct mtk_vcodec_ctx *ctx;
510 	struct vdec_vp9_slice_init_vsi *vsi;
511 	int ret = 0;
512 
513 	ctx = instance->ctx;
514 	vsi = instance->vpu.vsi;
515 	if (!ctx || !vsi)
516 		return -EINVAL;
517 
518 	remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
519 						     (u32)vsi->default_frame_ctx);
520 	if (!remote_frame_ctx) {
521 		mtk_vcodec_err(instance, "failed to map default frame ctx\n");
522 		return -EINVAL;
523 	}
524 
525 	mutex_lock(&vdec_vp9_slice_frame_ctx_lock);
526 	if (vdec_vp9_slice_default_frame_ctx)
527 		goto out;
528 
529 	frame_ctx = kmemdup(remote_frame_ctx, sizeof(*frame_ctx), GFP_KERNEL);
530 	if (!frame_ctx) {
531 		ret = -ENOMEM;
532 		goto out;
533 	}
534 
535 	vdec_vp9_slice_default_frame_ctx = frame_ctx;
536 
537 out:
538 	mutex_unlock(&vdec_vp9_slice_frame_ctx_lock);
539 
540 	return ret;
541 }
542 
vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_vsi * vsi)543 static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance,
544 					       struct vdec_vp9_slice_vsi *vsi)
545 {
546 	struct mtk_vcodec_ctx *ctx = instance->ctx;
547 	enum vdec_vp9_slice_resolution_level level;
548 	/* super blocks */
549 	unsigned int max_sb_w;
550 	unsigned int max_sb_h;
551 	unsigned int max_w;
552 	unsigned int max_h;
553 	unsigned int w;
554 	unsigned int h;
555 	size_t size;
556 	int ret;
557 	int i;
558 
559 	w = vsi->frame.uh.frame_width;
560 	h = vsi->frame.uh.frame_height;
561 
562 	if (w > VCODEC_DEC_4K_CODED_WIDTH ||
563 	    h > VCODEC_DEC_4K_CODED_HEIGHT) {
564 		return -EINVAL;
565 	} else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
566 		/* 4K */
567 		level = VP9_RES_4K;
568 		max_w = VCODEC_DEC_4K_CODED_WIDTH;
569 		max_h = VCODEC_DEC_4K_CODED_HEIGHT;
570 	} else {
571 		/* FHD */
572 		level = VP9_RES_FHD;
573 		max_w = MTK_VDEC_MAX_W;
574 		max_h = MTK_VDEC_MAX_H;
575 	}
576 
577 	if (level == instance->level)
578 		return 0;
579 
580 	mtk_vcodec_debug(instance, "resolution level changed, from %u to %u, %ux%u",
581 			 instance->level, level, w, h);
582 
583 	max_sb_w = DIV_ROUND_UP(max_w, 64);
584 	max_sb_h = DIV_ROUND_UP(max_h, 64);
585 	ret = -ENOMEM;
586 
587 	/*
588 	 * Lat-flush must wait core idle, otherwise core will
589 	 * use released buffers
590 	 */
591 
592 	size = (max_sb_w * max_sb_h + 2) * 576;
593 	for (i = 0; i < 2; i++) {
594 		if (instance->mv[i].va)
595 			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
596 		instance->mv[i].size = size;
597 		if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i]))
598 			goto err;
599 	}
600 
601 	size = (max_sb_w * max_sb_h * 32) + 256;
602 	for (i = 0; i < 2; i++) {
603 		if (instance->seg[i].va)
604 			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
605 		instance->seg[i].size = size;
606 		if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i]))
607 			goto err;
608 	}
609 
610 	if (!instance->tile.va) {
611 		instance->tile.size = VP9_TILE_BUF_SIZE;
612 		if (mtk_vcodec_mem_alloc(ctx, &instance->tile))
613 			goto err;
614 	}
615 
616 	if (!instance->prob.va) {
617 		instance->prob.size = VP9_PROB_BUF_SIZE;
618 		if (mtk_vcodec_mem_alloc(ctx, &instance->prob))
619 			goto err;
620 	}
621 
622 	if (!instance->counts.va) {
623 		instance->counts.size = VP9_COUNTS_BUF_SIZE;
624 		if (mtk_vcodec_mem_alloc(ctx, &instance->counts))
625 			goto err;
626 	}
627 
628 	instance->level = level;
629 	return 0;
630 
631 err:
632 	instance->level = VP9_RES_NONE;
633 	return ret;
634 }
635 
vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance * instance)636 static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance)
637 {
638 	struct mtk_vcodec_ctx *ctx = instance->ctx;
639 	int i;
640 
641 	for (i = 0; i < ARRAY_SIZE(instance->mv); i++) {
642 		if (instance->mv[i].va)
643 			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
644 	}
645 	for (i = 0; i < ARRAY_SIZE(instance->seg); i++) {
646 		if (instance->seg[i].va)
647 			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
648 	}
649 	if (instance->tile.va)
650 		mtk_vcodec_mem_free(ctx, &instance->tile);
651 	if (instance->prob.va)
652 		mtk_vcodec_mem_free(ctx, &instance->prob);
653 	if (instance->counts.va)
654 		mtk_vcodec_mem_free(ctx, &instance->counts);
655 
656 	instance->level = VP9_RES_NONE;
657 }
658 
vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi * vsi,struct vdec_vp9_slice_vsi * remote_vsi,int skip)659 static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi,
660 					   struct vdec_vp9_slice_vsi *remote_vsi,
661 					   int skip)
662 {
663 	struct vdec_vp9_slice_frame *rf;
664 	struct vdec_vp9_slice_frame *f;
665 
666 	/*
667 	 * compressed header
668 	 * dequant
669 	 * buffer position
670 	 * decode state
671 	 */
672 	if (!skip) {
673 		rf = &remote_vsi->frame;
674 		f = &vsi->frame;
675 		memcpy(&f->ch, &rf->ch, sizeof(f->ch));
676 		memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant));
677 		memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
678 	}
679 
680 	memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
681 }
682 
vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi * vsi,struct vdec_vp9_slice_vsi * remote_vsi)683 static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi,
684 					 struct vdec_vp9_slice_vsi *remote_vsi)
685 {
686 	memcpy(remote_vsi, vsi, sizeof(*vsi));
687 }
688 
/*
 * Start position, in mi units, of tile number @idx when @mi_num mi units
 * are split into (1 << @tile_log2) tiles.
 *
 * Tiles are aligned to super blocks of 8 mi units; the result is clamped
 * to @mi_num so that idx == number of tiles yields the end position.
 */
static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2)
{
	int sb_count = (mi_num + 7) >> 3;
	int sb_start = (idx * sb_count) >> tile_log2;
	int mi_start = sb_start << 3;

	if (mi_start > mi_num)
		mi_start = mi_num;

	return mi_start;
}
696 
697 static
vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance * instance)698 int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance)
699 {
700 	struct vb2_v4l2_buffer *src;
701 	struct vb2_v4l2_buffer *dst;
702 
703 	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
704 	if (!src)
705 		return -EINVAL;
706 
707 	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
708 	if (!dst)
709 		return -EINVAL;
710 
711 	v4l2_m2m_buf_copy_metadata(src, dst, true);
712 
713 	return 0;
714 }
715 
vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance * instance,struct vdec_lat_buf * lat_buf)716 static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance,
717 						 struct vdec_lat_buf *lat_buf)
718 {
719 	struct vb2_v4l2_buffer *src;
720 	struct vb2_v4l2_buffer *dst;
721 
722 	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
723 	if (!src)
724 		return -EINVAL;
725 
726 	lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
727 
728 	dst = &lat_buf->ts_info;
729 	v4l2_m2m_buf_copy_metadata(src, dst, true);
730 	return 0;
731 }
732 
vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_uncompressed_header * uh,struct v4l2_ctrl_vp9_frame * hdr)733 static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance,
734 				     struct vdec_vp9_slice_uncompressed_header *uh,
735 				     struct v4l2_ctrl_vp9_frame *hdr)
736 {
737 	int i;
738 
739 	uh->profile = hdr->profile;
740 	uh->last_frame_type = instance->frame_type;
741 	uh->frame_type = !HDR_FLAG(KEY_FRAME);
742 	uh->last_show_frame = instance->show_frame;
743 	uh->show_frame = HDR_FLAG(SHOW_FRAME);
744 	uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
745 	uh->bit_depth = hdr->bit_depth;
746 	uh->last_frame_width = instance->width;
747 	uh->last_frame_height = instance->height;
748 	uh->frame_width = hdr->frame_width_minus_1 + 1;
749 	uh->frame_height = hdr->frame_height_minus_1 + 1;
750 	uh->intra_only = HDR_FLAG(INTRA_ONLY);
751 	/* map v4l2 enum to values defined in VP9 spec for firmware */
752 	switch (hdr->reset_frame_context) {
753 	case V4L2_VP9_RESET_FRAME_CTX_NONE:
754 		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
755 		break;
756 	case V4L2_VP9_RESET_FRAME_CTX_SPEC:
757 		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC;
758 		break;
759 	case V4L2_VP9_RESET_FRAME_CTX_ALL:
760 		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL;
761 		break;
762 	default:
763 		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
764 		break;
765 	}
766 	/*
767 	 * ref_frame_sign_bias specifies the intended direction
768 	 * of the motion vector in time for each reference frame.
769 	 * - INTRA_FRAME = 0,
770 	 * - LAST_FRAME = 1,
771 	 * - GOLDEN_FRAME = 2,
772 	 * - ALTREF_FRAME = 3,
773 	 * ref_frame_sign_bias[INTRA_FRAME] is always 0
774 	 * and VDA only passes another 3 directions
775 	 */
776 	uh->ref_frame_sign_bias[0] = 0;
777 	for (i = 0; i < 3; i++)
778 		uh->ref_frame_sign_bias[i + 1] =
779 			!!(hdr->ref_frame_sign_bias & (1 << i));
780 	uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV);
781 	uh->interpolation_filter = hdr->interpolation_filter;
782 	uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX);
783 	uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE);
784 	uh->frame_context_idx = hdr->frame_context_idx;
785 
786 	/* tile info */
787 	uh->tile_cols_log2 = hdr->tile_cols_log2;
788 	uh->tile_rows_log2 = hdr->tile_rows_log2;
789 
790 	uh->uncompressed_header_size = hdr->uncompressed_header_size;
791 	uh->header_size_in_bytes = hdr->compressed_header_size;
792 }
793 
vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_uncompressed_header * uh,struct v4l2_ctrl_vp9_frame * hdr)794 static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance,
795 					   struct vdec_vp9_slice_uncompressed_header *uh,
796 					   struct v4l2_ctrl_vp9_frame *hdr)
797 {
798 	int error_resilient_mode;
799 	int reset_frame_context;
800 	int key_frame;
801 	int intra_only;
802 	int i;
803 
804 	key_frame = HDR_FLAG(KEY_FRAME);
805 	intra_only = HDR_FLAG(INTRA_ONLY);
806 	error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
807 	reset_frame_context = uh->reset_frame_context;
808 
809 	/*
810 	 * according to "6.2 Uncompressed header syntax" in
811 	 * "VP9 Bitstream & Decoding Process Specification",
812 	 * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode)
813 	 */
814 	if (key_frame || intra_only || error_resilient_mode) {
815 		/*
816 		 * @reset_frame_context specifies
817 		 * whether the frame context should be
818 		 * reset to default values:
819 		 * 0 or 1 means do not reset any frame context
820 		 * 2 resets just the context specified in the frame header
821 		 * 3 resets all contexts
822 		 */
823 		if (key_frame || error_resilient_mode ||
824 		    reset_frame_context == 3) {
825 			/* use default table */
826 			for (i = 0; i < 4; i++)
827 				instance->dirty[i] = 0;
828 		} else if (reset_frame_context == 2) {
829 			instance->dirty[uh->frame_context_idx] = 0;
830 		}
831 		uh->frame_context_idx = 0;
832 	}
833 }
834 
vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header * uh,struct v4l2_vp9_loop_filter * lf)835 static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh,
836 					     struct v4l2_vp9_loop_filter *lf)
837 {
838 	int i;
839 
840 	uh->loop_filter_level = lf->level;
841 	uh->loop_filter_sharpness = lf->sharpness;
842 	uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED);
843 	for (i = 0; i < 4; i++)
844 		uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i];
845 	for (i = 0; i < 2; i++)
846 		uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i];
847 }
848 
vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header * uh,struct v4l2_vp9_quantization * quant)849 static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh,
850 					      struct v4l2_vp9_quantization *quant)
851 {
852 	uh->base_q_idx = quant->base_q_idx;
853 	uh->delta_q_y_dc = quant->delta_q_y_dc;
854 	uh->delta_q_uv_dc = quant->delta_q_uv_dc;
855 	uh->delta_q_uv_ac = quant->delta_q_uv_ac;
856 }
857 
vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header * uh,struct v4l2_vp9_segmentation * seg)858 static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh,
859 					      struct v4l2_vp9_segmentation *seg)
860 {
861 	int i;
862 	int j;
863 
864 	uh->segmentation_enabled = SEG_FLAG(ENABLED);
865 	uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP);
866 	for (i = 0; i < 7; i++)
867 		uh->segmentation_tree_probs[i] = seg->tree_probs[i];
868 	uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE);
869 	for (i = 0; i < 3; i++)
870 		uh->segmentation_pred_prob[i] = seg->pred_probs[i];
871 	uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA);
872 	uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE);
873 	for (i = 0; i < 8; i++) {
874 		uh->feature_enabled[i] = seg->feature_enabled[i];
875 		for (j = 0; j < 4; j++)
876 			uh->feature_value[i][j] = seg->feature_data[i][j];
877 	}
878 }
879 
vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi * vsi,struct v4l2_ctrl_vp9_frame * hdr)880 static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi,
881 				     struct v4l2_ctrl_vp9_frame *hdr)
882 {
883 	unsigned int rows_log2;
884 	unsigned int cols_log2;
885 	unsigned int rows;
886 	unsigned int cols;
887 	unsigned int mi_rows;
888 	unsigned int mi_cols;
889 	struct vdec_vp9_slice_tiles *tiles;
890 	int offset;
891 	int start;
892 	int end;
893 	int i;
894 
895 	rows_log2 = hdr->tile_rows_log2;
896 	cols_log2 = hdr->tile_cols_log2;
897 	rows = 1 << rows_log2;
898 	cols = 1 << cols_log2;
899 	tiles = &vsi->frame.tiles;
900 	tiles->actual_rows = 0;
901 
902 	if (rows > 4 || cols > 64)
903 		return -EINVAL;
904 
905 	/* setup mi rows/cols information */
906 	mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3;
907 	mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3;
908 
909 	for (i = 0; i < rows; i++) {
910 		start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2);
911 		end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2);
912 		offset = end - start;
913 		tiles->mi_rows[i] = (offset + 7) >> 3;
914 		if (tiles->mi_rows[i])
915 			tiles->actual_rows++;
916 	}
917 
918 	for (i = 0; i < cols; i++) {
919 		start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2);
920 		end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2);
921 		offset = end - start;
922 		tiles->mi_cols[i] = (offset + 7) >> 3;
923 	}
924 
925 	return 0;
926 }
927 
vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi * vsi)928 static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi)
929 {
930 	memset(&vsi->state, 0, sizeof(vsi->state));
931 }
932 
vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc * pfc,struct v4l2_ctrl_vp9_frame * hdr)933 static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc,
934 					 struct v4l2_ctrl_vp9_frame *hdr)
935 {
936 	pfc->ref_idx[0] = hdr->last_frame_ts;
937 	pfc->ref_idx[1] = hdr->golden_frame_ts;
938 	pfc->ref_idx[2] = hdr->alt_frame_ts;
939 }
940 
vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_pfc * pfc)941 static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance,
942 				    struct vdec_vp9_slice_pfc *pfc)
943 {
944 	struct v4l2_ctrl_vp9_frame *hdr;
945 	struct vdec_vp9_slice_uncompressed_header *uh;
946 	struct v4l2_ctrl *hdr_ctrl;
947 	struct vdec_vp9_slice_vsi *vsi;
948 	int ret;
949 
950 	/* frame header */
951 	hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME);
952 	if (!hdr_ctrl || !hdr_ctrl->p_cur.p)
953 		return -EINVAL;
954 
955 	hdr = hdr_ctrl->p_cur.p;
956 	vsi = &pfc->vsi;
957 	uh = &vsi->frame.uh;
958 
959 	/* setup vsi information */
960 	vdec_vp9_slice_setup_hdr(instance, uh, hdr);
961 	vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr);
962 	vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf);
963 	vdec_vp9_slice_setup_quantization(uh, &hdr->quant);
964 	vdec_vp9_slice_setup_segmentation(uh, &hdr->seg);
965 	ret = vdec_vp9_slice_setup_tile(vsi, hdr);
966 	if (ret)
967 		return ret;
968 	vdec_vp9_slice_setup_state(vsi);
969 
970 	/* core stage needs buffer index to get ref y/c ... */
971 	vdec_vp9_slice_setup_ref_idx(pfc, hdr);
972 
973 	pfc->seq = instance->seq;
974 	instance->seq++;
975 
976 	return 0;
977 }
978 
vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_vsi * vsi,struct mtk_vcodec_mem * bs,struct vdec_lat_buf * lat_buf)979 static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance,
980 					   struct vdec_vp9_slice_vsi *vsi,
981 					   struct mtk_vcodec_mem *bs,
982 					   struct vdec_lat_buf *lat_buf)
983 {
984 	int i;
985 
986 	vsi->bs.buf.dma_addr = bs->dma_addr;
987 	vsi->bs.buf.size = bs->size;
988 	vsi->bs.frame.dma_addr = bs->dma_addr;
989 	vsi->bs.frame.size = bs->size;
990 
991 	for (i = 0; i < 2; i++) {
992 		vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
993 		vsi->mv[i].size = instance->mv[i].size;
994 	}
995 	for (i = 0; i < 2; i++) {
996 		vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
997 		vsi->seg[i].size = instance->seg[i].size;
998 	}
999 	vsi->tile.dma_addr = instance->tile.dma_addr;
1000 	vsi->tile.size = instance->tile.size;
1001 	vsi->prob.dma_addr = instance->prob.dma_addr;
1002 	vsi->prob.size = instance->prob.size;
1003 	vsi->counts.dma_addr = instance->counts.dma_addr;
1004 	vsi->counts.size = instance->counts.size;
1005 
1006 	vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
1007 	vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
1008 	vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
1009 	/* used to store trans end */
1010 	vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
1011 	vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
1012 	vsi->err_map.size = lat_buf->wdma_err_addr.size;
1013 
1014 	vsi->row_info.buf = 0;
1015 	vsi->row_info.size = 0;
1016 
1017 	return 0;
1018 }
1019 
vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_vsi * vsi)1020 static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance,
1021 					    struct vdec_vp9_slice_vsi *vsi)
1022 {
1023 	struct vdec_vp9_slice_frame_ctx *frame_ctx;
1024 	struct vdec_vp9_slice_uncompressed_header *uh;
1025 
1026 	uh = &vsi->frame.uh;
1027 
1028 	mtk_vcodec_debug(instance, "ctx dirty %u idx %d\n",
1029 			 instance->dirty[uh->frame_context_idx],
1030 			 uh->frame_context_idx);
1031 
1032 	if (instance->dirty[uh->frame_context_idx])
1033 		frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
1034 	else
1035 		frame_ctx = vdec_vp9_slice_default_frame_ctx;
1036 	memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx));
1037 
1038 	return 0;
1039 }
1040 
vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_vsi * vsi,struct mtk_vcodec_mem * buf)1041 static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance,
1042 					    struct vdec_vp9_slice_vsi *vsi,
1043 					    struct mtk_vcodec_mem *buf)
1044 {
1045 	struct vdec_vp9_slice_uncompressed_header *uh;
1046 
1047 	/* reset segment buffer */
1048 	uh = &vsi->frame.uh;
1049 	if (uh->frame_type == 0 ||
1050 	    uh->intra_only ||
1051 	    uh->error_resilient_mode ||
1052 	    uh->frame_width != instance->width ||
1053 	    uh->frame_height != instance->height) {
1054 		mtk_vcodec_debug(instance, "reset seg\n");
1055 		memset(buf->va, 0, buf->size);
1056 	}
1057 }
1058 
1059 /*
1060  * parse tiles according to `6.4 Decode tiles syntax`
1061  * in "vp9-bitstream-specification"
1062  *
1063  * frame contains uncompress header, compressed header and several tiles.
1064  * this function parses tiles' position and size, stores them to tile buffer
1065  * for decoding.
1066  */
vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_vsi * vsi,struct mtk_vcodec_mem * bs)1067 static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance,
1068 					    struct vdec_vp9_slice_vsi *vsi,
1069 					    struct mtk_vcodec_mem *bs)
1070 {
1071 	struct vdec_vp9_slice_uncompressed_header *uh;
1072 	unsigned int rows_log2;
1073 	unsigned int cols_log2;
1074 	unsigned int rows;
1075 	unsigned int cols;
1076 	unsigned int mi_row;
1077 	unsigned int mi_col;
1078 	unsigned int offset;
1079 	unsigned int pa;
1080 	unsigned int size;
1081 	struct vdec_vp9_slice_tiles *tiles;
1082 	unsigned char *pos;
1083 	unsigned char *end;
1084 	unsigned char *va;
1085 	unsigned int *tb;
1086 	int i;
1087 	int j;
1088 
1089 	uh = &vsi->frame.uh;
1090 	rows_log2 = uh->tile_rows_log2;
1091 	cols_log2 = uh->tile_cols_log2;
1092 	rows = 1 << rows_log2;
1093 	cols = 1 << cols_log2;
1094 
1095 	if (rows > 4 || cols > 64) {
1096 		mtk_vcodec_err(instance, "tile_rows %u tile_cols %u\n",
1097 			       rows, cols);
1098 		return -EINVAL;
1099 	}
1100 
1101 	offset = uh->uncompressed_header_size +
1102 		uh->header_size_in_bytes;
1103 	if (bs->size <= offset) {
1104 		mtk_vcodec_err(instance, "bs size %zu tile offset %u\n",
1105 			       bs->size, offset);
1106 		return -EINVAL;
1107 	}
1108 
1109 	tiles = &vsi->frame.tiles;
1110 	/* setup tile buffer */
1111 
1112 	va = (unsigned char *)bs->va;
1113 	pos = va + offset;
1114 	end = va + bs->size;
1115 	/* truncated */
1116 	pa = (unsigned int)bs->dma_addr + offset;
1117 	tb = instance->tile.va;
1118 	for (i = 0; i < rows; i++) {
1119 		for (j = 0; j < cols; j++) {
1120 			if (i == rows - 1 &&
1121 			    j == cols - 1) {
1122 				size = (unsigned int)(end - pos);
1123 			} else {
1124 				if (end - pos < 4)
1125 					return -EINVAL;
1126 
1127 				size = (pos[0] << 24) | (pos[1] << 16) |
1128 					(pos[2] << 8) | pos[3];
1129 				pos += 4;
1130 				pa += 4;
1131 				offset += 4;
1132 				if (end - pos < size)
1133 					return -EINVAL;
1134 			}
1135 			tiles->size[i][j] = size;
1136 			if (tiles->mi_rows[i]) {
1137 				*tb++ = (size << 3) + ((offset << 3) & 0x7f);
1138 				*tb++ = pa & ~0xf;
1139 				*tb++ = (pa << 3) & 0x7f;
1140 				mi_row = (tiles->mi_rows[i] - 1) & 0x1ff;
1141 				mi_col = (tiles->mi_cols[j] - 1) & 0x3f;
1142 				*tb++ = (mi_row << 6) + mi_col;
1143 			}
1144 			pos += size;
1145 			pa += size;
1146 			offset += size;
1147 		}
1148 	}
1149 
1150 	return 0;
1151 }
1152 
vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance * instance,struct mtk_vcodec_mem * bs,struct vdec_lat_buf * lat_buf,struct vdec_vp9_slice_pfc * pfc)1153 static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance,
1154 				    struct mtk_vcodec_mem *bs,
1155 				    struct vdec_lat_buf *lat_buf,
1156 				    struct vdec_vp9_slice_pfc *pfc)
1157 {
1158 	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1159 	int ret;
1160 
1161 	ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf);
1162 	if (ret)
1163 		goto err;
1164 
1165 	ret = vdec_vp9_slice_setup_pfc(instance, pfc);
1166 	if (ret)
1167 		goto err;
1168 
1169 	ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
1170 	if (ret)
1171 		goto err;
1172 
1173 	ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
1174 	if (ret)
1175 		goto err;
1176 
1177 	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
1178 
1179 	/* setup prob/tile buffers for LAT */
1180 
1181 	ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
1182 	if (ret)
1183 		goto err;
1184 
1185 	ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
1186 	if (ret)
1187 		goto err;
1188 
1189 	return 0;
1190 
1191 err:
1192 	return ret;
1193 }
1194 
1195 static
vdec_vp9_slice_map_counts_eob_coef(unsigned int i,unsigned int j,unsigned int k,struct vdec_vp9_slice_frame_counts * counts,struct v4l2_vp9_frame_symbol_counts * counts_helper)1196 void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k,
1197 					struct vdec_vp9_slice_frame_counts *counts,
1198 					struct v4l2_vp9_frame_symbol_counts *counts_helper)
1199 {
1200 	u32 l = 0, m;
1201 
1202 	/*
1203 	 * helper eo -> mtk eo
1204 	 * helpre e1 -> mtk c3
1205 	 * helper c0 -> c0
1206 	 * helper c1 -> c1
1207 	 * helper c2 -> c2
1208 	 */
1209 	for (m = 0; m < 3; m++) {
1210 		counts_helper->coeff[i][j][k][l][m] =
1211 			(u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m];
1212 		counts_helper->eob[i][j][k][l][m][0] =
1213 			&counts->eob_branch[i][j][k].band_0[m];
1214 		counts_helper->eob[i][j][k][l][m][1] =
1215 			&counts->coef_probs[i][j][k].band_0[m][3];
1216 	}
1217 
1218 	for (l = 1; l < 6; l++) {
1219 		for (m = 0; m < 6; m++) {
1220 			counts_helper->coeff[i][j][k][l][m] =
1221 				(u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m];
1222 			counts_helper->eob[i][j][k][l][m][0] =
1223 				&counts->eob_branch[i][j][k].band_1_5[l - 1][m];
1224 			counts_helper->eob[i][j][k][l][m][1] =
1225 				&counts->coef_probs[i][j][k].band_1_5[l - 1][m][3];
1226 		}
1227 	}
1228 }
1229 
vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map * counts_map,struct vdec_vp9_slice_frame_counts * counts,struct v4l2_vp9_frame_symbol_counts * counts_helper)1230 static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map,
1231 					     struct vdec_vp9_slice_frame_counts *counts,
1232 					     struct v4l2_vp9_frame_symbol_counts *counts_helper)
1233 {
1234 	int i, j, k;
1235 
1236 	counts_helper->partition = &counts->partition;
1237 	counts_helper->intra_inter = &counts->intra_inter;
1238 	counts_helper->tx32p = &counts->tx_p32x32;
1239 	counts_helper->tx16p = &counts->tx_p16x16;
1240 	counts_helper->tx8p = &counts->tx_p8x8;
1241 	counts_helper->uv_mode = &counts->uv_mode;
1242 
1243 	counts_helper->comp = &counts->comp_inter;
1244 	counts_helper->comp_ref = &counts->comp_ref;
1245 	counts_helper->single_ref = &counts->single_ref;
1246 	counts_helper->mv_mode = &counts->inter_mode;
1247 	counts_helper->mv_joint = &counts->joint;
1248 
1249 	for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++)
1250 		memcpy(counts_map->skip[i], counts->skip[i],
1251 		       sizeof(counts_map->skip[0]));
1252 	counts_helper->skip = &counts_map->skip;
1253 
1254 	for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++)
1255 		memcpy(counts_map->y_mode[i], counts->y_mode[i],
1256 		       sizeof(counts_map->y_mode[0]));
1257 	counts_helper->y_mode = &counts_map->y_mode;
1258 
1259 	for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++)
1260 		memcpy(counts_map->filter[i], counts->switchable_interp[i],
1261 		       sizeof(counts_map->filter[0]));
1262 	counts_helper->filter = &counts_map->filter;
1263 
1264 	for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++)
1265 		memcpy(counts_map->sign[i], counts->mvcomp[i].sign,
1266 		       sizeof(counts_map->sign[0]));
1267 	counts_helper->sign = &counts_map->sign;
1268 
1269 	for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++)
1270 		memcpy(counts_map->classes[i], counts->mvcomp[i].classes,
1271 		       sizeof(counts_map->classes[0]));
1272 	counts_helper->classes = &counts_map->classes;
1273 
1274 	for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++)
1275 		memcpy(counts_map->class0[i], counts->mvcomp[i].class0,
1276 		       sizeof(counts_map->class0[0]));
1277 	counts_helper->class0 = &counts_map->class0;
1278 
1279 	for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++)
1280 		for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++)
1281 			memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j],
1282 			       sizeof(counts_map->bits[0][0]));
1283 	counts_helper->bits = &counts_map->bits;
1284 
1285 	for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++)
1286 		for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++)
1287 			memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j],
1288 			       sizeof(counts_map->class0_fp[0][0]));
1289 	counts_helper->class0_fp = &counts_map->class0_fp;
1290 
1291 	for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++)
1292 		memcpy(counts_map->fp[i], counts->mvcomp[i].fp,
1293 		       sizeof(counts_map->fp[0]));
1294 	counts_helper->fp = &counts_map->fp;
1295 
1296 	for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++)
1297 		memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp,
1298 		       sizeof(counts_map->class0_hp[0]));
1299 	counts_helper->class0_hp = &counts_map->class0_hp;
1300 
1301 	for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++)
1302 		memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0]));
1303 
1304 	counts_helper->hp = &counts_map->hp;
1305 
1306 	for (i = 0; i < 4; i++)
1307 		for (j = 0; j < 2; j++)
1308 			for (k = 0; k < 2; k++)
1309 				vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper);
1310 }
1311 
vdec_vp9_slice_map_to_coef(unsigned int i,unsigned int j,unsigned int k,struct vdec_vp9_slice_frame_ctx * frame_ctx,struct v4l2_vp9_frame_context * frame_ctx_helper)1312 static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k,
1313 				       struct vdec_vp9_slice_frame_ctx *frame_ctx,
1314 				       struct v4l2_vp9_frame_context *frame_ctx_helper)
1315 {
1316 	u32 l, m;
1317 
1318 	for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
1319 		for (m = 0; m < VP9_BAND_6(l); m++) {
1320 			memcpy(frame_ctx_helper->coef[i][j][k][l][m],
1321 			       frame_ctx->coef_probs[i][j][k][l].probs[m],
1322 			       sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
1323 		}
1324 	}
1325 }
1326 
vdec_vp9_slice_map_from_coef(unsigned int i,unsigned int j,unsigned int k,struct vdec_vp9_slice_frame_ctx * frame_ctx,struct v4l2_vp9_frame_context * frame_ctx_helper)1327 static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k,
1328 					 struct vdec_vp9_slice_frame_ctx *frame_ctx,
1329 					 struct v4l2_vp9_frame_context *frame_ctx_helper)
1330 {
1331 	u32 l, m;
1332 
1333 	for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
1334 		for (m = 0; m < VP9_BAND_6(l); m++) {
1335 			memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m],
1336 			       frame_ctx_helper->coef[i][j][k][l][m],
1337 			       sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
1338 		}
1339 	}
1340 }
1341 
1342 static
vdec_vp9_slice_framectx_map_helper(bool frame_is_intra,struct vdec_vp9_slice_frame_ctx * pre_frame_ctx,struct vdec_vp9_slice_frame_ctx * frame_ctx,struct v4l2_vp9_frame_context * frame_ctx_helper)1343 void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra,
1344 					struct vdec_vp9_slice_frame_ctx *pre_frame_ctx,
1345 					struct vdec_vp9_slice_frame_ctx *frame_ctx,
1346 					struct v4l2_vp9_frame_context *frame_ctx_helper)
1347 {
1348 	struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
1349 	u32 i, j, k;
1350 
1351 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
1352 		for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
1353 			for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
1354 				vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx,
1355 							   frame_ctx_helper);
1356 
1357 	/*
1358 	 * use previous prob when frame is not intra or
1359 	 * we should use the prob updated by the compressed header parse
1360 	 */
1361 	if (!frame_is_intra)
1362 		frame_ctx = pre_frame_ctx;
1363 
1364 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
1365 		memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i],
1366 		       sizeof(frame_ctx_helper->tx8[0]));
1367 
1368 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
1369 		memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i],
1370 		       sizeof(frame_ctx_helper->tx16[0]));
1371 
1372 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
1373 		memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i],
1374 		       sizeof(frame_ctx_helper->tx32[0]));
1375 
1376 	memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip));
1377 
1378 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
1379 		memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i],
1380 		       sizeof(frame_ctx_helper->inter_mode[0]));
1381 
1382 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
1383 		memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i],
1384 		       sizeof(frame_ctx_helper->interp_filter[0]));
1385 
1386 	memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob,
1387 	       sizeof(frame_ctx_helper->is_inter));
1388 
1389 	memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob,
1390 	       sizeof(frame_ctx_helper->comp_mode));
1391 
1392 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
1393 		memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i],
1394 		       sizeof(frame_ctx_helper->single_ref[0]));
1395 
1396 	memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob,
1397 	       sizeof(frame_ctx_helper->comp_ref));
1398 
1399 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
1400 		memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i],
1401 		       sizeof(frame_ctx_helper->y_mode[0]));
1402 
1403 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
1404 		memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i],
1405 		       sizeof(frame_ctx_helper->uv_mode[0]));
1406 
1407 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
1408 		memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i],
1409 		       sizeof(frame_ctx_helper->partition[0]));
1410 
1411 	memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint));
1412 
1413 	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
1414 		mv->sign[i] = frame_ctx->sign_classes[i].sign;
1415 
1416 	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
1417 		memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes,
1418 		       sizeof(mv->classes[i]));
1419 
1420 	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
1421 		mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0];
1422 
1423 	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
1424 		memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0]));
1425 
1426 	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
1427 		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
1428 			memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j],
1429 			       sizeof(mv->class0_fr[0][0]));
1430 
1431 	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
1432 		memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0]));
1433 
1434 	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
1435 		mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp;
1436 
1437 	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
1438 		mv->hp[i] = frame_ctx->class0_fp_hp[i].hp;
1439 }
1440 
vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context * frame_ctx_helper,struct vdec_vp9_slice_frame_ctx * frame_ctx)1441 static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper,
1442 					       struct vdec_vp9_slice_frame_ctx *frame_ctx)
1443 {
1444 	struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
1445 	u32 i, j, k;
1446 
1447 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
1448 		memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i],
1449 		       sizeof(frame_ctx_helper->tx8[0]));
1450 
1451 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
1452 		memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i],
1453 		       sizeof(frame_ctx_helper->tx16[0]));
1454 
1455 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
1456 		memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i],
1457 		       sizeof(frame_ctx_helper->tx32[0]));
1458 
1459 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
1460 		for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
1461 			for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
1462 				vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx,
1463 							     frame_ctx_helper);
1464 
1465 	memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip));
1466 
1467 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
1468 		memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i],
1469 		       sizeof(frame_ctx_helper->inter_mode[0]));
1470 
1471 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
1472 		memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i],
1473 		       sizeof(frame_ctx_helper->interp_filter[0]));
1474 
1475 	memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter,
1476 	       sizeof(frame_ctx_helper->is_inter));
1477 
1478 	memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode,
1479 	       sizeof(frame_ctx_helper->comp_mode));
1480 
1481 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
1482 		memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i],
1483 		       sizeof(frame_ctx_helper->single_ref[0]));
1484 
1485 	memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref,
1486 	       sizeof(frame_ctx_helper->comp_ref));
1487 
1488 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
1489 		memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i],
1490 		       sizeof(frame_ctx_helper->y_mode[0]));
1491 
1492 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
1493 		memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i],
1494 		       sizeof(frame_ctx_helper->uv_mode[0]));
1495 
1496 	for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
1497 		memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i],
1498 		       sizeof(frame_ctx_helper->partition[0]));
1499 
1500 	memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint));
1501 
1502 	for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
1503 		frame_ctx->sign_classes[i].sign = mv->sign[i];
1504 
1505 	for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
1506 		memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i],
1507 		       sizeof(mv->classes[i]));
1508 
1509 	for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
1510 		frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i];
1511 
1512 	for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
1513 		memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0]));
1514 
1515 	for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
1516 		for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
1517 			memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j],
1518 			       sizeof(mv->class0_fr[0][0]));
1519 
1520 	for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
1521 		memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0]));
1522 
1523 	for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
1524 		frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i];
1525 
1526 	for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
1527 		frame_ctx->class0_fp_hp[i].hp = mv->hp[i];
1528 }
1529 
vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_vsi * vsi)1530 static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance,
1531 				      struct vdec_vp9_slice_vsi *vsi)
1532 {
1533 	struct vdec_vp9_slice_frame_ctx *pre_frame_ctx;
1534 	struct v4l2_vp9_frame_context *pre_frame_ctx_helper;
1535 	struct vdec_vp9_slice_frame_ctx *frame_ctx;
1536 	struct vdec_vp9_slice_frame_counts *counts;
1537 	struct v4l2_vp9_frame_symbol_counts *counts_helper;
1538 	struct vdec_vp9_slice_uncompressed_header *uh;
1539 	bool frame_is_intra;
1540 	bool use_128;
1541 
1542 	uh = &vsi->frame.uh;
1543 	pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
1544 	pre_frame_ctx_helper = &instance->frame_ctx_helper;
1545 	frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va;
1546 	counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va;
1547 	counts_helper = &instance->counts_helper;
1548 
1549 	if (!uh->refresh_frame_context)
1550 		return 0;
1551 
1552 	if (!uh->frame_parallel_decoding_mode) {
1553 		vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper);
1554 
1555 		frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only;
1556 		/* check default prob */
1557 		if (!instance->dirty[uh->frame_context_idx])
1558 			vdec_vp9_slice_framectx_map_helper(frame_is_intra,
1559 							   vdec_vp9_slice_default_frame_ctx,
1560 							   frame_ctx,
1561 							   pre_frame_ctx_helper);
1562 		else
1563 			vdec_vp9_slice_framectx_map_helper(frame_is_intra,
1564 							   pre_frame_ctx,
1565 							   frame_ctx,
1566 							   pre_frame_ctx_helper);
1567 
1568 		use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type;
1569 		v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper,
1570 					  counts_helper,
1571 					  use_128,
1572 					  frame_is_intra);
1573 		if (!frame_is_intra)
1574 			v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper,
1575 						     counts_helper,
1576 						     V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE,
1577 						     vsi->frame.uh.interpolation_filter,
1578 						     vsi->frame.ch.tx_mode,
1579 						     vsi->frame.uh.allow_high_precision_mv ?
1580 						     V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0);
1581 		vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx);
1582 	} else {
1583 		memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx));
1584 	}
1585 
1586 	instance->dirty[uh->frame_context_idx] = 1;
1587 
1588 	return 0;
1589 }
1590 
vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_pfc * pfc)1591 static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance,
1592 					struct vdec_vp9_slice_pfc *pfc)
1593 {
1594 	struct vdec_vp9_slice_vsi *vsi;
1595 
1596 	vsi = &pfc->vsi;
1597 	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
1598 
1599 	mtk_vcodec_debug(instance, "Frame %u Y_CRC %08x %08x %08x %08x\n",
1600 			 pfc->seq,
1601 			 vsi->state.crc[0], vsi->state.crc[1],
1602 			 vsi->state.crc[2], vsi->state.crc[3]);
1603 	mtk_vcodec_debug(instance, "Frame %u C_CRC %08x %08x %08x %08x\n",
1604 			 pfc->seq,
1605 			 vsi->state.crc[4], vsi->state.crc[5],
1606 			 vsi->state.crc[6], vsi->state.crc[7]);
1607 
1608 	vdec_vp9_slice_update_prob(instance, vsi);
1609 
1610 	instance->width = vsi->frame.uh.frame_width;
1611 	instance->height = vsi->frame.uh.frame_height;
1612 	instance->frame_type = vsi->frame.uh.frame_type;
1613 	instance->show_frame = vsi->frame.uh.show_frame;
1614 
1615 	return 0;
1616 }
1617 
vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance * instance,struct vdec_lat_buf * lat_buf,struct vdec_vp9_slice_pfc * pfc)1618 static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
1619 				     struct vdec_lat_buf *lat_buf,
1620 				     struct vdec_vp9_slice_pfc *pfc)
1621 {
1622 	struct vdec_vp9_slice_vsi *vsi;
1623 
1624 	vsi = &pfc->vsi;
1625 	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
1626 
1627 	mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
1628 			 pfc->seq, vsi->state.crc[0],
1629 			 (unsigned long)vsi->trans.dma_addr,
1630 			 (unsigned long)vsi->trans.dma_addr_end);
1631 
1632 	/* buffer full, need to re-decode */
1633 	if (vsi->state.full) {
1634 		/* buffer not enough */
1635 		if (vsi->trans.dma_addr_end - vsi->trans.dma_addr ==
1636 			vsi->ube.size)
1637 			return -ENOMEM;
1638 		return -EAGAIN;
1639 	}
1640 
1641 	vdec_vp9_slice_update_prob(instance, vsi);
1642 
1643 	instance->width = vsi->frame.uh.frame_width;
1644 	instance->height = vsi->frame.uh.frame_height;
1645 	instance->frame_type = vsi->frame.uh.frame_type;
1646 	instance->show_frame = vsi->frame.uh.show_frame;
1647 
1648 	return 0;
1649 }
1650 
vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance * instance,struct vdec_lat_buf * lat_buf)1651 static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance,
1652 						struct vdec_lat_buf *lat_buf)
1653 {
1654 	struct vb2_v4l2_buffer *dst;
1655 
1656 	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
1657 	if (!dst)
1658 		return -EINVAL;
1659 
1660 	v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
1661 	return 0;
1662 }
1663 
vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_pfc * pfc,struct vdec_vp9_slice_vsi * vsi,struct vdec_fb * fb,struct vdec_lat_buf * lat_buf)1664 static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance,
1665 					    struct vdec_vp9_slice_pfc *pfc,
1666 					    struct vdec_vp9_slice_vsi *vsi,
1667 					    struct vdec_fb *fb,
1668 					    struct vdec_lat_buf *lat_buf)
1669 {
1670 	struct vb2_buffer *vb;
1671 	struct vb2_queue *vq;
1672 	struct vdec_vp9_slice_reference *ref;
1673 	int plane;
1674 	int size;
1675 	int w;
1676 	int h;
1677 	int i;
1678 
1679 	plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
1680 	w = vsi->frame.uh.frame_width;
1681 	h = vsi->frame.uh.frame_height;
1682 	size = ALIGN(w, 64) * ALIGN(h, 64);
1683 
1684 	/* frame buffer */
1685 	vsi->fb.y.dma_addr = fb->base_y.dma_addr;
1686 	if (plane == 1)
1687 		vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
1688 	else
1689 		vsi->fb.c.dma_addr = fb->base_c.dma_addr;
1690 
1691 	/* reference buffers */
1692 	vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx,
1693 			     V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
1694 	if (!vq)
1695 		return -EINVAL;
1696 
1697 	/* get current output buffer */
1698 	vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
1699 	if (!vb)
1700 		return -EINVAL;
1701 
1702 	/* update internal buffer's width/height */
1703 	for (i = 0; i < vq->num_buffers; i++) {
1704 		if (vb == vq->bufs[i]) {
1705 			instance->dpb[i].width = w;
1706 			instance->dpb[i].height = h;
1707 			break;
1708 		}
1709 	}
1710 
1711 	/*
1712 	 * get buffer's width/height from instance
1713 	 * get buffer address from vb2buf
1714 	 */
1715 	for (i = 0; i < 3; i++) {
1716 		ref = &vsi->frame.ref[i];
1717 		vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
1718 		if (!vb) {
1719 			ref->frame_width = w;
1720 			ref->frame_height = h;
1721 			memset(&vsi->ref[i], 0, sizeof(vsi->ref[i]));
1722 		} else {
1723 			int idx = vb->index;
1724 
1725 			ref->frame_width = instance->dpb[idx].width;
1726 			ref->frame_height = instance->dpb[idx].height;
1727 			vsi->ref[i].y.dma_addr =
1728 				vb2_dma_contig_plane_dma_addr(vb, 0);
1729 			if (plane == 1)
1730 				vsi->ref[i].c.dma_addr =
1731 					vsi->ref[i].y.dma_addr + size;
1732 			else
1733 				vsi->ref[i].c.dma_addr =
1734 					vb2_dma_contig_plane_dma_addr(vb, 1);
1735 		}
1736 	}
1737 
1738 	return 0;
1739 }
1740 
vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance * instance,struct vdec_vp9_slice_pfc * pfc,struct vdec_vp9_slice_vsi * vsi,struct mtk_vcodec_mem * bs,struct vdec_fb * fb)1741 static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance,
1742 					       struct vdec_vp9_slice_pfc *pfc,
1743 					       struct vdec_vp9_slice_vsi *vsi,
1744 					       struct mtk_vcodec_mem *bs,
1745 					       struct vdec_fb *fb)
1746 {
1747 	int i;
1748 
1749 	vsi->bs.buf.dma_addr = bs->dma_addr;
1750 	vsi->bs.buf.size = bs->size;
1751 	vsi->bs.frame.dma_addr = bs->dma_addr;
1752 	vsi->bs.frame.size = bs->size;
1753 
1754 	for (i = 0; i < 2; i++) {
1755 		vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
1756 		vsi->mv[i].size = instance->mv[i].size;
1757 	}
1758 	for (i = 0; i < 2; i++) {
1759 		vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
1760 		vsi->seg[i].size = instance->seg[i].size;
1761 	}
1762 	vsi->tile.dma_addr = instance->tile.dma_addr;
1763 	vsi->tile.size = instance->tile.size;
1764 	vsi->prob.dma_addr = instance->prob.dma_addr;
1765 	vsi->prob.size = instance->prob.size;
1766 	vsi->counts.dma_addr = instance->counts.dma_addr;
1767 	vsi->counts.size = instance->counts.size;
1768 
1769 	vsi->row_info.buf = 0;
1770 	vsi->row_info.size = 0;
1771 
1772 	vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL);
1773 }
1774 
vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance * instance,struct vdec_fb * fb,struct vdec_lat_buf * lat_buf,struct vdec_vp9_slice_pfc * pfc)1775 static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance,
1776 				     struct vdec_fb *fb,
1777 				     struct vdec_lat_buf *lat_buf,
1778 				     struct vdec_vp9_slice_pfc *pfc)
1779 {
1780 	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1781 	int ret;
1782 
1783 	vdec_vp9_slice_setup_state(vsi);
1784 
1785 	ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf);
1786 	if (ret)
1787 		goto err;
1788 
1789 	ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
1790 	if (ret)
1791 		goto err;
1792 
1793 	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]);
1794 
1795 	return 0;
1796 
1797 err:
1798 	return ret;
1799 }
1800 
vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance * instance,struct mtk_vcodec_mem * bs,struct vdec_fb * fb,struct vdec_vp9_slice_pfc * pfc)1801 static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance,
1802 				       struct mtk_vcodec_mem *bs,
1803 				       struct vdec_fb *fb,
1804 				       struct vdec_vp9_slice_pfc *pfc)
1805 {
1806 	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1807 	int ret;
1808 
1809 	ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance);
1810 	if (ret)
1811 		goto err;
1812 
1813 	ret = vdec_vp9_slice_setup_pfc(instance, pfc);
1814 	if (ret)
1815 		goto err;
1816 
1817 	ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
1818 	if (ret)
1819 		goto err;
1820 
1821 	vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb);
1822 	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
1823 
1824 	ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
1825 	if (ret)
1826 		goto err;
1827 
1828 	ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
1829 	if (ret)
1830 		goto err;
1831 
1832 	return 0;
1833 
1834 err:
1835 	return ret;
1836 }
1837 
vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance * instance,struct vdec_lat_buf * lat_buf,struct vdec_vp9_slice_pfc * pfc)1838 static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance,
1839 				      struct vdec_lat_buf *lat_buf,
1840 				      struct vdec_vp9_slice_pfc *pfc)
1841 {
1842 	struct vdec_vp9_slice_vsi *vsi;
1843 
1844 	vsi = &pfc->vsi;
1845 	memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state));
1846 
1847 	mtk_vcodec_debug(instance, "Frame %u Y_CRC %08x %08x %08x %08x\n",
1848 			 pfc->seq,
1849 			 vsi->state.crc[0], vsi->state.crc[1],
1850 			 vsi->state.crc[2], vsi->state.crc[3]);
1851 	mtk_vcodec_debug(instance, "Frame %u C_CRC %08x %08x %08x %08x\n",
1852 			 pfc->seq,
1853 			 vsi->state.crc[4], vsi->state.crc[5],
1854 			 vsi->state.crc[6], vsi->state.crc[7]);
1855 
1856 	return 0;
1857 }
1858 
vdec_vp9_slice_init(struct mtk_vcodec_ctx * ctx)1859 static int vdec_vp9_slice_init(struct mtk_vcodec_ctx *ctx)
1860 {
1861 	struct vdec_vp9_slice_instance *instance;
1862 	struct vdec_vp9_slice_init_vsi *vsi;
1863 	int ret;
1864 
1865 	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
1866 	if (!instance)
1867 		return -ENOMEM;
1868 
1869 	instance->ctx = ctx;
1870 	instance->vpu.id = SCP_IPI_VDEC_LAT;
1871 	instance->vpu.core_id = SCP_IPI_VDEC_CORE;
1872 	instance->vpu.ctx = ctx;
1873 	instance->vpu.codec_type = ctx->current_codec;
1874 
1875 	ret = vpu_dec_init(&instance->vpu);
1876 	if (ret) {
1877 		mtk_vcodec_err(instance, "failed to init vpu dec, ret %d\n", ret);
1878 		goto error_vpu_init;
1879 	}
1880 
1881 	/* init vsi and global flags */
1882 
1883 	vsi = instance->vpu.vsi;
1884 	if (!vsi) {
1885 		mtk_vcodec_err(instance, "failed to get VP9 vsi\n");
1886 		ret = -EINVAL;
1887 		goto error_vsi;
1888 	}
1889 	instance->init_vsi = vsi;
1890 	instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
1891 						       (u32)vsi->core_vsi);
1892 	if (!instance->core_vsi) {
1893 		mtk_vcodec_err(instance, "failed to get VP9 core vsi\n");
1894 		ret = -EINVAL;
1895 		goto error_vsi;
1896 	}
1897 
1898 	instance->irq = 1;
1899 
1900 	ret = vdec_vp9_slice_init_default_frame_ctx(instance);
1901 	if (ret)
1902 		goto error_default_frame_ctx;
1903 
1904 	ctx->drv_handle = instance;
1905 
1906 	return 0;
1907 
1908 error_default_frame_ctx:
1909 error_vsi:
1910 	vpu_dec_deinit(&instance->vpu);
1911 error_vpu_init:
1912 	kfree(instance);
1913 	return ret;
1914 }
1915 
vdec_vp9_slice_deinit(void * h_vdec)1916 static void vdec_vp9_slice_deinit(void *h_vdec)
1917 {
1918 	struct vdec_vp9_slice_instance *instance = h_vdec;
1919 
1920 	if (!instance)
1921 		return;
1922 
1923 	vpu_dec_deinit(&instance->vpu);
1924 	vdec_vp9_slice_free_working_buffer(instance);
1925 	vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
1926 	kfree(instance);
1927 }
1928 
/*
 * Flush the decoder.
 *
 * On every hardware architecture except MTK_VDEC_PURE_SINGLE_CORE this
 * first calls vdec_msg_queue_wait_lat_buf_full() on the context's message
 * queue; afterwards the VPU decoder is reset. @bs, @fb and @res_chg are
 * not used here.
 *
 * Return: the result of vpu_dec_reset().
 */
static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
				struct vdec_fb *fb, bool *res_chg)
{
	struct vdec_vp9_slice_instance *inst = h_vdec;
	bool single_core;

	mtk_vcodec_debug(inst, "flush ...\n");

	single_core = inst->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE;
	if (!single_core)
		vdec_msg_queue_wait_lat_buf_full(&inst->ctx->msg_queue);

	return vpu_dec_reset(&inst->vpu);
}
1939 
vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance * instance)1940 static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance)
1941 {
1942 	struct mtk_vcodec_ctx *ctx = instance->ctx;
1943 	unsigned int data[3];
1944 
1945 	mtk_vcodec_debug(instance, "w %u h %u\n",
1946 			 ctx->picinfo.pic_w, ctx->picinfo.pic_h);
1947 
1948 	data[0] = ctx->picinfo.pic_w;
1949 	data[1] = ctx->picinfo.pic_h;
1950 	data[2] = ctx->capture_fourcc;
1951 	vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
1952 
1953 	ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64);
1954 	ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64);
1955 	ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
1956 	ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
1957 }
1958 
/*
 * Report the DPB size used for VP9.
 *
 * The value is a constant and does not depend on @instance, which is not
 * accessed. The result is stored in *@dpb_sz.
 */
static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance,
					unsigned int *dpb_sz)
{
	/* fixed value, refer to the VP9 specification */
	const unsigned int vp9_dpb_size = 9;

	*dpb_sz = vp9_dpb_size;
}
1965 
vdec_vp9_slice_get_param(void * h_vdec,enum vdec_get_param_type type,void * out)1966 static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
1967 {
1968 	struct vdec_vp9_slice_instance *instance = h_vdec;
1969 
1970 	switch (type) {
1971 	case GET_PARAM_PIC_INFO:
1972 		vdec_vp9_slice_get_pic_info(instance);
1973 		break;
1974 	case GET_PARAM_DPB_SIZE:
1975 		vdec_vp9_slice_get_dpb_size(instance, out);
1976 		break;
1977 	case GET_PARAM_CROP_INFO:
1978 		mtk_vcodec_debug(instance, "No need to get vp9 crop information.");
1979 		break;
1980 	default:
1981 		mtk_vcodec_err(instance, "invalid get parameter type=%d\n",
1982 			       type);
1983 		return -EINVAL;
1984 	}
1985 
1986 	return 0;
1987 }
1988 
/*
 * Decode one frame on the single-core path.
 *
 * @h_vdec:  decoder instance handle
 * @bs:      bitstream buffer; NULL requests a decoder flush
 * @fb:      only forwarded to the flush path; for a real decode it is
 *           replaced by the capture buffer obtained from get_cap_buffer()
 * @res_chg: only forwarded to the flush path
 *
 * The local vsi of the instance's sc_pfc is prepared, copied to the remote
 * vsi, and the VPU is started. If waiting for the decode interrupt fails,
 * the remote vsi is flagged with state.timeout = 1 and processing continues
 * so that the state can still be read back and evaluated by
 * vdec_vp9_slice_update_single().
 *
 * Return: 0 on success (the context's decoded_frame_cnt is incremented),
 * -EINVAL for an invalid handle, -EBUSY if no capture buffer is available,
 * or the error code of the failing setup/start/update step.
 */
static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
					struct vdec_fb *fb, bool *res_chg)
{
	struct vdec_vp9_slice_instance *instance = h_vdec;
	struct vdec_vp9_slice_pfc *pfc;
	struct vdec_vp9_slice_vsi *vsi;
	struct mtk_vcodec_ctx *ctx;
	int ret;

	if (!instance || !instance->ctx)
		return -EINVAL;
	ctx = instance->ctx;
	/* form the member address only after the handle has been validated */
	pfc = &instance->sc_pfc;

	/* bs NULL means flush decoder */
	if (!bs)
		return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);

	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
	if (!fb)
		return -EBUSY;

	vsi = &pfc->vsi;

	ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc);
	if (ret) {
		mtk_vcodec_err(instance, "Failed to setup VP9 single ret %d\n", ret);
		return ret;
	}
	vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);

	ret = vpu_dec_start(&instance->vpu, NULL, 0);
	if (ret) {
		mtk_vcodec_err(instance, "Failed to dec VP9 ret %d\n", ret);
		return ret;
	}

	ret = mtk_vcodec_wait_for_done_ctx(ctx,	MTK_INST_IRQ_RECEIVED,
					   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
	/* update remote vsi if decode timeout */
	if (ret) {
		mtk_vcodec_err(instance, "VP9 decode timeout %d\n", ret);
		WRITE_ONCE(instance->vsi->state.timeout, 1);
	}

	vpu_dec_end(&instance->vpu);

	/* read the state back; the wait result above is superseded below */
	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
	ret = vdec_vp9_slice_update_single(instance, pfc);
	if (ret) {
		mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
		return ret;
	}

	instance->ctx->decoded_frame_cnt++;
	return 0;
}
2045 
/*
 * Run the LAT stage for one frame and hand the result to the core stage.
 *
 * @h_vdec:  decoder instance handle
 * @bs:      bitstream buffer; NULL requests a decoder flush
 * @fb:      only forwarded to the flush path
 * @res_chg: only forwarded to the flush path
 *
 * A LAT buffer is dequeued from the context's LAT queue and its private
 * data is used as the per-frame context. After a successful LAT pass the
 * UBE write pointer is advanced and the LAT buffer is queued to the
 * device's core queue. On any failure after the dequeue, the LAT buffer is
 * put back on the LAT queue.
 *
 * Return: 0 on success, -EINVAL for an invalid handle or missing private
 * data, -ENOMEM if the message queue cannot be initialised, -EBUSY if no
 * LAT buffer is available, or the error code of the failing step.
 */
static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
				     struct vdec_fb *fb, bool *res_chg)
{
	struct vdec_vp9_slice_instance *instance = h_vdec;
	struct vdec_vp9_slice_pfc *pfc;
	struct vdec_vp9_slice_vsi *vsi;
	struct vdec_lat_buf *lat_buf;
	struct mtk_vcodec_ctx *ctx;
	int ret;

	if (!instance || !instance->ctx)
		return -EINVAL;
	ctx = instance->ctx;

	/* init msgQ for the first time */
	ret = vdec_msg_queue_init(&ctx->msg_queue, ctx,
				  vdec_vp9_slice_core_decode, sizeof(*pfc));
	if (ret)
		return -ENOMEM;

	/* bs NULL means flush decoder */
	if (!bs)
		return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);

	lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx);
	if (!lat_buf) {
		mtk_vcodec_err(instance, "Failed to get VP9 lat buf\n");
		return -EBUSY;
	}

	pfc = lat_buf->private_data;
	if (!pfc) {
		ret = -EINVAL;
		goto err_requeue_lat_buf;
	}
	vsi = &pfc->vsi;

	ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc);
	if (ret) {
		mtk_vcodec_err(instance, "Failed to setup VP9 lat ret %d\n", ret);
		goto err_requeue_lat_buf;
	}
	vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);

	ret = vpu_dec_start(&instance->vpu, NULL, 0);
	if (ret) {
		mtk_vcodec_err(instance, "Failed to dec VP9 ret %d\n", ret);
		goto err_requeue_lat_buf;
	}

	if (instance->irq) {
		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
						   WAIT_INTR_TIMEOUT_MS,
						   MTK_VDEC_LAT0);
		/* update remote vsi if decode timeout */
		if (ret) {
			mtk_vcodec_err(instance, "VP9 decode timeout %d pic %d\n", ret, pfc->seq);
			WRITE_ONCE(instance->vsi->state.timeout, 1);
		}
		vpu_dec_end(&instance->vpu);
	}

	/* the wait result is superseded by the update result below */
	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
	if (ret) {
		/* LAT trans full, no more UBE or decode timeout */
		mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
		goto err_requeue_lat_buf;
	}

	mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
			 (unsigned long)vsi->trans.dma_addr,
			 (unsigned long)vsi->trans.dma_addr_end);

	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
				       vsi->trans.dma_addr_end +
				       ctx->msg_queue.wdma_addr.dma_addr);
	/* hand the frame over to the core stage */
	vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);

	return 0;

err_requeue_lat_buf:
	/* give the LAT buffer back so it can be reused */
	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
	return ret;
}
2129 
/*
 * Decode entry point: dispatch to the single-core or the LAT decode path
 * depending on the hardware architecture of the device.
 *
 * The handle is validated here because the architecture lookup itself
 * dereferences instance->ctx; without this check the equivalent validation
 * done by the two decode paths could never take effect.
 *
 * Return: -EINVAL for an invalid handle, otherwise the result of the
 * selected decode path.
 */
static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
				 struct vdec_fb *fb, bool *res_chg)
{
	struct vdec_vp9_slice_instance *instance = h_vdec;

	if (!instance || !instance->ctx)
		return -EINVAL;

	if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE)
		return vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg);

	return vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg);
}
2143 
vdec_vp9_slice_core_decode(struct vdec_lat_buf * lat_buf)2144 static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
2145 {
2146 	struct vdec_vp9_slice_instance *instance;
2147 	struct vdec_vp9_slice_pfc *pfc;
2148 	struct mtk_vcodec_ctx *ctx = NULL;
2149 	struct vdec_fb *fb = NULL;
2150 	int ret = -EINVAL;
2151 
2152 	if (!lat_buf)
2153 		goto err;
2154 
2155 	pfc = lat_buf->private_data;
2156 	ctx = lat_buf->ctx;
2157 	if (!pfc || !ctx)
2158 		goto err;
2159 
2160 	instance = ctx->drv_handle;
2161 	if (!instance)
2162 		goto err;
2163 
2164 	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
2165 	if (!fb) {
2166 		ret = -EBUSY;
2167 		goto err;
2168 	}
2169 
2170 	ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
2171 	if (ret) {
2172 		mtk_vcodec_err(instance, "vdec_vp9_slice_setup_core\n");
2173 		goto err;
2174 	}
2175 	vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
2176 
2177 	ret = vpu_dec_core(&instance->vpu);
2178 	if (ret) {
2179 		mtk_vcodec_err(instance, "vpu_dec_core\n");
2180 		goto err;
2181 	}
2182 
2183 	if (instance->irq) {
2184 		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2185 						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
2186 		/* update remote vsi if decode timeout */
2187 		if (ret) {
2188 			mtk_vcodec_err(instance, "VP9 core timeout pic %d\n", pfc->seq);
2189 			WRITE_ONCE(instance->core_vsi->state.timeout, 1);
2190 		}
2191 		vpu_dec_core_end(&instance->vpu);
2192 	}
2193 
2194 	vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1);
2195 	ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
2196 	if (ret) {
2197 		mtk_vcodec_err(instance, "vdec_vp9_slice_update_core\n");
2198 		goto err;
2199 	}
2200 
2201 	pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
2202 	mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
2203 			 (unsigned long)pfc->vsi.trans.dma_addr_end);
2204 	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2205 	ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
2206 
2207 	return 0;
2208 
2209 err:
2210 	if (ctx && pfc) {
2211 		/* always update read pointer */
2212 		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2213 
2214 		if (fb)
2215 			ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
2216 	}
2217 	return ret;
2218 }
2219 
/*
 * Decoder interface table for the VP9 slice decoder, wiring up the init,
 * decode, get_param and deinit entry points defined in this file. The
 * decode hook dispatches to the single-core or the LAT decode path
 * according to the device's hardware architecture.
 */
const struct vdec_common_if vdec_vp9_slice_lat_if = {
	.init		= vdec_vp9_slice_init,
	.decode		= vdec_vp9_slice_decode,
	.get_param	= vdec_vp9_slice_get_param,
	.deinit		= vdec_vp9_slice_deinit,
};
2226