1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2016 MediaTek Inc.
4 * Author: Daniel Hsiao <daniel.hsiao@mediatek.com>
5 * PoChun Lin <pochun.lin@mediatek.com>
6 */
7
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>

#include "../mtk_vcodec_enc_drv.h"
#include "../../common/mtk_vcodec_intr.h"
#include "../mtk_vcodec_enc.h"
#include "../mtk_vcodec_enc_pm.h"
#include "../venc_drv_base.h"
#include "../venc_ipi_msg.h"
#include "../venc_vpu_if.h"
19
/*
 * Byte offsets, from the encoder register base, of the registers that are
 * read back when a frame is composed.
 */
#define VENC_BITSTREAM_FRAME_SIZE	0x0098
#define VENC_BITSTREAM_HEADER_LEN	0x00e8

/* Largest size, in bytes, of the ac_tag (the vp8 frame tag). */
#define MAX_AC_TAG_SIZE			10
25
/*
 * enum venc_vp8_vpu_work_buf - vp8 encoder working buffer index
 *
 * Each value indexes the work_bufs arrays of struct venc_vp8_vsi and
 * struct venc_vp8_inst. VENC_VP8_VPU_WORK_BUF_MAX is not a buffer; it is
 * the number of entries and sizes those arrays.
 */
enum venc_vp8_vpu_work_buf {
	VENC_VP8_VPU_WORK_BUF_LUMA = 0,
	VENC_VP8_VPU_WORK_BUF_LUMA2,
	VENC_VP8_VPU_WORK_BUF_LUMA3,
	VENC_VP8_VPU_WORK_BUF_CHROMA,
	VENC_VP8_VPU_WORK_BUF_CHROMA2,
	VENC_VP8_VPU_WORK_BUF_CHROMA3,
	VENC_VP8_VPU_WORK_BUF_MV_INFO,
	VENC_VP8_VPU_WORK_BUF_BS_HEADER,
	VENC_VP8_VPU_WORK_BUF_PROB_BUF,
	VENC_VP8_VPU_WORK_BUF_RC_INFO,
	VENC_VP8_VPU_WORK_BUF_RC_CODE,
	VENC_VP8_VPU_WORK_BUF_RC_CODE2,
	VENC_VP8_VPU_WORK_BUF_RC_CODE3,
	VENC_VP8_VPU_WORK_BUF_MAX,
};
45
46 /*
47 * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration
48 * AP-W/R : AP is writer/reader on this item
49 * VPU-W/R: VPU is write/reader on this item
50 * @input_fourcc: input fourcc
51 * @bitrate: target bitrate (in bps)
52 * @pic_w: picture width. Picture size is visible stream resolution, in pixels,
53 * to be used for display purposes; must be smaller or equal to buffer
54 * size.
55 * @pic_h: picture height
56 * @buf_w: buffer width (with 16 alignment). Buffer size is stream resolution
57 * in pixels aligned to hardware requirements.
58 * @buf_h: buffer height (with 16 alignment)
59 * @gop_size: group of picture size (key frame)
60 * @framerate: frame rate in fps
61 * @ts_mode: temporal scalability mode (0: disable, 1: enable)
62 * support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps.
63 */
64 struct venc_vp8_vpu_config {
65 u32 input_fourcc;
66 u32 bitrate;
67 u32 pic_w;
68 u32 pic_h;
69 u32 buf_w;
70 u32 buf_h;
71 u32 gop_size;
72 u32 framerate;
73 u32 ts_mode;
74 };
75
76 /*
77 * struct venc_vp8_vpu_buf - Structure for buffer information
78 * AP-W/R : AP is writer/reader on this item
79 * VPU-W/R: VPU is write/reader on this item
80 * @iova: IO virtual address
81 * @vpua: VPU side memory addr which is used by RC_CODE
82 * @size: buffer size (in bytes)
83 */
84 struct venc_vp8_vpu_buf {
85 u32 iova;
86 u32 vpua;
87 u32 size;
88 };
89
90 /*
91 * struct venc_vp8_vsi - Structure for VPU driver control and info share
92 * AP-W/R : AP is writer/reader on this item
93 * VPU-W/R: VPU is write/reader on this item
94 * This structure is allocated in VPU side and shared to AP side.
95 * @config: vp8 encoder configuration
96 * @work_bufs: working buffer information in VPU side
97 * The work_bufs here is for storing the 'size' info shared to AP side.
98 * The similar item in struct venc_vp8_inst is for memory allocation
99 * in AP side. The AP driver will copy the 'size' from here to the one in
100 * struct mtk_vcodec_mem, then invoke mtk_vcodec_mem_alloc to allocate
101 * the buffer. After that, bypass the 'dma_addr' to the 'iova' field here for
102 * register setting in VPU side.
103 */
104 struct venc_vp8_vsi {
105 struct venc_vp8_vpu_config config;
106 struct venc_vp8_vpu_buf work_bufs[VENC_VP8_VPU_WORK_BUF_MAX];
107 };
108
109 /*
110 * struct venc_vp8_inst - vp8 encoder AP driver instance
111 * @hw_base: vp8 encoder hardware register base
112 * @work_bufs: working buffer
113 * @work_buf_allocated: working buffer allocated flag
114 * @frm_cnt: encoded frame count, it's used for I-frame judgement and
115 * reset when force intra cmd received.
116 * @ts_mode: temporal scalability mode (0: disable, 1: enable)
117 * support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps.
118 * @vpu_inst: VPU instance to exchange information between AP and VPU
119 * @vsi: driver structure allocated by VPU side and shared to AP side for
120 * control and info share
121 * @ctx: context for v4l2 layer integration
122 */
123 struct venc_vp8_inst {
124 void __iomem *hw_base;
125 struct mtk_vcodec_mem work_bufs[VENC_VP8_VPU_WORK_BUF_MAX];
126 bool work_buf_allocated;
127 unsigned int frm_cnt;
128 unsigned int ts_mode;
129 struct venc_vpu_inst vpu_inst;
130 struct venc_vp8_vsi *vsi;
131 struct mtk_vcodec_enc_ctx *ctx;
132 };
133
/*
 * Read one 32-bit vp8 encoder register.
 *
 * @inst: encoder instance supplying the register base (hw_base)
 * @addr: byte offset of the register from the register base
 *
 * Return: the value obtained with readl().
 */
static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr)
{
	void __iomem *reg = inst->hw_base + addr;

	return readl(reg);
}
138
vp8_enc_free_work_buf(struct venc_vp8_inst * inst)139 static void vp8_enc_free_work_buf(struct venc_vp8_inst *inst)
140 {
141 int i;
142
143 /* Buffers need to be freed by AP. */
144 for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) {
145 if (inst->work_bufs[i].size == 0)
146 continue;
147 mtk_vcodec_mem_free(inst->ctx, &inst->work_bufs[i]);
148 }
149 }
150
vp8_enc_alloc_work_buf(struct venc_vp8_inst * inst)151 static int vp8_enc_alloc_work_buf(struct venc_vp8_inst *inst)
152 {
153 int i;
154 int ret = 0;
155 struct venc_vp8_vpu_buf *wb = inst->vsi->work_bufs;
156
157 for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) {
158 if (wb[i].size == 0)
159 continue;
160 /*
161 * This 'wb' structure is set by VPU side and shared to AP for
162 * buffer allocation and IO virtual addr mapping. For most of
163 * the buffers, AP will allocate the buffer according to 'size'
164 * field and store the IO virtual addr in 'iova' field. For the
165 * RC_CODEx buffers, they are pre-allocated in the VPU side
166 * because they are inside VPU SRAM, and save the VPU addr in
167 * the 'vpua' field. The AP will translate the VPU addr to the
168 * corresponding IO virtual addr and store in 'iova' field.
169 */
170 inst->work_bufs[i].size = wb[i].size;
171 ret = mtk_vcodec_mem_alloc(inst->ctx, &inst->work_bufs[i]);
172 if (ret) {
173 mtk_venc_err(inst->ctx, "cannot alloc work_bufs[%d]", i);
174 goto err_alloc;
175 }
176 /*
177 * This RC_CODEx is pre-allocated by VPU and saved in VPU addr.
178 * So we need use memcpy to copy RC_CODEx from VPU addr into IO
179 * virtual addr in 'iova' field for reg setting in VPU side.
180 */
181 if (i == VENC_VP8_VPU_WORK_BUF_RC_CODE ||
182 i == VENC_VP8_VPU_WORK_BUF_RC_CODE2 ||
183 i == VENC_VP8_VPU_WORK_BUF_RC_CODE3) {
184 struct mtk_vcodec_fw *handler;
185 void *tmp_va;
186
187 handler = inst->vpu_inst.ctx->dev->fw_handler;
188 tmp_va = mtk_vcodec_fw_map_dm_addr(handler,
189 wb[i].vpua);
190 memcpy(inst->work_bufs[i].va, tmp_va, wb[i].size);
191 }
192 wb[i].iova = inst->work_bufs[i].dma_addr;
193
194 mtk_venc_debug(inst->ctx, "work_bufs[%d] va=0x%p,iova=%pad,size=%zu",
195 i, inst->work_bufs[i].va,
196 &inst->work_bufs[i].dma_addr,
197 inst->work_bufs[i].size);
198 }
199
200 return ret;
201
202 err_alloc:
203 vp8_enc_free_work_buf(inst);
204
205 return ret;
206 }
207
vp8_enc_wait_venc_done(struct venc_vp8_inst * inst)208 static unsigned int vp8_enc_wait_venc_done(struct venc_vp8_inst *inst)
209 {
210 unsigned int irq_status = 0;
211 struct mtk_vcodec_enc_ctx *ctx = (struct mtk_vcodec_enc_ctx *)inst->ctx;
212
213 if (!mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
214 WAIT_INTR_TIMEOUT_MS, 0)) {
215 irq_status = ctx->irq_status;
216 mtk_venc_debug(ctx, "isr return %x", irq_status);
217 }
218 return irq_status;
219 }
220
221 /*
222 * Compose ac_tag, bitstream header and bitstream payload into
223 * one bitstream buffer.
224 */
vp8_enc_compose_one_frame(struct venc_vp8_inst * inst,struct mtk_vcodec_mem * bs_buf,unsigned int * bs_size)225 static int vp8_enc_compose_one_frame(struct venc_vp8_inst *inst,
226 struct mtk_vcodec_mem *bs_buf,
227 unsigned int *bs_size)
228 {
229 unsigned int not_key;
230 u32 bs_frm_size;
231 u32 bs_hdr_len;
232 unsigned int ac_tag_size;
233 u8 ac_tag[MAX_AC_TAG_SIZE];
234 u32 tag;
235
236 bs_frm_size = vp8_enc_read_reg(inst, VENC_BITSTREAM_FRAME_SIZE);
237 bs_hdr_len = vp8_enc_read_reg(inst, VENC_BITSTREAM_HEADER_LEN);
238
239 /* if a frame is key frame, not_key is 0 */
240 not_key = !inst->vpu_inst.is_key_frm;
241 tag = (bs_hdr_len << 5) | 0x10 | not_key;
242 ac_tag[0] = tag & 0xff;
243 ac_tag[1] = (tag >> 8) & 0xff;
244 ac_tag[2] = (tag >> 16) & 0xff;
245
246 /* key frame */
247 if (not_key == 0) {
248 ac_tag_size = MAX_AC_TAG_SIZE;
249 ac_tag[3] = 0x9d;
250 ac_tag[4] = 0x01;
251 ac_tag[5] = 0x2a;
252 ac_tag[6] = inst->vsi->config.pic_w;
253 ac_tag[7] = inst->vsi->config.pic_w >> 8;
254 ac_tag[8] = inst->vsi->config.pic_h;
255 ac_tag[9] = inst->vsi->config.pic_h >> 8;
256 } else {
257 ac_tag_size = 3;
258 }
259
260 if (bs_buf->size < bs_hdr_len + bs_frm_size + ac_tag_size) {
261 mtk_venc_err(inst->ctx, "bitstream buf size is too small(%zu)", bs_buf->size);
262 return -EINVAL;
263 }
264
265 /*
266 * (1) The vp8 bitstream header and body are generated by the HW vp8
267 * encoder separately at the same time. We cannot know the bitstream
268 * header length in advance.
269 * (2) From the vp8 spec, there is no stuffing byte allowed between the
270 * ac tag, bitstream header and bitstream body.
271 */
272 memmove(bs_buf->va + bs_hdr_len + ac_tag_size,
273 bs_buf->va, bs_frm_size);
274 memcpy(bs_buf->va + ac_tag_size,
275 inst->work_bufs[VENC_VP8_VPU_WORK_BUF_BS_HEADER].va,
276 bs_hdr_len);
277 memcpy(bs_buf->va, ac_tag, ac_tag_size);
278 *bs_size = bs_frm_size + bs_hdr_len + ac_tag_size;
279
280 return 0;
281 }
282
vp8_enc_encode_frame(struct venc_vp8_inst * inst,struct venc_frm_buf * frm_buf,struct mtk_vcodec_mem * bs_buf,unsigned int * bs_size)283 static int vp8_enc_encode_frame(struct venc_vp8_inst *inst,
284 struct venc_frm_buf *frm_buf,
285 struct mtk_vcodec_mem *bs_buf,
286 unsigned int *bs_size)
287 {
288 int ret = 0;
289 unsigned int irq_status;
290
291 mtk_venc_debug(inst->ctx, "->frm_cnt=%d", inst->frm_cnt);
292
293 ret = vpu_enc_encode(&inst->vpu_inst, 0, frm_buf, bs_buf, NULL);
294 if (ret)
295 return ret;
296
297 irq_status = vp8_enc_wait_venc_done(inst);
298 if (irq_status != MTK_VENC_IRQ_STATUS_FRM) {
299 mtk_venc_err(inst->ctx, "irq_status=%d failed", irq_status);
300 return -EIO;
301 }
302
303 if (vp8_enc_compose_one_frame(inst, bs_buf, bs_size)) {
304 mtk_venc_err(inst->ctx, "vp8_enc_compose_one_frame failed");
305 return -EINVAL;
306 }
307
308 inst->frm_cnt++;
309 mtk_venc_debug(inst->ctx, "<-size=%d key_frm=%d", *bs_size, inst->vpu_inst.is_key_frm);
310
311 return ret;
312 }
313
vp8_enc_init(struct mtk_vcodec_enc_ctx * ctx)314 static int vp8_enc_init(struct mtk_vcodec_enc_ctx *ctx)
315 {
316 int ret = 0;
317 struct venc_vp8_inst *inst;
318
319 inst = kzalloc(sizeof(*inst), GFP_KERNEL);
320 if (!inst)
321 return -ENOMEM;
322
323 inst->ctx = ctx;
324 inst->vpu_inst.ctx = ctx;
325 inst->vpu_inst.id = IPI_VENC_VP8;
326 inst->hw_base = mtk_vcodec_get_reg_addr(inst->ctx->dev->reg_base, VENC_LT_SYS);
327
328 ret = vpu_enc_init(&inst->vpu_inst);
329
330 inst->vsi = (struct venc_vp8_vsi *)inst->vpu_inst.vsi;
331
332 if (ret)
333 kfree(inst);
334 else
335 ctx->drv_handle = inst;
336
337 return ret;
338 }
339
vp8_enc_encode(void * handle,enum venc_start_opt opt,struct venc_frm_buf * frm_buf,struct mtk_vcodec_mem * bs_buf,struct venc_done_result * result)340 static int vp8_enc_encode(void *handle,
341 enum venc_start_opt opt,
342 struct venc_frm_buf *frm_buf,
343 struct mtk_vcodec_mem *bs_buf,
344 struct venc_done_result *result)
345 {
346 int ret = 0;
347 struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
348 struct mtk_vcodec_enc_ctx *ctx = inst->ctx;
349
350 enable_irq(ctx->dev->enc_irq);
351
352 switch (opt) {
353 case VENC_START_OPT_ENCODE_FRAME:
354 ret = vp8_enc_encode_frame(inst, frm_buf, bs_buf,
355 &result->bs_size);
356 if (ret)
357 goto encode_err;
358 result->is_key_frm = inst->vpu_inst.is_key_frm;
359 break;
360
361 default:
362 mtk_venc_err(ctx, "opt not support:%d", opt);
363 ret = -EINVAL;
364 break;
365 }
366
367 encode_err:
368
369 disable_irq(ctx->dev->enc_irq);
370 return ret;
371 }
372
vp8_enc_set_param(void * handle,enum venc_set_param_type type,struct venc_enc_param * enc_prm)373 static int vp8_enc_set_param(void *handle,
374 enum venc_set_param_type type,
375 struct venc_enc_param *enc_prm)
376 {
377 int ret = 0;
378 struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
379
380 mtk_venc_debug(inst->ctx, "->type=%d", type);
381
382 switch (type) {
383 case VENC_SET_PARAM_ENC:
384 inst->vsi->config.input_fourcc = enc_prm->input_yuv_fmt;
385 inst->vsi->config.bitrate = enc_prm->bitrate;
386 inst->vsi->config.pic_w = enc_prm->width;
387 inst->vsi->config.pic_h = enc_prm->height;
388 inst->vsi->config.buf_w = enc_prm->buf_width;
389 inst->vsi->config.buf_h = enc_prm->buf_height;
390 inst->vsi->config.gop_size = enc_prm->gop_size;
391 inst->vsi->config.framerate = enc_prm->frm_rate;
392 inst->vsi->config.ts_mode = inst->ts_mode;
393 ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm);
394 if (ret)
395 break;
396 if (inst->work_buf_allocated) {
397 vp8_enc_free_work_buf(inst);
398 inst->work_buf_allocated = false;
399 }
400 ret = vp8_enc_alloc_work_buf(inst);
401 if (ret)
402 break;
403 inst->work_buf_allocated = true;
404 break;
405
406 /*
407 * VENC_SET_PARAM_TS_MODE must be called before VENC_SET_PARAM_ENC
408 */
409 case VENC_SET_PARAM_TS_MODE:
410 inst->ts_mode = 1;
411 mtk_venc_debug(inst->ctx, "set ts_mode");
412 break;
413
414 default:
415 ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm);
416 break;
417 }
418
419 return ret;
420 }
421
vp8_enc_deinit(void * handle)422 static int vp8_enc_deinit(void *handle)
423 {
424 int ret = 0;
425 struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle;
426
427 ret = vpu_enc_deinit(&inst->vpu_inst);
428
429 if (inst->work_buf_allocated)
430 vp8_enc_free_work_buf(inst);
431
432 kfree(inst);
433 return ret;
434 }
435
436 const struct venc_common_if venc_vp8_if = {
437 .init = vp8_enc_init,
438 .encode = vp8_enc_encode,
439 .set_param = vp8_enc_set_param,
440 .deinit = vp8_enc_deinit,
441 };
442