1 /*
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Christian König <christian.koenig@amd.com>
23  */
24 
25 #include <linux/firmware.h>
26 
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "r600d.h"
30 
31 /**
32  * uvd_v1_0_get_rptr - get read pointer
33  *
34  * @rdev: radeon_device pointer
35  * @ring: radeon_ring pointer
36  *
37  * Returns the current hardware read pointer
38  */
uvd_v1_0_get_rptr(struct radeon_device * rdev,struct radeon_ring * ring)39 uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev,
40 			   struct radeon_ring *ring)
41 {
42 	return RREG32(UVD_RBC_RB_RPTR);
43 }
44 
45 /**
46  * uvd_v1_0_get_wptr - get write pointer
47  *
48  * @rdev: radeon_device pointer
49  * @ring: radeon_ring pointer
50  *
51  * Returns the current hardware write pointer
52  */
uvd_v1_0_get_wptr(struct radeon_device * rdev,struct radeon_ring * ring)53 uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
54 			   struct radeon_ring *ring)
55 {
56 	return RREG32(UVD_RBC_RB_WPTR);
57 }
58 
59 /**
60  * uvd_v1_0_set_wptr - set write pointer
61  *
62  * @rdev: radeon_device pointer
63  * @ring: radeon_ring pointer
64  *
65  * Commits the write pointer to the hardware
66  */
uvd_v1_0_set_wptr(struct radeon_device * rdev,struct radeon_ring * ring)67 void uvd_v1_0_set_wptr(struct radeon_device *rdev,
68 		       struct radeon_ring *ring)
69 {
70 	WREG32(UVD_RBC_RB_WPTR, ring->wptr);
71 }
72 
73 /**
74  * uvd_v1_0_fence_emit - emit an fence & trap command
75  *
76  * @rdev: radeon_device pointer
77  * @fence: fence to emit
78  *
79  * Write a fence and a trap command to the ring.
80  */
uvd_v1_0_fence_emit(struct radeon_device * rdev,struct radeon_fence * fence)81 void uvd_v1_0_fence_emit(struct radeon_device *rdev,
82 			 struct radeon_fence *fence)
83 {
84 	struct radeon_ring *ring = &rdev->ring[fence->ring];
85 	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
86 
87 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
88 	radeon_ring_write(ring, addr & 0xffffffff);
89 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
90 	radeon_ring_write(ring, fence->seq);
91 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
92 	radeon_ring_write(ring, 0);
93 
94 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
95 	radeon_ring_write(ring, 0);
96 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
97 	radeon_ring_write(ring, 0);
98 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
99 	radeon_ring_write(ring, 2);
100 	return;
101 }
102 
103 /**
104  * uvd_v1_0_resume - memory controller programming
105  *
106  * @rdev: radeon_device pointer
107  *
108  * Let the UVD memory controller know it's offsets
109  */
uvd_v1_0_resume(struct radeon_device * rdev)110 int uvd_v1_0_resume(struct radeon_device *rdev)
111 {
112 	uint64_t addr;
113 	uint32_t size;
114 	int r;
115 
116 	r = radeon_uvd_resume(rdev);
117 	if (r)
118 		return r;
119 
120 	/* program the VCPU memory controller bits 0-27 */
121 	addr = (rdev->uvd.gpu_addr >> 3) + 16;
122 	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size) >> 3;
123 	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
124 	WREG32(UVD_VCPU_CACHE_SIZE0, size);
125 
126 	addr += size;
127 	size = RADEON_UVD_HEAP_SIZE >> 3;
128 	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
129 	WREG32(UVD_VCPU_CACHE_SIZE1, size);
130 
131 	addr += size;
132 	size = (RADEON_UVD_STACK_SIZE +
133 	       (RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles)) >> 3;
134 	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
135 	WREG32(UVD_VCPU_CACHE_SIZE2, size);
136 
137 	/* bits 28-31 */
138 	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
139 	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
140 
141 	/* bits 32-39 */
142 	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
143 	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
144 
145 	WREG32(UVD_FW_START, *((uint32_t*)rdev->uvd.cpu_addr));
146 
147 	return 0;
148 }
149 
150 /**
151  * uvd_v1_0_init - start and test UVD block
152  *
153  * @rdev: radeon_device pointer
154  *
155  * Initialize the hardware, boot up the VCPU and do some testing
156  */
uvd_v1_0_init(struct radeon_device * rdev)157 int uvd_v1_0_init(struct radeon_device *rdev)
158 {
159 	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
160 	uint32_t tmp;
161 	int r;
162 
163 	/* raise clocks while booting up the VCPU */
164 	if (rdev->family < CHIP_RV740)
165 		radeon_set_uvd_clocks(rdev, 10000, 10000);
166 	else
167 		radeon_set_uvd_clocks(rdev, 53300, 40000);
168 
169 	r = uvd_v1_0_start(rdev);
170 	if (r)
171 		goto done;
172 
173 	ring->ready = true;
174 	r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
175 	if (r) {
176 		ring->ready = false;
177 		goto done;
178 	}
179 
180 	r = radeon_ring_lock(rdev, ring, 10);
181 	if (r) {
182 		DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
183 		goto done;
184 	}
185 
186 	tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
187 	radeon_ring_write(ring, tmp);
188 	radeon_ring_write(ring, 0xFFFFF);
189 
190 	tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
191 	radeon_ring_write(ring, tmp);
192 	radeon_ring_write(ring, 0xFFFFF);
193 
194 	tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
195 	radeon_ring_write(ring, tmp);
196 	radeon_ring_write(ring, 0xFFFFF);
197 
198 	/* Clear timeout status bits */
199 	radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
200 	radeon_ring_write(ring, 0x8);
201 
202 	radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
203 	radeon_ring_write(ring, 3);
204 
205 	radeon_ring_unlock_commit(rdev, ring, false);
206 
207 done:
208 	/* lower clocks again */
209 	radeon_set_uvd_clocks(rdev, 0, 0);
210 
211 	if (!r) {
212 		switch (rdev->family) {
213 		case CHIP_RV610:
214 		case CHIP_RV630:
215 		case CHIP_RV620:
216 			/* 64byte granularity workaround */
217 			WREG32(MC_CONFIG, 0);
218 			WREG32(MC_CONFIG, 1 << 4);
219 			WREG32(RS_DQ_RD_RET_CONF, 0x3f);
220 			WREG32(MC_CONFIG, 0x1f);
221 
222 			fallthrough;
223 		case CHIP_RV670:
224 		case CHIP_RV635:
225 
226 			/* write clean workaround */
227 			WREG32_P(UVD_VCPU_CNTL, 0x10, ~0x10);
228 			break;
229 
230 		default:
231 			/* TODO: Do we need more? */
232 			break;
233 		}
234 
235 		DRM_INFO("UVD initialized successfully.\n");
236 	}
237 
238 	return r;
239 }
240 
241 /**
242  * uvd_v1_0_fini - stop the hardware block
243  *
244  * @rdev: radeon_device pointer
245  *
246  * Stop the UVD block, mark ring as not ready any more
247  */
uvd_v1_0_fini(struct radeon_device * rdev)248 void uvd_v1_0_fini(struct radeon_device *rdev)
249 {
250 	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
251 
252 	uvd_v1_0_stop(rdev);
253 	ring->ready = false;
254 }
255 
256 /**
257  * uvd_v1_0_start - start UVD block
258  *
259  * @rdev: radeon_device pointer
260  *
261  * Setup and start the UVD block
262  */
uvd_v1_0_start(struct radeon_device * rdev)263 int uvd_v1_0_start(struct radeon_device *rdev)
264 {
265 	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
266 	uint32_t rb_bufsz;
267 	int i, j, r;
268 
269 	/* disable byte swapping */
270 	u32 lmi_swap_cntl = 0;
271 	u32 mp_swap_cntl = 0;
272 
273 	/* disable clock gating */
274 	WREG32(UVD_CGC_GATE, 0);
275 
276 	/* disable interupt */
277 	WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
278 
279 	/* Stall UMC and register bus before resetting VCPU */
280 	WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
281 	WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
282 	mdelay(1);
283 
284 	/* put LMI, VCPU, RBC etc... into reset */
285 	WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
286 	       LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
287 	       CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
288 	mdelay(5);
289 
290 	/* take UVD block out of reset */
291 	WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
292 	mdelay(5);
293 
294 	/* initialize UVD memory controller */
295 	WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
296 			     (1 << 21) | (1 << 9) | (1 << 20));
297 
298 #ifdef __BIG_ENDIAN
299 	/* swap (8 in 32) RB and IB */
300 	lmi_swap_cntl = 0xa;
301 	mp_swap_cntl = 0;
302 #endif
303 	WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl);
304 	WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl);
305 
306 	WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
307 	WREG32(UVD_MPC_SET_MUXA1, 0x0);
308 	WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
309 	WREG32(UVD_MPC_SET_MUXB1, 0x0);
310 	WREG32(UVD_MPC_SET_ALU, 0);
311 	WREG32(UVD_MPC_SET_MUX, 0x88);
312 
313 	/* take all subblocks out of reset, except VCPU */
314 	WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
315 	mdelay(5);
316 
317 	/* enable VCPU clock */
318 	WREG32(UVD_VCPU_CNTL,  1 << 9);
319 
320 	/* enable UMC */
321 	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
322 
323 	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
324 
325 	/* boot up the VCPU */
326 	WREG32(UVD_SOFT_RESET, 0);
327 	mdelay(10);
328 
329 	for (i = 0; i < 10; ++i) {
330 		uint32_t status;
331 		for (j = 0; j < 100; ++j) {
332 			status = RREG32(UVD_STATUS);
333 			if (status & 2)
334 				break;
335 			mdelay(10);
336 		}
337 		r = 0;
338 		if (status & 2)
339 			break;
340 
341 		DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
342 		WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
343 		mdelay(10);
344 		WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
345 		mdelay(10);
346 		r = -1;
347 	}
348 
349 	if (r) {
350 		DRM_ERROR("UVD not responding, giving up!!!\n");
351 		return r;
352 	}
353 
354 	/* enable interupt */
355 	WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
356 
357 	/* force RBC into idle state */
358 	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
359 
360 	/* Set the write pointer delay */
361 	WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
362 
363 	/* program the 4GB memory segment for rptr and ring buffer */
364 	WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
365 				   (0x7 << 16) | (0x1 << 31));
366 
367 	/* Initialize the ring buffer's read and write pointers */
368 	WREG32(UVD_RBC_RB_RPTR, 0x0);
369 
370 	ring->wptr = RREG32(UVD_RBC_RB_RPTR);
371 	WREG32(UVD_RBC_RB_WPTR, ring->wptr);
372 
373 	/* set the ring address */
374 	WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
375 
376 	/* Set ring buffer size */
377 	rb_bufsz = order_base_2(ring->ring_size);
378 	rb_bufsz = (0x1 << 8) | rb_bufsz;
379 	WREG32_P(UVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f);
380 
381 	return 0;
382 }
383 
384 /**
385  * uvd_v1_0_stop - stop UVD block
386  *
387  * @rdev: radeon_device pointer
388  *
389  * stop the UVD block
390  */
uvd_v1_0_stop(struct radeon_device * rdev)391 void uvd_v1_0_stop(struct radeon_device *rdev)
392 {
393 	/* force RBC into idle state */
394 	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
395 
396 	/* Stall UMC and register bus before resetting VCPU */
397 	WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
398 	WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
399 	mdelay(1);
400 
401 	/* put VCPU into reset */
402 	WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
403 	mdelay(5);
404 
405 	/* disable VCPU clock */
406 	WREG32(UVD_VCPU_CNTL, 0x0);
407 
408 	/* Unstall UMC and register bus */
409 	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
410 	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
411 }
412 
413 /**
414  * uvd_v1_0_ring_test - register write test
415  *
416  * @rdev: radeon_device pointer
417  * @ring: radeon_ring pointer
418  *
419  * Test if we can successfully write to the context register
420  */
uvd_v1_0_ring_test(struct radeon_device * rdev,struct radeon_ring * ring)421 int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
422 {
423 	uint32_t tmp = 0;
424 	unsigned i;
425 	int r;
426 
427 	WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
428 	r = radeon_ring_lock(rdev, ring, 3);
429 	if (r) {
430 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
431 			  ring->idx, r);
432 		return r;
433 	}
434 	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
435 	radeon_ring_write(ring, 0xDEADBEEF);
436 	radeon_ring_unlock_commit(rdev, ring, false);
437 	for (i = 0; i < rdev->usec_timeout; i++) {
438 		tmp = RREG32(UVD_CONTEXT_ID);
439 		if (tmp == 0xDEADBEEF)
440 			break;
441 		udelay(1);
442 	}
443 
444 	if (i < rdev->usec_timeout) {
445 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
446 			 ring->idx, i);
447 	} else {
448 		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
449 			  ring->idx, tmp);
450 		r = -EINVAL;
451 	}
452 	return r;
453 }
454 
/**
 * uvd_v1_0_semaphore_emit - emit semaphore command
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @semaphore: semaphore to emit commands for
 * @emit_wait: true if we should emit a wait command
 *
 * Semaphores are disabled for UVD V1 hardware, so none of the arguments
 * is used and nothing is written to the ring.
 *
 * Always returns false.
 */
bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	/* no semaphore support on UVD V1 */
	return false;
}
473 
474 /**
475  * uvd_v1_0_ib_execute - execute indirect buffer
476  *
477  * @rdev: radeon_device pointer
478  * @ib: indirect buffer to execute
479  *
480  * Write ring commands to execute the indirect buffer
481  */
uvd_v1_0_ib_execute(struct radeon_device * rdev,struct radeon_ib * ib)482 void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
483 {
484 	struct radeon_ring *ring = &rdev->ring[ib->ring];
485 
486 	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
487 	radeon_ring_write(ring, ib->gpu_addr);
488 	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
489 	radeon_ring_write(ring, ib->length_dw);
490 }
491 
492 /**
493  * uvd_v1_0_ib_test - test ib execution
494  *
495  * @rdev: radeon_device pointer
496  * @ring: radeon_ring pointer
497  *
498  * Test if we can successfully execute an IB
499  */
uvd_v1_0_ib_test(struct radeon_device * rdev,struct radeon_ring * ring)500 int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
501 {
502 	struct radeon_fence *fence = NULL;
503 	int r;
504 
505 	if (rdev->family < CHIP_RV740)
506 		r = radeon_set_uvd_clocks(rdev, 10000, 10000);
507 	else
508 		r = radeon_set_uvd_clocks(rdev, 53300, 40000);
509 	if (r) {
510 		DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r);
511 		return r;
512 	}
513 
514 	r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
515 	if (r) {
516 		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
517 		goto error;
518 	}
519 
520 	r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
521 	if (r) {
522 		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
523 		goto error;
524 	}
525 
526 	r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies(
527 		RADEON_USEC_IB_TEST_TIMEOUT));
528 	if (r < 0) {
529 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
530 		goto error;
531 	} else if (r == 0) {
532 		DRM_ERROR("radeon: fence wait timed out.\n");
533 		r = -ETIMEDOUT;
534 		goto error;
535 	}
536 	r = 0;
537 	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
538 error:
539 	radeon_fence_unref(&fence);
540 	radeon_set_uvd_clocks(rdev, 0, 0);
541 	return r;
542 }
543