1 /*
2  * Copyright 2009 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *     Alex Deucher <alexander.deucher@amd.com>
25  */
26 #include "drmP.h"
27 #include "drm.h"
28 #include "radeon_drm.h"
29 #include "radeon_drv.h"
30 
31 #include "r600_blit_shaders.h"
32 
/* Values emitted by draw_auto() */
#define DI_PT_RECTLIST        0x11 /* written to R600_VGT_PRIMITIVE_TYPE */
#define DI_INDEX_SIZE_16_BIT  0x0  /* payload of the INDEX_TYPE packet */
#define DI_SRC_SEL_AUTO_INDEX 0x2  /* last dword of the DRAW_INDEX_AUTO packet */

/*
 * FMT_* codes are passed as the "format" argument of set_tex_resource(),
 * COLOR_* codes as the "format" argument of set_render_target().
 */
#define FMT_8                 0x1
#define FMT_5_6_5             0x8
#define FMT_8_8_8_8           0x1a
#define COLOR_8               0x1
#define COLOR_5_6_5           0x8
#define COLOR_8_8_8_8         0x1a
43 
44 static inline void
set_render_target(drm_radeon_private_t * dev_priv,int format,int w,int h,u64 gpu_addr)45 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
46 {
47 	u32 cb_color_info;
48 	int pitch, slice;
49 	RING_LOCALS;
50 	DRM_DEBUG("\n");
51 
52 	h = ALIGN(h, 8);
53 	if (h < 8)
54 		h = 8;
55 
56 	cb_color_info = ((format << 2) | (1 << 27));
57 	pitch = (w / 8) - 1;
58 	slice = ((w * h) / 64) - 1;
59 
60 	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
61 	    ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
62 		BEGIN_RING(21 + 2);
63 		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
64 		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
65 		OUT_RING(gpu_addr >> 8);
66 		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
67 		OUT_RING(2 << 0);
68 	} else {
69 		BEGIN_RING(21);
70 		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
71 		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
72 		OUT_RING(gpu_addr >> 8);
73 	}
74 
75 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
76 	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
77 	OUT_RING((pitch << 0) | (slice << 10));
78 
79 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
80 	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
81 	OUT_RING(0);
82 
83 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
84 	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
85 	OUT_RING(cb_color_info);
86 
87 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
88 	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
89 	OUT_RING(0);
90 
91 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
92 	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
93 	OUT_RING(0);
94 
95 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
96 	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
97 	OUT_RING(0);
98 
99 	ADVANCE_RING();
100 }
101 
102 static inline void
cp_set_surface_sync(drm_radeon_private_t * dev_priv,u32 sync_type,u32 size,u64 mc_addr)103 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
104 		    u32 sync_type, u32 size, u64 mc_addr)
105 {
106 	u32 cp_coher_size;
107 	RING_LOCALS;
108 	DRM_DEBUG("\n");
109 
110 	if (size == 0xffffffff)
111 		cp_coher_size = 0xffffffff;
112 	else
113 		cp_coher_size = ((size + 255) >> 8);
114 
115 	BEGIN_RING(5);
116 	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
117 	OUT_RING(sync_type);
118 	OUT_RING(cp_coher_size);
119 	OUT_RING((mc_addr >> 8));
120 	OUT_RING(10); /* poll interval */
121 	ADVANCE_RING();
122 }
123 
124 static inline void
set_shaders(struct drm_device * dev)125 set_shaders(struct drm_device *dev)
126 {
127 	drm_radeon_private_t *dev_priv = dev->dev_private;
128 	u64 gpu_addr;
129 	int i;
130 	u32 *vs, *ps;
131 	uint32_t sq_pgm_resources;
132 	RING_LOCALS;
133 	DRM_DEBUG("\n");
134 
135 	/* load shaders */
136 	vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
137 	ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
138 
139 	for (i = 0; i < r6xx_vs_size; i++)
140 		vs[i] = cpu_to_le32(r6xx_vs[i]);
141 	for (i = 0; i < r6xx_ps_size; i++)
142 		ps[i] = cpu_to_le32(r6xx_ps[i]);
143 
144 	dev_priv->blit_vb->used = 512;
145 
146 	gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
147 
148 	/* setup shader regs */
149 	sq_pgm_resources = (1 << 0);
150 
151 	BEGIN_RING(9 + 12);
152 	/* VS */
153 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
154 	OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
155 	OUT_RING(gpu_addr >> 8);
156 
157 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
158 	OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
159 	OUT_RING(sq_pgm_resources);
160 
161 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
162 	OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
163 	OUT_RING(0);
164 
165 	/* PS */
166 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
167 	OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
168 	OUT_RING((gpu_addr + 256) >> 8);
169 
170 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
171 	OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
172 	OUT_RING(sq_pgm_resources | (1 << 28));
173 
174 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
175 	OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
176 	OUT_RING(2);
177 
178 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
179 	OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
180 	OUT_RING(0);
181 	ADVANCE_RING();
182 
183 	cp_set_surface_sync(dev_priv,
184 			    R600_SH_ACTION_ENA, 512, gpu_addr);
185 }
186 
187 static inline void
set_vtx_resource(drm_radeon_private_t * dev_priv,u64 gpu_addr)188 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
189 {
190 	uint32_t sq_vtx_constant_word2;
191 	RING_LOCALS;
192 	DRM_DEBUG("\n");
193 
194 	sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
195 #ifdef __BIG_ENDIAN
196 	sq_vtx_constant_word2 |= (2 << 30);
197 #endif
198 
199 	BEGIN_RING(9);
200 	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
201 	OUT_RING(0x460);
202 	OUT_RING(gpu_addr & 0xffffffff);
203 	OUT_RING(48 - 1);
204 	OUT_RING(sq_vtx_constant_word2);
205 	OUT_RING(1 << 0);
206 	OUT_RING(0);
207 	OUT_RING(0);
208 	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
209 	ADVANCE_RING();
210 
211 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
212 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
213 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
214 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
215 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
216 		cp_set_surface_sync(dev_priv,
217 				    R600_TC_ACTION_ENA, 48, gpu_addr);
218 	else
219 		cp_set_surface_sync(dev_priv,
220 				    R600_VC_ACTION_ENA, 48, gpu_addr);
221 }
222 
223 static inline void
set_tex_resource(drm_radeon_private_t * dev_priv,int format,int w,int h,int pitch,u64 gpu_addr)224 set_tex_resource(drm_radeon_private_t *dev_priv,
225 		 int format, int w, int h, int pitch, u64 gpu_addr)
226 {
227 	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
228 	RING_LOCALS;
229 	DRM_DEBUG("\n");
230 
231 	if (h < 1)
232 		h = 1;
233 
234 	sq_tex_resource_word0 = (1 << 0);
235 	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
236 				  ((w - 1) << 19));
237 
238 	sq_tex_resource_word1 = (format << 26);
239 	sq_tex_resource_word1 |= ((h - 1) << 0);
240 
241 	sq_tex_resource_word4 = ((1 << 14) |
242 				 (0 << 16) |
243 				 (1 << 19) |
244 				 (2 << 22) |
245 				 (3 << 25));
246 
247 	BEGIN_RING(9);
248 	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
249 	OUT_RING(0);
250 	OUT_RING(sq_tex_resource_word0);
251 	OUT_RING(sq_tex_resource_word1);
252 	OUT_RING(gpu_addr >> 8);
253 	OUT_RING(gpu_addr >> 8);
254 	OUT_RING(sq_tex_resource_word4);
255 	OUT_RING(0);
256 	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
257 	ADVANCE_RING();
258 
259 }
260 
261 static inline void
set_scissors(drm_radeon_private_t * dev_priv,int x1,int y1,int x2,int y2)262 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
263 {
264 	RING_LOCALS;
265 	DRM_DEBUG("\n");
266 
267 	BEGIN_RING(12);
268 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
269 	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
270 	OUT_RING((x1 << 0) | (y1 << 16));
271 	OUT_RING((x2 << 0) | (y2 << 16));
272 
273 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
274 	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
275 	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
276 	OUT_RING((x2 << 0) | (y2 << 16));
277 
278 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
279 	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
280 	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
281 	OUT_RING((x2 << 0) | (y2 << 16));
282 	ADVANCE_RING();
283 }
284 
285 static inline void
draw_auto(drm_radeon_private_t * dev_priv)286 draw_auto(drm_radeon_private_t *dev_priv)
287 {
288 	RING_LOCALS;
289 	DRM_DEBUG("\n");
290 
291 	BEGIN_RING(10);
292 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
293 	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
294 	OUT_RING(DI_PT_RECTLIST);
295 
296 	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
297 #ifdef __BIG_ENDIAN
298 	OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
299 #else
300 	OUT_RING(DI_INDEX_SIZE_16_BIT);
301 #endif
302 
303 	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
304 	OUT_RING(1);
305 
306 	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
307 	OUT_RING(3);
308 	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
309 
310 	ADVANCE_RING();
311 	COMMIT_RING();
312 }
313 
314 static inline void
set_default_state(drm_radeon_private_t * dev_priv)315 set_default_state(drm_radeon_private_t *dev_priv)
316 {
317 	int i;
318 	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
319 	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
320 	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
321 	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
322 	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
323 	RING_LOCALS;
324 
325 	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
326 	case CHIP_R600:
327 		num_ps_gprs = 192;
328 		num_vs_gprs = 56;
329 		num_temp_gprs = 4;
330 		num_gs_gprs = 0;
331 		num_es_gprs = 0;
332 		num_ps_threads = 136;
333 		num_vs_threads = 48;
334 		num_gs_threads = 4;
335 		num_es_threads = 4;
336 		num_ps_stack_entries = 128;
337 		num_vs_stack_entries = 128;
338 		num_gs_stack_entries = 0;
339 		num_es_stack_entries = 0;
340 		break;
341 	case CHIP_RV630:
342 	case CHIP_RV635:
343 		num_ps_gprs = 84;
344 		num_vs_gprs = 36;
345 		num_temp_gprs = 4;
346 		num_gs_gprs = 0;
347 		num_es_gprs = 0;
348 		num_ps_threads = 144;
349 		num_vs_threads = 40;
350 		num_gs_threads = 4;
351 		num_es_threads = 4;
352 		num_ps_stack_entries = 40;
353 		num_vs_stack_entries = 40;
354 		num_gs_stack_entries = 32;
355 		num_es_stack_entries = 16;
356 		break;
357 	case CHIP_RV610:
358 	case CHIP_RV620:
359 	case CHIP_RS780:
360 	case CHIP_RS880:
361 	default:
362 		num_ps_gprs = 84;
363 		num_vs_gprs = 36;
364 		num_temp_gprs = 4;
365 		num_gs_gprs = 0;
366 		num_es_gprs = 0;
367 		num_ps_threads = 136;
368 		num_vs_threads = 48;
369 		num_gs_threads = 4;
370 		num_es_threads = 4;
371 		num_ps_stack_entries = 40;
372 		num_vs_stack_entries = 40;
373 		num_gs_stack_entries = 32;
374 		num_es_stack_entries = 16;
375 		break;
376 	case CHIP_RV670:
377 		num_ps_gprs = 144;
378 		num_vs_gprs = 40;
379 		num_temp_gprs = 4;
380 		num_gs_gprs = 0;
381 		num_es_gprs = 0;
382 		num_ps_threads = 136;
383 		num_vs_threads = 48;
384 		num_gs_threads = 4;
385 		num_es_threads = 4;
386 		num_ps_stack_entries = 40;
387 		num_vs_stack_entries = 40;
388 		num_gs_stack_entries = 32;
389 		num_es_stack_entries = 16;
390 		break;
391 	case CHIP_RV770:
392 		num_ps_gprs = 192;
393 		num_vs_gprs = 56;
394 		num_temp_gprs = 4;
395 		num_gs_gprs = 0;
396 		num_es_gprs = 0;
397 		num_ps_threads = 188;
398 		num_vs_threads = 60;
399 		num_gs_threads = 0;
400 		num_es_threads = 0;
401 		num_ps_stack_entries = 256;
402 		num_vs_stack_entries = 256;
403 		num_gs_stack_entries = 0;
404 		num_es_stack_entries = 0;
405 		break;
406 	case CHIP_RV730:
407 	case CHIP_RV740:
408 		num_ps_gprs = 84;
409 		num_vs_gprs = 36;
410 		num_temp_gprs = 4;
411 		num_gs_gprs = 0;
412 		num_es_gprs = 0;
413 		num_ps_threads = 188;
414 		num_vs_threads = 60;
415 		num_gs_threads = 0;
416 		num_es_threads = 0;
417 		num_ps_stack_entries = 128;
418 		num_vs_stack_entries = 128;
419 		num_gs_stack_entries = 0;
420 		num_es_stack_entries = 0;
421 		break;
422 	case CHIP_RV710:
423 		num_ps_gprs = 192;
424 		num_vs_gprs = 56;
425 		num_temp_gprs = 4;
426 		num_gs_gprs = 0;
427 		num_es_gprs = 0;
428 		num_ps_threads = 144;
429 		num_vs_threads = 48;
430 		num_gs_threads = 0;
431 		num_es_threads = 0;
432 		num_ps_stack_entries = 128;
433 		num_vs_stack_entries = 128;
434 		num_gs_stack_entries = 0;
435 		num_es_stack_entries = 0;
436 		break;
437 	}
438 
439 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
440 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
441 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
442 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
443 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
444 		sq_config = 0;
445 	else
446 		sq_config = R600_VC_ENABLE;
447 
448 	sq_config |= (R600_DX9_CONSTS |
449 		      R600_ALU_INST_PREFER_VECTOR |
450 		      R600_PS_PRIO(0) |
451 		      R600_VS_PRIO(1) |
452 		      R600_GS_PRIO(2) |
453 		      R600_ES_PRIO(3));
454 
455 	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
456 				  R600_NUM_VS_GPRS(num_vs_gprs) |
457 				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
458 	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
459 				  R600_NUM_ES_GPRS(num_es_gprs));
460 	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
461 				   R600_NUM_VS_THREADS(num_vs_threads) |
462 				   R600_NUM_GS_THREADS(num_gs_threads) |
463 				   R600_NUM_ES_THREADS(num_es_threads));
464 	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
465 				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
466 	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
467 				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
468 
469 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
470 		BEGIN_RING(r7xx_default_size + 10);
471 		for (i = 0; i < r7xx_default_size; i++)
472 			OUT_RING(r7xx_default_state[i]);
473 	} else {
474 		BEGIN_RING(r6xx_default_size + 10);
475 		for (i = 0; i < r6xx_default_size; i++)
476 			OUT_RING(r6xx_default_state[i]);
477 	}
478 	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
479 	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
480 	/* SQ config */
481 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
482 	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
483 	OUT_RING(sq_config);
484 	OUT_RING(sq_gpr_resource_mgmt_1);
485 	OUT_RING(sq_gpr_resource_mgmt_2);
486 	OUT_RING(sq_thread_resource_mgmt);
487 	OUT_RING(sq_stack_resource_mgmt_1);
488 	OUT_RING(sq_stack_resource_mgmt_2);
489 	ADVANCE_RING();
490 }
491 
/*
 * Convert an unsigned integer to the bit pattern of the corresponding
 * IEEE 754 single-precision float.
 *
 * The result is exact for inputs up to 2^24.  Larger inputs do not fit in
 * the 24-bit significand and are truncated toward zero.  Zero maps to
 * the all-zero pattern (+0.0f).
 *
 * Returns the raw 32-bit encoding: biased exponent at bit 23, 23-bit
 * fraction below it, sign bit always clear.
 */
static inline uint32_t i2f(uint32_t input)
{
	uint32_t exponent, fraction;

	if (input == 0)
		return 0; /* 0 is a special case: no leading 1 to normalise */

	/*
	 * Start as if the leading 1 were already at bit 23, i.e. a value
	 * of 1.xxx * 2^23, which has a biased exponent of 127 + 23.
	 */
	exponent = 127 + 23;
	fraction = input;

	/* too wide for the significand: drop low bits (truncate) */
	while (fraction & ~(uint32_t)0xffffff) {
		fraction >>= 1;
		exponent++;
	}

	/* shift left until the leading 1 sits at bit 23 */
	while (!(fraction & 0x800000)) {
		fraction <<= 1;
		exponent--;
	}

	/* mask off the leading 1; it is implicit in the encoding */
	return (exponent << 23) | (fraction & 0x7fffff);
}
516 
517 
r600_nomm_get_vb(struct drm_device * dev)518 static inline int r600_nomm_get_vb(struct drm_device *dev)
519 {
520 	drm_radeon_private_t *dev_priv = dev->dev_private;
521 	dev_priv->blit_vb = radeon_freelist_get(dev);
522 	if (!dev_priv->blit_vb) {
523 		DRM_ERROR("Unable to allocate vertex buffer for blit\n");
524 		return -EAGAIN;
525 	}
526 	return 0;
527 }
528 
r600_nomm_put_vb(struct drm_device * dev)529 static inline void r600_nomm_put_vb(struct drm_device *dev)
530 {
531 	drm_radeon_private_t *dev_priv = dev->dev_private;
532 
533 	dev_priv->blit_vb->used = 0;
534 	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
535 }
536 
r600_nomm_get_vb_ptr(struct drm_device * dev)537 static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
538 {
539 	drm_radeon_private_t *dev_priv = dev->dev_private;
540 	return (((char *)dev->agp_buffer_map->handle +
541 		 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
542 }
543 
544 int
r600_prepare_blit_copy(struct drm_device * dev,struct drm_file * file_priv)545 r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
546 {
547 	drm_radeon_private_t *dev_priv = dev->dev_private;
548 	int ret;
549 	DRM_DEBUG("\n");
550 
551 	ret = r600_nomm_get_vb(dev);
552 	if (ret)
553 		return ret;
554 
555 	dev_priv->blit_vb->file_priv = file_priv;
556 
557 	set_default_state(dev_priv);
558 	set_shaders(dev);
559 
560 	return 0;
561 }
562 
563 
564 void
r600_done_blit_copy(struct drm_device * dev)565 r600_done_blit_copy(struct drm_device *dev)
566 {
567 	drm_radeon_private_t *dev_priv = dev->dev_private;
568 	RING_LOCALS;
569 	DRM_DEBUG("\n");
570 
571 	BEGIN_RING(5);
572 	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
573 	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
574 	/* wait for 3D idle clean */
575 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
576 	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
577 	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
578 
579 	ADVANCE_RING();
580 	COMMIT_RING();
581 
582 	r600_nomm_put_vb(dev);
583 }
584 
585 void
r600_blit_copy(struct drm_device * dev,uint64_t src_gpu_addr,uint64_t dst_gpu_addr,int size_bytes)586 r600_blit_copy(struct drm_device *dev,
587 	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
588 	       int size_bytes)
589 {
590 	drm_radeon_private_t *dev_priv = dev->dev_private;
591 	int max_bytes;
592 	u64 vb_addr;
593 	u32 *vb;
594 
595 	vb = r600_nomm_get_vb_ptr(dev);
596 
597 	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
598 		max_bytes = 8192;
599 
600 		while (size_bytes) {
601 			int cur_size = size_bytes;
602 			int src_x = src_gpu_addr & 255;
603 			int dst_x = dst_gpu_addr & 255;
604 			int h = 1;
605 			src_gpu_addr = src_gpu_addr & ~255;
606 			dst_gpu_addr = dst_gpu_addr & ~255;
607 
608 			if (!src_x && !dst_x) {
609 				h = (cur_size / max_bytes);
610 				if (h > 8192)
611 					h = 8192;
612 				if (h == 0)
613 					h = 1;
614 				else
615 					cur_size = max_bytes;
616 			} else {
617 				if (cur_size > max_bytes)
618 					cur_size = max_bytes;
619 				if (cur_size > (max_bytes - dst_x))
620 					cur_size = (max_bytes - dst_x);
621 				if (cur_size > (max_bytes - src_x))
622 					cur_size = (max_bytes - src_x);
623 			}
624 
625 			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
626 
627 				r600_nomm_put_vb(dev);
628 				r600_nomm_get_vb(dev);
629 				if (!dev_priv->blit_vb)
630 					return;
631 				set_shaders(dev);
632 				vb = r600_nomm_get_vb_ptr(dev);
633 			}
634 
635 			vb[0] = i2f(dst_x);
636 			vb[1] = 0;
637 			vb[2] = i2f(src_x);
638 			vb[3] = 0;
639 
640 			vb[4] = i2f(dst_x);
641 			vb[5] = i2f(h);
642 			vb[6] = i2f(src_x);
643 			vb[7] = i2f(h);
644 
645 			vb[8] = i2f(dst_x + cur_size);
646 			vb[9] = i2f(h);
647 			vb[10] = i2f(src_x + cur_size);
648 			vb[11] = i2f(h);
649 
650 			/* src */
651 			set_tex_resource(dev_priv, FMT_8,
652 					 src_x + cur_size, h, src_x + cur_size,
653 					 src_gpu_addr);
654 
655 			cp_set_surface_sync(dev_priv,
656 					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
657 
658 			/* dst */
659 			set_render_target(dev_priv, COLOR_8,
660 					  dst_x + cur_size, h,
661 					  dst_gpu_addr);
662 
663 			/* scissors */
664 			set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
665 
666 			/* Vertex buffer setup */
667 			vb_addr = dev_priv->gart_buffers_offset +
668 				dev_priv->blit_vb->offset +
669 				dev_priv->blit_vb->used;
670 			set_vtx_resource(dev_priv, vb_addr);
671 
672 			/* draw */
673 			draw_auto(dev_priv);
674 
675 			cp_set_surface_sync(dev_priv,
676 					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
677 					    cur_size * h, dst_gpu_addr);
678 
679 			vb += 12;
680 			dev_priv->blit_vb->used += 12 * 4;
681 
682 			src_gpu_addr += cur_size * h;
683 			dst_gpu_addr += cur_size * h;
684 			size_bytes -= cur_size * h;
685 		}
686 	} else {
687 		max_bytes = 8192 * 4;
688 
689 		while (size_bytes) {
690 			int cur_size = size_bytes;
691 			int src_x = (src_gpu_addr & 255);
692 			int dst_x = (dst_gpu_addr & 255);
693 			int h = 1;
694 			src_gpu_addr = src_gpu_addr & ~255;
695 			dst_gpu_addr = dst_gpu_addr & ~255;
696 
697 			if (!src_x && !dst_x) {
698 				h = (cur_size / max_bytes);
699 				if (h > 8192)
700 					h = 8192;
701 				if (h == 0)
702 					h = 1;
703 				else
704 					cur_size = max_bytes;
705 			} else {
706 				if (cur_size > max_bytes)
707 					cur_size = max_bytes;
708 				if (cur_size > (max_bytes - dst_x))
709 					cur_size = (max_bytes - dst_x);
710 				if (cur_size > (max_bytes - src_x))
711 					cur_size = (max_bytes - src_x);
712 			}
713 
714 			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
715 				r600_nomm_put_vb(dev);
716 				r600_nomm_get_vb(dev);
717 				if (!dev_priv->blit_vb)
718 					return;
719 
720 				set_shaders(dev);
721 				vb = r600_nomm_get_vb_ptr(dev);
722 			}
723 
724 			vb[0] = i2f(dst_x / 4);
725 			vb[1] = 0;
726 			vb[2] = i2f(src_x / 4);
727 			vb[3] = 0;
728 
729 			vb[4] = i2f(dst_x / 4);
730 			vb[5] = i2f(h);
731 			vb[6] = i2f(src_x / 4);
732 			vb[7] = i2f(h);
733 
734 			vb[8] = i2f((dst_x + cur_size) / 4);
735 			vb[9] = i2f(h);
736 			vb[10] = i2f((src_x + cur_size) / 4);
737 			vb[11] = i2f(h);
738 
739 			/* src */
740 			set_tex_resource(dev_priv, FMT_8_8_8_8,
741 					 (src_x + cur_size) / 4,
742 					 h, (src_x + cur_size) / 4,
743 					 src_gpu_addr);
744 
745 			cp_set_surface_sync(dev_priv,
746 					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
747 
748 			/* dst */
749 			set_render_target(dev_priv, COLOR_8_8_8_8,
750 					  (dst_x + cur_size) / 4, h,
751 					  dst_gpu_addr);
752 
753 			/* scissors */
754 			set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
755 
756 			/* Vertex buffer setup */
757 			vb_addr = dev_priv->gart_buffers_offset +
758 				dev_priv->blit_vb->offset +
759 				dev_priv->blit_vb->used;
760 			set_vtx_resource(dev_priv, vb_addr);
761 
762 			/* draw */
763 			draw_auto(dev_priv);
764 
765 			cp_set_surface_sync(dev_priv,
766 					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
767 					    cur_size * h, dst_gpu_addr);
768 
769 			vb += 12;
770 			dev_priv->blit_vb->used += 12 * 4;
771 
772 			src_gpu_addr += cur_size * h;
773 			dst_gpu_addr += cur_size * h;
774 			size_bytes -= cur_size * h;
775 		}
776 	}
777 }
778 
779 void
r600_blit_swap(struct drm_device * dev,uint64_t src_gpu_addr,uint64_t dst_gpu_addr,int sx,int sy,int dx,int dy,int w,int h,int src_pitch,int dst_pitch,int cpp)780 r600_blit_swap(struct drm_device *dev,
781 	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
782 	       int sx, int sy, int dx, int dy,
783 	       int w, int h, int src_pitch, int dst_pitch, int cpp)
784 {
785 	drm_radeon_private_t *dev_priv = dev->dev_private;
786 	int cb_format, tex_format;
787 	int sx2, sy2, dx2, dy2;
788 	u64 vb_addr;
789 	u32 *vb;
790 
791 	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
792 
793 		r600_nomm_put_vb(dev);
794 		r600_nomm_get_vb(dev);
795 		if (!dev_priv->blit_vb)
796 			return;
797 
798 		set_shaders(dev);
799 	}
800 	vb = r600_nomm_get_vb_ptr(dev);
801 
802 	sx2 = sx + w;
803 	sy2 = sy + h;
804 	dx2 = dx + w;
805 	dy2 = dy + h;
806 
807 	vb[0] = i2f(dx);
808 	vb[1] = i2f(dy);
809 	vb[2] = i2f(sx);
810 	vb[3] = i2f(sy);
811 
812 	vb[4] = i2f(dx);
813 	vb[5] = i2f(dy2);
814 	vb[6] = i2f(sx);
815 	vb[7] = i2f(sy2);
816 
817 	vb[8] = i2f(dx2);
818 	vb[9] = i2f(dy2);
819 	vb[10] = i2f(sx2);
820 	vb[11] = i2f(sy2);
821 
822 	switch(cpp) {
823 	case 4:
824 		cb_format = COLOR_8_8_8_8;
825 		tex_format = FMT_8_8_8_8;
826 		break;
827 	case 2:
828 		cb_format = COLOR_5_6_5;
829 		tex_format = FMT_5_6_5;
830 		break;
831 	default:
832 		cb_format = COLOR_8;
833 		tex_format = FMT_8;
834 		break;
835 	}
836 
837 	/* src */
838 	set_tex_resource(dev_priv, tex_format,
839 			 src_pitch / cpp,
840 			 sy2, src_pitch / cpp,
841 			 src_gpu_addr);
842 
843 	cp_set_surface_sync(dev_priv,
844 			    R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
845 
846 	/* dst */
847 	set_render_target(dev_priv, cb_format,
848 			  dst_pitch / cpp, dy2,
849 			  dst_gpu_addr);
850 
851 	/* scissors */
852 	set_scissors(dev_priv, dx, dy, dx2, dy2);
853 
854 	/* Vertex buffer setup */
855 	vb_addr = dev_priv->gart_buffers_offset +
856 		dev_priv->blit_vb->offset +
857 		dev_priv->blit_vb->used;
858 	set_vtx_resource(dev_priv, vb_addr);
859 
860 	/* draw */
861 	draw_auto(dev_priv);
862 
863 	cp_set_surface_sync(dev_priv,
864 			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
865 			    dst_pitch * dy2, dst_gpu_addr);
866 
867 	dev_priv->blit_vb->used += 12 * 4;
868 }
869