1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29 
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_buffer.h"
33 #include "drm_sarea.h"
34 #include "radeon_drm.h"
35 #include "radeon_drv.h"
36 
37 /* ================================================================
38  * Helper functions for client state checking and fixup
39  */
40 
/*
 * Validate a client-supplied card address and, when it is not directly
 * usable, try to translate it into one that is.
 *
 * Sane clients hand us an address that already lies in the framebuffer or
 * the GART mapped space, in which case *offset is left untouched.  Older
 * clients may instead pass a zero-based value relative to the start of the
 * framebuffer, or assume that the AGP aperture is appended to the
 * framebuffer; both layouts are detected and converted here.
 *
 * @dev_priv:  device state (fb_location, fb_size, gart_size, gart_vm_start)
 * @file_priv: per-file state; its driver_priv carries radeon_fb_delta
 * @offset:    in/out; overwritten only when a fixed-up value passes the check
 *
 * Returns 0 when the (possibly rewritten) offset is acceptable to
 * radeon_check_offset(), -EINVAL otherwise.
 *
 * Note: it might be a good idea to additionally restrict the result to some
 * "allowed" area to protect things like the PCIE GART.
 */
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *dev_priv,
						    struct drm_file *file_priv,
						    u32 *offset)
{
	const u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
	u64 addr = *offset;

	/* Best case: nothing to fix. */
	if (radeon_check_offset(dev_priv, addr))
		return 0;

	/* A zero-based value that fits in framebuffer + GART space gets the
	 * magic delta (from SETPARAM or calculated from fb_location) applied.
	 */
	if (addr < (dev_priv->fb_size + dev_priv->gart_size)) {
		struct drm_radeon_driver_file_fields *fields =
		    file_priv->driver_priv;

		addr += fields->radeon_fb_delta;
	}

	/* Anything past the end of the framebuffer is assumed to have been
	 * aimed at the GART.
	 */
	if (addr > fb_end)
		addr = addr - fb_end - 1 + dev_priv->gart_vm_start;

	/* Re-validate the translated address; give up if still out of bounds. */
	if (!radeon_check_offset(dev_priv, addr))
		return -EINVAL;

	DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)addr);
	*offset = addr;
	return 0;
}
91 
/*
 * Check a 1.3-style state packet for embedded memory offsets.
 *
 * For packet ids whose payload contains card addresses, every such dword in
 * @buf is passed through radeon_check_and_fixup_offset(), which may rewrite
 * it in place.  Ids known to carry no offsets are accepted as they are.
 *
 * Side effects besides the in-place fixups:
 *   - RADEON_EMIT_PP_MISC sets dev_priv->have_z_offset once its depth offset
 *     has been accepted;
 *   - R200_EMIT_VAP_CTL writes RADEON_SE_TCL_STATE_FLUSH = 0 to the ring.
 *
 * Returns 0 on success, -EINVAL for a bad offset or an unknown packet id.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *dev_priv,
						     struct drm_file *file_priv,
						     int id, struct drm_buffer *buf)
{
	u32 *dword;
	int n;

	switch (id) {
	case RADEON_EMIT_PP_MISC:
		/* depth buffer offset lives inside the PP_MISC register run */
		dword = drm_buffer_pointer_to_dword(buf,
			(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4);
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, dword)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}
		dev_priv->have_z_offset = 1;
		return 0;

	case RADEON_EMIT_PP_CNTL:
		/* colour buffer offset lives inside the PP_CNTL register run */
		dword = drm_buffer_pointer_to_dword(buf,
			(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4);
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, dword)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}
		return 0;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		/* the first payload dword is the texture offset */
		dword = drm_buffer_pointer_to_dword(buf, 0);
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, dword)) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return -EINVAL;
		}
		return 0;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		/* texture offset lives inside the TXFILTER register run */
		dword = drm_buffer_pointer_to_dword(buf,
			(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4);
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, dword)) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return -EINVAL;
		}
		return 0;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:
		/* five consecutive offsets, all of which must be valid */
		for (n = 0; n < 5; n++) {
			dword = drm_buffer_pointer_to_dword(buf, n);
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  dword)) {
				DRM_ERROR
				    ("Invalid R200 cubic texture offset\n");
				return -EINVAL;
			}
		}
		return 0;

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:
		/* five consecutive offsets, all of which must be valid */
		for (n = 0; n < 5; n++) {
			dword = drm_buffer_pointer_to_dword(buf, n);
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  dword)) {
				DRM_ERROR
				    ("Invalid R100 cubic texture offset\n");
				return -EINVAL;
			}
		}
		return 0;

	case R200_EMIT_VAP_CTL: {
		/* no offsets to check, but a TCL state flush is emitted */
		RING_LOCALS;

		BEGIN_RING(2);
		OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
		ADVANCE_RING();
		return 0;
	}

	/* These packets don't contain memory offsets */
	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		return 0;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return -EINVAL;
	}
}
274 
/*
 * Validate a client-supplied type-3 CP packet sitting at the current read
 * position of cmdbuf->buffer, fixing up any memory offsets it contains.
 *
 * @dev_priv:  device state (microcode_version selects r100/r200 rules)
 * @file_priv: per-file state, forwarded to radeon_check_and_fixup_offset()
 * @cmdbuf:    command buffer; dwords are addressed relative to the header
 * @cmdsz:     out; set to the packet length in dwords (2 + count field).
 *             It is written before any validation, so it is also set when
 *             the function fails.
 *
 * The packet occupies dwords 0 .. count + 1.  The contents come from
 * userspace, so every dword that is inspected or rewritten is first checked
 * to lie inside that range; a packet too short to hold the fields its opcode
 * requires is rejected instead of letting the fixup touch data that belongs
 * to whatever follows the packet.
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int radeon_check_and_fixup_packet3(drm_radeon_private_t *dev_priv,
					  struct drm_file *file_priv,
					  drm_radeon_kcmd_buffer_t *cmdbuf,
					  unsigned int *cmdsz)
{
	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	u32 offset, narrays;
	int count, i, k;

	count = ((*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16);
	*cmdsz = 2 + count;

	if ((*cmd & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return -EINVAL;
	}

	if (4 * *cmdsz > drm_buffer_unprocessed(cmdbuf->buffer)) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return -EINVAL;
	}

	switch (*cmd & 0xff00) {
	/* XXX Are there old drivers needing other packets? */

	case RADEON_3D_DRAW_IMMD:
	case RADEON_3D_DRAW_VBUF:
	case RADEON_3D_DRAW_INDX:
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
	case RADEON_3D_CLEAR_ZMASK:
/*	case RADEON_CP_NEXT_CHAR:
	case RADEON_CP_PLY_NEXTSCAN:
	case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
		/* these packets are safe */
		break;

	case RADEON_CP_3D_DRAW_IMMD_2:
	case RADEON_CP_3D_DRAW_VBUF_2:
	case RADEON_CP_3D_DRAW_INDX_2:
	case RADEON_3D_CLEAR_HIZ:
		/* safe but r200 only */
		if (dev_priv->microcode_version != UCODE_R200) {
			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
			return -EINVAL;
		}
		break;

	case RADEON_3D_LOAD_VBPNTR:

		if (count > 18) { /* 12 arrays max */
			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
				  count);
			return -EINVAL;
		}

		/* carefully check packet contents */
		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);

		narrays = *cmd & ~0xc000;
		k = 0;
		i = 2;
		while ((k < narrays) && (i < (count + 2))) {
			i++;		/* skip attribute field */
			/* The offset must still be inside the packet.  Leaving
			 * the loop here keeps k < narrays, so the consistency
			 * check below rejects the packet.
			 */
			if (i >= (count + 2))
				break;
			cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  cmd)) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
			if (k == narrays)
				break;
			/* have one more to process, they come in pairs */
			if (i >= (count + 2))
				break;	/* truncated pair: rejected below */
			cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);

			if (radeon_check_and_fixup_offset(dev_priv,
							  file_priv, cmd))
			{
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
		}
		/* do the counts match what we expect ? */
		if ((k != narrays) || (i != (count + 2))) {
			DRM_ERROR
			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
			      k, i, narrays, count + 1);
			return -EINVAL;
		}
		break;

	case RADEON_3D_RNDR_GEN_INDX_PRIM:
		if (dev_priv->microcode_version != UCODE_R100) {
			DRM_ERROR("Invalid 3d packet for r200-class chip\n");
			return -EINVAL;
		}

		/* dword 1 always exists: the packet is at least 2 dwords */
		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
				DRM_ERROR("Invalid rndr_gen_indx offset\n");
				return -EINVAL;
		}
		break;

	case RADEON_CP_INDX_BUFFER:
		if (dev_priv->microcode_version != UCODE_R200) {
			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
			return -EINVAL;
		}

		/* the offset is dword 2, which requires count >= 1 */
		if (count < 1) {
			DRM_ERROR("Too small payload in CP_INDX_BUFFER (count=%d)\n",
				  count);
			return -EINVAL;
		}

		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
		if ((*cmd & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n", *cmd);
			return -EINVAL;
		}
		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n", *cmd);
			return -EINVAL;
		}
		break;

	case RADEON_CNTL_HOSTDATA_BLT:
	case RADEON_CNTL_PAINT_MULTI:
	case RADEON_CNTL_BITBLT_MULTI:
		/* MSB of opcode: next DWORD GUI_CNTL */
		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
		if (*cmd & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			u32 *cmd2;

			/* first pitch/offset is dword 2: needs count >= 1 */
			if (count < 1) {
				DRM_ERROR("Packet too small for first pitch/offset (count=%d)\n",
					  count);
				return -EINVAL;
			}
			cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
			/* the dword is shifted left by 10 to obtain the
			 * address that gets checked; bits 31:22 are kept
			 * untouched when the fixed-up value is stored back
			 */
			offset = *cmd2 << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return -EINVAL;
			}
			*cmd2 = (*cmd2 & 0xffc00000) | offset >> 10;
		}

		if ((*cmd & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (*cmd & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			u32 *cmd3;

			/* second pitch/offset is dword 3: needs count >= 2 */
			if (count < 2) {
				DRM_ERROR("Packet too small for second pitch/offset (count=%d)\n",
					  count);
				return -EINVAL;
			}
			cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
			offset = *cmd3 << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return -EINVAL;
			}
			*cmd3 = (*cmd3 & 0xffc00000) | offset >> 10;
		}
		break;

	default:
		DRM_ERROR("Invalid packet type %x\n", *cmd & 0xff00);
		return -EINVAL;
	}

	return 0;
}
444 
445 /* ================================================================
446  * CP hardware state programming functions
447  */
448 
/*
 * Emit one clip rectangle to the ring.
 *
 * Writes (y1 << 16) | x1 to RADEON_RE_TOP_LEFT and
 * ((y2 - 1) << 16) | (x2 - 1) to RADEON_RE_WIDTH_HEIGHT, i.e. the second
 * corner is passed to the hardware reduced by one in each direction.
 */
static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
				  struct drm_clip_rect * box)
{
	u32 reg_top_left;
	u32 reg_width_height;
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	reg_top_left = (box->y1 << 16) | box->x1;
	reg_width_height = ((box->y2 - 1) << 16) | (box->x2 - 1);

	/* two single-register writes: header + value each */
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING(reg_top_left);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(reg_width_height);
	ADVANCE_RING();
}
464 
465 /* Emit 1.1 state
466  */
radeon_emit_state(drm_radeon_private_t * dev_priv,struct drm_file * file_priv,drm_radeon_context_regs_t * ctx,drm_radeon_texture_regs_t * tex,unsigned int dirty)467 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
468 			     struct drm_file *file_priv,
469 			     drm_radeon_context_regs_t * ctx,
470 			     drm_radeon_texture_regs_t * tex,
471 			     unsigned int dirty)
472 {
473 	RING_LOCALS;
474 	DRM_DEBUG("dirty=0x%08x\n", dirty);
475 
476 	if (dirty & RADEON_UPLOAD_CONTEXT) {
477 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
478 						  &ctx->rb3d_depthoffset)) {
479 			DRM_ERROR("Invalid depth buffer offset\n");
480 			return -EINVAL;
481 		}
482 
483 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
484 						  &ctx->rb3d_coloroffset)) {
485 			DRM_ERROR("Invalid depth buffer offset\n");
486 			return -EINVAL;
487 		}
488 
489 		BEGIN_RING(14);
490 		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
491 		OUT_RING(ctx->pp_misc);
492 		OUT_RING(ctx->pp_fog_color);
493 		OUT_RING(ctx->re_solid_color);
494 		OUT_RING(ctx->rb3d_blendcntl);
495 		OUT_RING(ctx->rb3d_depthoffset);
496 		OUT_RING(ctx->rb3d_depthpitch);
497 		OUT_RING(ctx->rb3d_zstencilcntl);
498 		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
499 		OUT_RING(ctx->pp_cntl);
500 		OUT_RING(ctx->rb3d_cntl);
501 		OUT_RING(ctx->rb3d_coloroffset);
502 		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
503 		OUT_RING(ctx->rb3d_colorpitch);
504 		ADVANCE_RING();
505 	}
506 
507 	if (dirty & RADEON_UPLOAD_VERTFMT) {
508 		BEGIN_RING(2);
509 		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
510 		OUT_RING(ctx->se_coord_fmt);
511 		ADVANCE_RING();
512 	}
513 
514 	if (dirty & RADEON_UPLOAD_LINE) {
515 		BEGIN_RING(5);
516 		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
517 		OUT_RING(ctx->re_line_pattern);
518 		OUT_RING(ctx->re_line_state);
519 		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
520 		OUT_RING(ctx->se_line_width);
521 		ADVANCE_RING();
522 	}
523 
524 	if (dirty & RADEON_UPLOAD_BUMPMAP) {
525 		BEGIN_RING(5);
526 		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
527 		OUT_RING(ctx->pp_lum_matrix);
528 		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
529 		OUT_RING(ctx->pp_rot_matrix_0);
530 		OUT_RING(ctx->pp_rot_matrix_1);
531 		ADVANCE_RING();
532 	}
533 
534 	if (dirty & RADEON_UPLOAD_MASKS) {
535 		BEGIN_RING(4);
536 		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
537 		OUT_RING(ctx->rb3d_stencilrefmask);
538 		OUT_RING(ctx->rb3d_ropcntl);
539 		OUT_RING(ctx->rb3d_planemask);
540 		ADVANCE_RING();
541 	}
542 
543 	if (dirty & RADEON_UPLOAD_VIEWPORT) {
544 		BEGIN_RING(7);
545 		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
546 		OUT_RING(ctx->se_vport_xscale);
547 		OUT_RING(ctx->se_vport_xoffset);
548 		OUT_RING(ctx->se_vport_yscale);
549 		OUT_RING(ctx->se_vport_yoffset);
550 		OUT_RING(ctx->se_vport_zscale);
551 		OUT_RING(ctx->se_vport_zoffset);
552 		ADVANCE_RING();
553 	}
554 
555 	if (dirty & RADEON_UPLOAD_SETUP) {
556 		BEGIN_RING(4);
557 		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
558 		OUT_RING(ctx->se_cntl);
559 		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
560 		OUT_RING(ctx->se_cntl_status);
561 		ADVANCE_RING();
562 	}
563 
564 	if (dirty & RADEON_UPLOAD_MISC) {
565 		BEGIN_RING(2);
566 		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
567 		OUT_RING(ctx->re_misc);
568 		ADVANCE_RING();
569 	}
570 
571 	if (dirty & RADEON_UPLOAD_TEX0) {
572 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
573 						  &tex[0].pp_txoffset)) {
574 			DRM_ERROR("Invalid texture offset for unit 0\n");
575 			return -EINVAL;
576 		}
577 
578 		BEGIN_RING(9);
579 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
580 		OUT_RING(tex[0].pp_txfilter);
581 		OUT_RING(tex[0].pp_txformat);
582 		OUT_RING(tex[0].pp_txoffset);
583 		OUT_RING(tex[0].pp_txcblend);
584 		OUT_RING(tex[0].pp_txablend);
585 		OUT_RING(tex[0].pp_tfactor);
586 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
587 		OUT_RING(tex[0].pp_border_color);
588 		ADVANCE_RING();
589 	}
590 
591 	if (dirty & RADEON_UPLOAD_TEX1) {
592 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
593 						  &tex[1].pp_txoffset)) {
594 			DRM_ERROR("Invalid texture offset for unit 1\n");
595 			return -EINVAL;
596 		}
597 
598 		BEGIN_RING(9);
599 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
600 		OUT_RING(tex[1].pp_txfilter);
601 		OUT_RING(tex[1].pp_txformat);
602 		OUT_RING(tex[1].pp_txoffset);
603 		OUT_RING(tex[1].pp_txcblend);
604 		OUT_RING(tex[1].pp_txablend);
605 		OUT_RING(tex[1].pp_tfactor);
606 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
607 		OUT_RING(tex[1].pp_border_color);
608 		ADVANCE_RING();
609 	}
610 
611 	if (dirty & RADEON_UPLOAD_TEX2) {
612 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
613 						  &tex[2].pp_txoffset)) {
614 			DRM_ERROR("Invalid texture offset for unit 2\n");
615 			return -EINVAL;
616 		}
617 
618 		BEGIN_RING(9);
619 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
620 		OUT_RING(tex[2].pp_txfilter);
621 		OUT_RING(tex[2].pp_txformat);
622 		OUT_RING(tex[2].pp_txoffset);
623 		OUT_RING(tex[2].pp_txcblend);
624 		OUT_RING(tex[2].pp_txablend);
625 		OUT_RING(tex[2].pp_tfactor);
626 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
627 		OUT_RING(tex[2].pp_border_color);
628 		ADVANCE_RING();
629 	}
630 
631 	return 0;
632 }
633 
634 /* Emit 1.2 state
635  */
radeon_emit_state2(drm_radeon_private_t * dev_priv,struct drm_file * file_priv,drm_radeon_state_t * state)636 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
637 			      struct drm_file *file_priv,
638 			      drm_radeon_state_t * state)
639 {
640 	RING_LOCALS;
641 
642 	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
643 		BEGIN_RING(3);
644 		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
645 		OUT_RING(state->context2.se_zbias_factor);
646 		OUT_RING(state->context2.se_zbias_constant);
647 		ADVANCE_RING();
648 	}
649 
650 	return radeon_emit_state(dev_priv, file_priv, &state->context,
651 				 state->tex, state->dirty);
652 }
653 
654 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
655  * 1.3 cmdbuffers allow all previous state to be updated as well as
656  * the tcl scalar and vector areas.
657  */
658 static struct {
659 	int start;
660 	int len;
661 	const char *name;
662 } packet[RADEON_MAX_STATE_PACKETS] = {
663 	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
664 	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
665 	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
666 	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
667 	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
668 	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
669 	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
670 	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
671 	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
672 	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
673 	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
674 	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
675 	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
676 	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
677 	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
678 	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
679 	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
680 	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
681 	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
682 	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
683 	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
684 		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
685 	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
686 	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
687 	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
688 	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
689 	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
690 	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
691 	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
692 	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
693 	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
694 	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
695 	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
696 	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
697 	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
698 	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
699 	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
700 	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
701 	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
702 	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
703 	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
704 	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
705 	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
706 	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
707 	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
708 	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
709 	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
710 	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
711 	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
712 	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
713 	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
714 	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
715 	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
716 	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
717 	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
718 	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
719 	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
720 	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
721 	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
722 	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
723 	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
724 	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
725 	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
726 		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
727 	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
728 	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
729 	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
730 	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
731 	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
732 	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
733 	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
734 	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
735 	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
736 	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
737 	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
738 	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
739 	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
740 	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
741 	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
742 	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
743 	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
744 	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
745 	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
746 	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
747 	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
748 	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
749 	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
750 	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
751 	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
752 	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
753 	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
754 	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
755 	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
756 	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
757 	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
758 	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
759 	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
760 	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
761 };
762 
763 /* ================================================================
764  * Performance monitoring functions
765  */
766 
/*
 * Paint a solid w x h rectangle with colour (r, g, b).
 *
 * (x, y) is relative to the first clip rectangle in the SAREA
 * (sarea_priv->boxes[0]).  The colour is packed according to
 * dev_priv->color_fmt: RGB565 when that format is selected, otherwise
 * ARGB8888 with the top byte set to 0xff.  The fill targets the front
 * pitch/offset when sarea_priv->pfCurrentPage is 1 and the back one
 * otherwise.
 */
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     struct drm_radeon_master_private *master_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 fill;
	u32 pitch_offset;
	RING_LOCALS;

	x += master_priv->sarea_priv->boxes[0].x1;
	y += master_priv->sarea_priv->boxes[0].y1;

	if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_RGB565)
		fill = (((r & 0xf8) << 8) |
			((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
	else	/* RADEON_COLOR_FORMAT_ARGB8888 and anything else */
		fill = (((0xff) << 24) | (r << 16) | (g << 8) | b);

	/* Wait for the 3D engine, then enable all bits in the write mask. */
	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	/* Solid-colour fill: header, GUI control, destination, colour,
	 * position, size.
	 */
	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (master_priv->sarea_priv->pfCurrentPage == 1)
		pitch_offset = dev_priv->front_pitch_offset;
	else
		pitch_offset = dev_priv->back_pitch_offset;
	OUT_RING(pitch_offset);

	OUT_RING(fill);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
816 
/*
 * Draw the performance indicator boxes for the statistics gathered in
 * dev_priv->stats, then reset those statistics to zero.
 *
 * Every indicator is an 8x8 square painted by radeon_clear_box(); a bar
 * whose width is the (clamped) number of requested buffers is drawn below
 * them.
 */
static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
{
	/* Fold several hints into the wait flag.  This is only a guess at
	 * whether userspace slept -- it would be better if it told us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears ||
	    dev_priv->stats.freelist_loops)
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

	/* Purple: page flipping */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red: we had to wait for idle at some point */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue (lost context?) is not drawn. */

	/* Yellow: texture swaps */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green: the hardware never idled (as far as we can tell) */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Bar showing the number of buffers allocated, capped at 100 pixels
	 * (not a great measure, easily confused).
	 */
	if (dev_priv->stats.requested_bufs != 0) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, master_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	/* Start the next measurement period from scratch. */
	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
869 
870 /* ================================================================
871  * CP command dispatch functions
872  */
873 
/*
 * Emit the CP commands that clear the buffers selected by clear->flags
 * for every cliprect currently listed in the master's SAREA.
 *
 * dev:         DRM device; dev->dev_private supplies the radeon state.
 * master:      DRM master whose SAREA provides the cliprects.
 * clear:       clear request; flags, color_mask, clear_color, clear_depth
 *              and depth_mask are read here.
 * depth_boxes: one entry per cliprect, read through the CLEAR_X1, CLEAR_Y1,
 *              CLEAR_X2, CLEAR_Y2 and CLEAR_DEPTH slots by the two
 *              quad-rendering depth paths; the fast-Z path does not use it.
 *
 * Colour buffers are cleared with 2D solid fills.  Depth/stencil is cleared
 * through exactly one of three paths: the fast-Z tile clear when
 * RADEON_CLEAR_FASTZ is set, otherwise a rendered quad using either the
 * R200 packet layout (microcode_version == UCODE_R200) or the other one.
 * The function always finishes by incrementing sarea_priv->last_clear and
 * emitting it with RADEON_CLEAR_AGE().
 */
static void radeon_cp_dispatch_clear(struct drm_device * dev,
				     struct drm_master *master,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	/* With page 1 current, exchange the RADEON_FRONT and RADEON_BACK
	 * request bits before acting on them.
	 */
	if (sarea_priv->pfCurrentPage == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}
	/* Without have_z_offset the depth/stencil request is dropped (and
	 * reported once); any colour clear still proceeds.
	 */
	if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
		if (!dev_priv->have_z_offset) {
			printk_once(KERN_ERR "radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
			flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
		}
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		/* One solid fill per cliprect and per requested colour buffer. */
		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		/* NOTE(review): this declaration shadows the function-level i. */
		int i;
		/* Depth pitch divided by 2 for the 16-bit Z format, by 4 otherwise. */
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
						       2) : (dev_priv->
							     depth_pitch / 4);

		u32 clearmask;

		/* Clear depth value with the low 8 bits of depth_mask in bits 24-31. */
		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		/* Per cliprect: pick one of three tile layouts and emit one
		 * CLEAR_ZMASK packet per tile row.  The row loops use an
		 * inclusive bound, so nrtilesy + 1 packets are emitted.
		 */
		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & RADEON_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		/* Start from the register values cached in dev_priv->depth_clear. */
		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		/* One three-vertex RECT_LIST per cliprect; each vertex is
		 * (x, y, depth, 0x3f800000), the last word being the bit
		 * pattern of 1.0f.
		 */
		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		/* Quad-based depth/stencil clear when the microcode is not UCODE_R200. */
		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		/* One packet carrying two values: 0 and then rb3d_cntl.
		 * NOTE(review): presumably these land in RADEON_PP_CNTL and the
		 * register following it (RB3D_CNTL?) -- confirm against the
		 * register map.
		 */
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		/* One three-vertex RECT_LIST per cliprect; each vertex is
		 * (x, y, depth, 0).
		 */
		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
1372 
radeon_cp_dispatch_swap(struct drm_device * dev,struct drm_master * master)1373 static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
1374 {
1375 	drm_radeon_private_t *dev_priv = dev->dev_private;
1376 	struct drm_radeon_master_private *master_priv = master->driver_priv;
1377 	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1378 	int nbox = sarea_priv->nbox;
1379 	struct drm_clip_rect *pbox = sarea_priv->boxes;
1380 	int i;
1381 	RING_LOCALS;
1382 	DRM_DEBUG("\n");
1383 
1384 	/* Do some trivial performance monitoring...
1385 	 */
1386 	if (dev_priv->do_boxes)
1387 		radeon_cp_performance_boxes(dev_priv, master_priv);
1388 
1389 	/* Wait for the 3D stream to idle before dispatching the bitblt.
1390 	 * This will prevent data corruption between the two streams.
1391 	 */
1392 	BEGIN_RING(2);
1393 
1394 	RADEON_WAIT_UNTIL_3D_IDLE();
1395 
1396 	ADVANCE_RING();
1397 
1398 	for (i = 0; i < nbox; i++) {
1399 		int x = pbox[i].x1;
1400 		int y = pbox[i].y1;
1401 		int w = pbox[i].x2 - x;
1402 		int h = pbox[i].y2 - y;
1403 
1404 		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1405 
1406 		BEGIN_RING(9);
1407 
1408 		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1409 		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1410 			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1411 			 RADEON_GMC_BRUSH_NONE |
1412 			 (dev_priv->color_fmt << 8) |
1413 			 RADEON_GMC_SRC_DATATYPE_COLOR |
1414 			 RADEON_ROP3_S |
1415 			 RADEON_DP_SRC_SOURCE_MEMORY |
1416 			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1417 
1418 		/* Make this work even if front & back are flipped:
1419 		 */
1420 		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1421 		if (sarea_priv->pfCurrentPage == 0) {
1422 			OUT_RING(dev_priv->back_pitch_offset);
1423 			OUT_RING(dev_priv->front_pitch_offset);
1424 		} else {
1425 			OUT_RING(dev_priv->front_pitch_offset);
1426 			OUT_RING(dev_priv->back_pitch_offset);
1427 		}
1428 
1429 		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1430 		OUT_RING((x << 16) | y);
1431 		OUT_RING((x << 16) | y);
1432 		OUT_RING((w << 16) | h);
1433 
1434 		ADVANCE_RING();
1435 	}
1436 
1437 	/* Increment the frame counter.  The client-side 3D driver must
1438 	 * throttle the framerate by waiting for this value before
1439 	 * performing the swapbuffer ioctl.
1440 	 */
1441 	sarea_priv->last_frame++;
1442 
1443 	BEGIN_RING(4);
1444 
1445 	RADEON_FRAME_AGE(sarea_priv->last_frame);
1446 	RADEON_WAIT_UNTIL_2D_IDLE();
1447 
1448 	ADVANCE_RING();
1449 }
1450 
radeon_cp_dispatch_flip(struct drm_device * dev,struct drm_master * master)1451 void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
1452 {
1453 	drm_radeon_private_t *dev_priv = dev->dev_private;
1454 	struct drm_radeon_master_private *master_priv = master->driver_priv;
1455 	struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
1456 	int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
1457 	    ? dev_priv->front_offset : dev_priv->back_offset;
1458 	RING_LOCALS;
1459 	DRM_DEBUG("pfCurrentPage=%d\n",
1460 		  master_priv->sarea_priv->pfCurrentPage);
1461 
1462 	/* Do some trivial performance monitoring...
1463 	 */
1464 	if (dev_priv->do_boxes) {
1465 		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1466 		radeon_cp_performance_boxes(dev_priv, master_priv);
1467 	}
1468 
1469 	/* Update the frame offsets for both CRTCs
1470 	 */
1471 	BEGIN_RING(6);
1472 
1473 	RADEON_WAIT_UNTIL_3D_IDLE();
1474 	OUT_RING_REG(RADEON_CRTC_OFFSET,
1475 		     ((sarea->frame.y * dev_priv->front_pitch +
1476 		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1477 		     + offset);
1478 	OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
1479 		     + offset);
1480 
1481 	ADVANCE_RING();
1482 
1483 	/* Increment the frame counter.  The client-side 3D driver must
1484 	 * throttle the framerate by waiting for this value before
1485 	 * performing the swapbuffer ioctl.
1486 	 */
1487 	master_priv->sarea_priv->last_frame++;
1488 	master_priv->sarea_priv->pfCurrentPage =
1489 		1 - master_priv->sarea_priv->pfCurrentPage;
1490 
1491 	BEGIN_RING(2);
1492 
1493 	RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
1494 
1495 	ADVANCE_RING();
1496 }
1497 
bad_prim_vertex_nr(int primitive,int nr)1498 static int bad_prim_vertex_nr(int primitive, int nr)
1499 {
1500 	switch (primitive & RADEON_PRIM_TYPE_MASK) {
1501 	case RADEON_PRIM_TYPE_NONE:
1502 	case RADEON_PRIM_TYPE_POINT:
1503 		return nr < 1;
1504 	case RADEON_PRIM_TYPE_LINE:
1505 		return (nr & 1) || nr == 0;
1506 	case RADEON_PRIM_TYPE_LINE_STRIP:
1507 		return nr < 2;
1508 	case RADEON_PRIM_TYPE_TRI_LIST:
1509 	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1510 	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1511 	case RADEON_PRIM_TYPE_RECT_LIST:
1512 		return nr % 3 || nr == 0;
1513 	case RADEON_PRIM_TYPE_TRI_FAN:
1514 	case RADEON_PRIM_TYPE_TRI_STRIP:
1515 		return nr < 3;
1516 	default:
1517 		return 1;
1518 	}
1519 }
1520 
/*
 * Description of one primitive handed to radeon_cp_dispatch_vertex() and
 * radeon_cp_dispatch_indices().
 */
typedef struct {
	unsigned int start;	/* byte offset into the buffer where the data begins */
	unsigned int finish;	/* byte offset into the buffer where the data ends */
	unsigned int prim;	/* hardware primitive word; type is in RADEON_PRIM_TYPE_MASK */
	unsigned int numverts;	/* vertex count emitted in the render packet */
	unsigned int offset;	/* added to gart_buffers_offset by the indexed path */
	unsigned int vc_format;	/* emitted unchanged as a dword of the render packet */
} drm_radeon_tcl_prim_t;
1529 
/*
 * Emit the render packet for a non-indexed primitive, once per SAREA
 * cliprect.
 *
 * The primitive is validated with bad_prim_vertex_nr() first; on failure an
 * error is logged and nothing is emitted.  The packet is emitted at least
 * once even when the SAREA lists no cliprects, in which case no clip
 * rectangle is programmed.
 */
static void radeon_cp_dispatch_vertex(struct drm_device * dev,
				      struct drm_file *file_priv,
				      struct drm_buf * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int vb_addr = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int nverts = (int)prim->numverts;
	int ncliprects = sarea_priv->nbox;
	int box = 0;
	u32 draw_cmd;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	/* The command word is the same for every cliprect. */
	draw_cmd = prim->prim | RADEON_PRIM_WALK_LIST |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (nverts << RADEON_NUM_VERTICES_SHIFT);

	for (;;) {
		/* Program the next cliprect, if there is one. */
		if (box < ncliprects)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[box]);

		/* Render the vertex buffer. */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(vb_addr);
		OUT_RING(nverts);
		OUT_RING(prim->vc_format);
		OUT_RING(draw_cmd);

		ADVANCE_RING();

		if (++box >= ncliprects)
			break;
	}
}
1577 
radeon_cp_discard_buffer(struct drm_device * dev,struct drm_master * master,struct drm_buf * buf)1578 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
1579 {
1580 	drm_radeon_private_t *dev_priv = dev->dev_private;
1581 	struct drm_radeon_master_private *master_priv = master->driver_priv;
1582 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1583 	RING_LOCALS;
1584 
1585 	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
1586 
1587 	/* Emit the vertex buffer age */
1588 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1589 		BEGIN_RING(3);
1590 		R600_DISPATCH_AGE(buf_priv->age);
1591 		ADVANCE_RING();
1592 	} else {
1593 		BEGIN_RING(2);
1594 		RADEON_DISPATCH_AGE(buf_priv->age);
1595 		ADVANCE_RING();
1596 	}
1597 
1598 	buf->pending = 1;
1599 	buf->used = 0;
1600 }
1601 
radeon_cp_dispatch_indirect(struct drm_device * dev,struct drm_buf * buf,int start,int end)1602 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1603 					struct drm_buf * buf, int start, int end)
1604 {
1605 	drm_radeon_private_t *dev_priv = dev->dev_private;
1606 	RING_LOCALS;
1607 	DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1608 
1609 	if (start != end) {
1610 		int offset = (dev_priv->gart_buffers_offset
1611 			      + buf->offset + start);
1612 		int dwords = (end - start + 3) / sizeof(u32);
1613 
1614 		/* Indirect buffer data must be an even number of
1615 		 * dwords, so if we've been given an odd number we must
1616 		 * pad the data with a Type-2 CP packet.
1617 		 */
1618 		if (dwords & 1) {
1619 			u32 *data = (u32 *)
1620 			    ((char *)dev->agp_buffer_map->handle
1621 			     + buf->offset + start);
1622 			data[dwords++] = RADEON_CP_PACKET2;
1623 		}
1624 
1625 		/* Fire off the indirect buffer */
1626 		BEGIN_RING(3);
1627 
1628 		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1629 		OUT_RING(offset);
1630 		OUT_RING(dwords);
1631 
1632 		ADVANCE_RING();
1633 	}
1634 }
1635 
/*
 * Dispatch an indexed primitive, once per SAREA cliprect.
 *
 * A five-dword render packet header is written into the element buffer at
 * prim->start; the range prim->start..prim->finish is then submitted through
 * radeon_cp_dispatch_indirect().  The index count is derived from the bytes
 * between prim->start + RADEON_INDEX_PRIM_OFFSET and prim->finish, at
 * sizeof(u16) bytes per index.
 *
 * The request is rejected (error logged, nothing emitted) when the index
 * count does not suit the primitive type, when the index data is empty, or
 * when prim->start is not a multiple of 8.
 */
static void radeon_cp_dispatch_indices(struct drm_device *dev,
				       struct drm_master *master,
				       struct drm_buf * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int vtx_addr = dev_priv->gart_buffers_offset + prim->offset;
	int idx_start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int nindices = (prim->finish - idx_start) / sizeof(u16);
	int ncliprects = sarea_priv->nbox;
	int box = 0;
	int pkt_dwords;
	u32 *pkt;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, nindices)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, nindices);
		return;
	}

	if ((prim->start & 0x7) || idx_start >= prim->finish) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	/* Packet length in dwords, header included, rounded up. */
	pkt_dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	pkt = (u32 *) ((char *)dev->agp_buffer_map->handle +
		       elt_buf->offset + prim->start);

	/* Fill in the packet header in the element buffer. */
	pkt[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, pkt_dwords - 2);
	pkt[1] = vtx_addr;
	pkt[2] = prim->numverts;
	pkt[3] = prim->vc_format;
	pkt[4] = (prim->prim |
		  RADEON_PRIM_WALK_IND |
		  RADEON_COLOR_ORDER_RGBA |
		  RADEON_VTX_FMT_RADEON_MODE |
		  (nindices << RADEON_NUM_VERTICES_SHIFT));

	/* Submit at least once, reprogramming the clip rectangle for each
	 * cliprect the SAREA lists.
	 */
	for (;;) {
		if (box < ncliprects)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[box]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		if (++box >= ncliprects)
			break;
	}

}
1693 
/* Largest number of bytes radeon_cp_dispatch_texture() uploads in one pass;
 * a bigger image is clamped to as many whole rows as fit in this size.
 */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1695 
radeon_cp_dispatch_texture(struct drm_device * dev,struct drm_file * file_priv,drm_radeon_texture_t * tex,drm_radeon_tex_image_t * image)1696 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1697 				      struct drm_file *file_priv,
1698 				      drm_radeon_texture_t * tex,
1699 				      drm_radeon_tex_image_t * image)
1700 {
1701 	drm_radeon_private_t *dev_priv = dev->dev_private;
1702 	struct drm_buf *buf;
1703 	u32 format;
1704 	u32 *buffer;
1705 	const u8 __user *data;
1706 	int size, dwords, tex_width, blit_width, spitch;
1707 	u32 height;
1708 	int i;
1709 	u32 texpitch, microtile;
1710 	u32 offset, byte_offset;
1711 	RING_LOCALS;
1712 
1713 	if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1714 		DRM_ERROR("Invalid destination offset\n");
1715 		return -EINVAL;
1716 	}
1717 
1718 	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1719 
1720 	/* Flush the pixel cache.  This ensures no pixel data gets mixed
1721 	 * up with the texture data from the host data blit, otherwise
1722 	 * part of the texture image may be corrupted.
1723 	 */
1724 	BEGIN_RING(4);
1725 	RADEON_FLUSH_CACHE();
1726 	RADEON_WAIT_UNTIL_IDLE();
1727 	ADVANCE_RING();
1728 
1729 	/* The compiler won't optimize away a division by a variable,
1730 	 * even if the only legal values are powers of two.  Thus, we'll
1731 	 * use a shift instead.
1732 	 */
1733 	switch (tex->format) {
1734 	case RADEON_TXFORMAT_ARGB8888:
1735 	case RADEON_TXFORMAT_RGBA8888:
1736 		format = RADEON_COLOR_FORMAT_ARGB8888;
1737 		tex_width = tex->width * 4;
1738 		blit_width = image->width * 4;
1739 		break;
1740 	case RADEON_TXFORMAT_AI88:
1741 	case RADEON_TXFORMAT_ARGB1555:
1742 	case RADEON_TXFORMAT_RGB565:
1743 	case RADEON_TXFORMAT_ARGB4444:
1744 	case RADEON_TXFORMAT_VYUY422:
1745 	case RADEON_TXFORMAT_YVYU422:
1746 		format = RADEON_COLOR_FORMAT_RGB565;
1747 		tex_width = tex->width * 2;
1748 		blit_width = image->width * 2;
1749 		break;
1750 	case RADEON_TXFORMAT_I8:
1751 	case RADEON_TXFORMAT_RGB332:
1752 		format = RADEON_COLOR_FORMAT_CI8;
1753 		tex_width = tex->width * 1;
1754 		blit_width = image->width * 1;
1755 		break;
1756 	default:
1757 		DRM_ERROR("invalid texture format %d\n", tex->format);
1758 		return -EINVAL;
1759 	}
1760 	spitch = blit_width >> 6;
1761 	if (spitch == 0 && image->height > 1)
1762 		return -EINVAL;
1763 
1764 	texpitch = tex->pitch;
1765 	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1766 		microtile = 1;
1767 		if (tex_width < 64) {
1768 			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1769 			/* we got tiled coordinates, untile them */
1770 			image->x *= 2;
1771 		}
1772 	} else
1773 		microtile = 0;
1774 
1775 	/* this might fail for zero-sized uploads - are those illegal? */
1776 	if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1777 				blit_width - 1)) {
1778 		DRM_ERROR("Invalid final destination offset\n");
1779 		return -EINVAL;
1780 	}
1781 
1782 	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1783 
1784 	do {
1785 		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1786 			  tex->offset >> 10, tex->pitch, tex->format,
1787 			  image->x, image->y, image->width, image->height);
1788 
1789 		/* Make a copy of some parameters in case we have to
1790 		 * update them for a multi-pass texture blit.
1791 		 */
1792 		height = image->height;
1793 		data = (const u8 __user *)image->data;
1794 
1795 		size = height * blit_width;
1796 
1797 		if (size > RADEON_MAX_TEXTURE_SIZE) {
1798 			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1799 			size = height * blit_width;
1800 		} else if (size < 4 && size > 0) {
1801 			size = 4;
1802 		} else if (size == 0) {
1803 			return 0;
1804 		}
1805 
1806 		buf = radeon_freelist_get(dev);
1807 		if (0 && !buf) {
1808 			radeon_do_cp_idle(dev_priv);
1809 			buf = radeon_freelist_get(dev);
1810 		}
1811 		if (!buf) {
1812 			DRM_DEBUG("EAGAIN\n");
1813 			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1814 				return -EFAULT;
1815 			return -EAGAIN;
1816 		}
1817 
1818 		/* Dispatch the indirect buffer.
1819 		 */
1820 		buffer =
1821 		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1822 		dwords = size / 4;
1823 
1824 #define RADEON_COPY_MT(_buf, _data, _width) \
1825 	do { \
1826 		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1827 			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1828 			return -EFAULT; \
1829 		} \
1830 	} while(0)
1831 
1832 		if (microtile) {
1833 			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
1834 			   however, we cannot use blitter directly for texture width < 64 bytes,
1835 			   since minimum tex pitch is 64 bytes and we need this to match
1836 			   the texture width, otherwise the blitter will tile it wrong.
1837 			   Thus, tiling manually in this case. Additionally, need to special
1838 			   case tex height = 1, since our actual image will have height 2
1839 			   and we need to ensure we don't read beyond the texture size
1840 			   from user space. */
1841 			if (tex->height == 1) {
1842 				if (tex_width >= 64 || tex_width <= 16) {
1843 					RADEON_COPY_MT(buffer, data,
1844 						(int)(tex_width * sizeof(u32)));
1845 				} else if (tex_width == 32) {
1846 					RADEON_COPY_MT(buffer, data, 16);
1847 					RADEON_COPY_MT(buffer + 8,
1848 						       data + 16, 16);
1849 				}
1850 			} else if (tex_width >= 64 || tex_width == 16) {
1851 				RADEON_COPY_MT(buffer, data,
1852 					       (int)(dwords * sizeof(u32)));
1853 			} else if (tex_width < 16) {
1854 				for (i = 0; i < tex->height; i++) {
1855 					RADEON_COPY_MT(buffer, data, tex_width);
1856 					buffer += 4;
1857 					data += tex_width;
1858 				}
1859 			} else if (tex_width == 32) {
1860 				/* TODO: make sure this works when not fitting in one buffer
1861 				   (i.e. 32bytes x 2048...) */
1862 				for (i = 0; i < tex->height; i += 2) {
1863 					RADEON_COPY_MT(buffer, data, 16);
1864 					data += 16;
1865 					RADEON_COPY_MT(buffer + 8, data, 16);
1866 					data += 16;
1867 					RADEON_COPY_MT(buffer + 4, data, 16);
1868 					data += 16;
1869 					RADEON_COPY_MT(buffer + 12, data, 16);
1870 					data += 16;
1871 					buffer += 16;
1872 				}
1873 			}
1874 		} else {
1875 			if (tex_width >= 32) {
1876 				/* Texture image width is larger than the minimum, so we
1877 				 * can upload it directly.
1878 				 */
1879 				RADEON_COPY_MT(buffer, data,
1880 					       (int)(dwords * sizeof(u32)));
1881 			} else {
1882 				/* Texture image width is less than the minimum, so we
1883 				 * need to pad out each image scanline to the minimum
1884 				 * width.
1885 				 */
1886 				for (i = 0; i < tex->height; i++) {
1887 					RADEON_COPY_MT(buffer, data, tex_width);
1888 					buffer += 8;
1889 					data += tex_width;
1890 				}
1891 			}
1892 		}
1893 
1894 #undef RADEON_COPY_MT
1895 		byte_offset = (image->y & ~2047) * blit_width;
1896 		buf->file_priv = file_priv;
1897 		buf->used = size;
1898 		offset = dev_priv->gart_buffers_offset + buf->offset;
1899 		BEGIN_RING(9);
1900 		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1901 		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1902 			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1903 			 RADEON_GMC_BRUSH_NONE |
1904 			 (format << 8) |
1905 			 RADEON_GMC_SRC_DATATYPE_COLOR |
1906 			 RADEON_ROP3_S |
1907 			 RADEON_DP_SRC_SOURCE_MEMORY |
1908 			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1909 		OUT_RING((spitch << 22) | (offset >> 10));
1910 		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1911 		OUT_RING(0);
1912 		OUT_RING((image->x << 16) | (image->y % 2048));
1913 		OUT_RING((image->width << 16) | height);
1914 		RADEON_WAIT_UNTIL_2D_IDLE();
1915 		ADVANCE_RING();
1916 		COMMIT_RING();
1917 
1918 		radeon_cp_discard_buffer(dev, file_priv->master, buf);
1919 
1920 		/* Update the input parameters for next time */
1921 		image->y += height;
1922 		image->height -= height;
1923 		image->data = (const u8 __user *)image->data + size;
1924 	} while (image->height > 0);
1925 
1926 	/* Flush the pixel cache after the blit completes.  This ensures
1927 	 * the texture data is written out to memory before rendering
1928 	 * continues.
1929 	 */
1930 	BEGIN_RING(4);
1931 	RADEON_FLUSH_CACHE();
1932 	RADEON_WAIT_UNTIL_2D_IDLE();
1933 	ADVANCE_RING();
1934 	COMMIT_RING();
1935 
1936 	return 0;
1937 }
1938 
/*
 * Queue a stipple pattern on the CP ring.
 *
 * Emits 35 dwords: a write of 0 to RADEON_RE_STIPPLE_ADDR (header + value),
 * then a table-write header for RADEON_RE_STIPPLE_DATA followed by the 32
 * dwords stipple[0..31].  The ring is advanced here but not committed.
 */
static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int row;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(35);

	/* Start writing at stipple address 0. */
	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	/* One header, then all 32 pattern dwords. */
	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (row = 0; row < 32; ++row)
		OUT_RING(stipple[row]);

	ADVANCE_RING();
}
1958 
/*
 * Write the driver's cached state for hardware surface @surf_index
 * (flags, lower bound, upper bound) to the corresponding surface
 * registers.
 *
 * Nothing is done when dev_priv->mmio is not set.  radeon_do_cp_idle()
 * is called before the registers are touched.
 */
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	/* Register sets of consecutive surfaces are 16 bytes apart. */
	const int reg_delta = 16 * surf_index;

	if (dev_priv->mmio) {
		radeon_do_cp_idle(dev_priv);

		RADEON_WRITE(RADEON_SURFACE0_INFO + reg_delta,
			     dev_priv->surfaces[surf_index].flags);
		RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + reg_delta,
			     dev_priv->surfaces[surf_index].lower);
		RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + reg_delta,
			     dev_priv->surfaces[surf_index].upper);
	}
}
1974 
1975 /* Allocates a virtual surface
1976  * doesn't always allocate a real surface, will stretch an existing
1977  * surface when possible.
1978  *
1979  * Note that refcount can be at most 2, since during a free refcount=3
1980  * might mean we have to allocate a new surface which might not always
1981  * be available.
1982  * For example : we allocate three contiguous surfaces ABC. If B is
1983  * freed, we suddenly need two surfaces to store A and C, which might
1984  * not always be available.
1985  */
alloc_surface(drm_radeon_surface_alloc_t * new,drm_radeon_private_t * dev_priv,struct drm_file * file_priv)1986 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1987 			 drm_radeon_private_t *dev_priv,
1988 			 struct drm_file *file_priv)
1989 {
1990 	struct radeon_virt_surface *s;
1991 	int i;
1992 	int virt_surface_index;
1993 	uint32_t new_upper, new_lower;
1994 
1995 	new_lower = new->address;
1996 	new_upper = new_lower + new->size - 1;
1997 
1998 	/* sanity check */
1999 	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
2000 	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
2001 	     RADEON_SURF_ADDRESS_FIXED_MASK)
2002 	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
2003 		return -1;
2004 
2005 	/* make sure there is no overlap with existing surfaces */
2006 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2007 		if ((dev_priv->surfaces[i].refcount != 0) &&
2008 		    (((new_lower >= dev_priv->surfaces[i].lower) &&
2009 		      (new_lower < dev_priv->surfaces[i].upper)) ||
2010 		     ((new_lower < dev_priv->surfaces[i].lower) &&
2011 		      (new_upper > dev_priv->surfaces[i].lower)))) {
2012 			return -1;
2013 		}
2014 	}
2015 
2016 	/* find a virtual surface */
2017 	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
2018 		if (dev_priv->virt_surfaces[i].file_priv == NULL)
2019 			break;
2020 	if (i == 2 * RADEON_MAX_SURFACES) {
2021 		return -1;
2022 	}
2023 	virt_surface_index = i;
2024 
2025 	/* try to reuse an existing surface */
2026 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2027 		/* extend before */
2028 		if ((dev_priv->surfaces[i].refcount == 1) &&
2029 		    (new->flags == dev_priv->surfaces[i].flags) &&
2030 		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2031 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2032 			s->surface_index = i;
2033 			s->lower = new_lower;
2034 			s->upper = new_upper;
2035 			s->flags = new->flags;
2036 			s->file_priv = file_priv;
2037 			dev_priv->surfaces[i].refcount++;
2038 			dev_priv->surfaces[i].lower = s->lower;
2039 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2040 			return virt_surface_index;
2041 		}
2042 
2043 		/* extend after */
2044 		if ((dev_priv->surfaces[i].refcount == 1) &&
2045 		    (new->flags == dev_priv->surfaces[i].flags) &&
2046 		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
2047 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2048 			s->surface_index = i;
2049 			s->lower = new_lower;
2050 			s->upper = new_upper;
2051 			s->flags = new->flags;
2052 			s->file_priv = file_priv;
2053 			dev_priv->surfaces[i].refcount++;
2054 			dev_priv->surfaces[i].upper = s->upper;
2055 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2056 			return virt_surface_index;
2057 		}
2058 	}
2059 
2060 	/* okay, we need a new one */
2061 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2062 		if (dev_priv->surfaces[i].refcount == 0) {
2063 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2064 			s->surface_index = i;
2065 			s->lower = new_lower;
2066 			s->upper = new_upper;
2067 			s->flags = new->flags;
2068 			s->file_priv = file_priv;
2069 			dev_priv->surfaces[i].refcount = 1;
2070 			dev_priv->surfaces[i].lower = s->lower;
2071 			dev_priv->surfaces[i].upper = s->upper;
2072 			dev_priv->surfaces[i].flags = s->flags;
2073 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2074 			return virt_surface_index;
2075 		}
2076 	}
2077 
2078 	/* we didn't find anything */
2079 	return -1;
2080 }
2081 
/*
 * Release the virtual surface owned by @file_priv whose lower bound
 * equals @lower.
 *
 * Where the virtual surface supplied the lower (upper) bound of its
 * hardware surface, that bound is replaced by the virtual surface's
 * opposite bound.  The hardware surface's refcount is decremented, its
 * flags are cleared when the refcount reaches zero, the virtual slot is
 * marked unowned and the surface registers are rewritten.
 *
 * Returns 0 when a surface was released, 1 when none matched.
 */
static int free_surface(struct drm_file *file_priv,
			drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *virt;
	int slot;

	for (slot = 0; slot < 2 * RADEON_MAX_SURFACES; slot++) {
		virt = &(dev_priv->virt_surfaces[slot]);

		/* Skip unowned slots and slots that are not the one asked for. */
		if (!virt->file_priv)
			continue;
		if (lower != virt->lower || file_priv != virt->file_priv)
			continue;

		if (dev_priv->surfaces[virt->surface_index].lower ==
		    virt->lower)
			dev_priv->surfaces[virt->surface_index].lower =
			    virt->upper;

		if (dev_priv->surfaces[virt->surface_index].upper ==
		    virt->upper)
			dev_priv->surfaces[virt->surface_index].upper =
			    virt->lower;

		if (--dev_priv->surfaces[virt->surface_index].refcount == 0)
			dev_priv->surfaces[virt->surface_index].flags = 0;

		virt->file_priv = NULL;
		radeon_apply_surface_regs(virt->surface_index, dev_priv);
		return 0;
	}

	return 1;
}
2118 
/*
 * Walk all virtual surface slots and call free_surface() for each one
 * whose owner is @file_priv.
 */
static void radeon_surfaces_release(struct drm_file *file_priv,
				    drm_radeon_private_t * dev_priv)
{
	int slot = 0;

	while (slot < 2 * RADEON_MAX_SURFACES) {
		if (dev_priv->virt_surfaces[slot].file_priv == file_priv) {
			free_surface(file_priv, dev_priv,
				     dev_priv->virt_surfaces[slot].lower);
		}
		slot++;
	}
}
2129 
2130 /* ================================================================
2131  * IOCTL functions
2132  */
radeon_surface_alloc(struct drm_device * dev,void * data,struct drm_file * file_priv)2133 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2134 {
2135 	drm_radeon_private_t *dev_priv = dev->dev_private;
2136 	drm_radeon_surface_alloc_t *alloc = data;
2137 
2138 	if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2139 		return -EINVAL;
2140 	else
2141 		return 0;
2142 }
2143 
radeon_surface_free(struct drm_device * dev,void * data,struct drm_file * file_priv)2144 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2145 {
2146 	drm_radeon_private_t *dev_priv = dev->dev_private;
2147 	drm_radeon_surface_free_t *memfree = data;
2148 
2149 	if (free_surface(file_priv, dev_priv, memfree->address))
2150 		return -EINVAL;
2151 	else
2152 		return 0;
2153 }
2154 
radeon_cp_clear(struct drm_device * dev,void * data,struct drm_file * file_priv)2155 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2156 {
2157 	drm_radeon_private_t *dev_priv = dev->dev_private;
2158 	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2159 	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2160 	drm_radeon_clear_t *clear = data;
2161 	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2162 	DRM_DEBUG("\n");
2163 
2164 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2165 
2166 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2167 
2168 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2169 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2170 
2171 	if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2172 			       sarea_priv->nbox * sizeof(depth_boxes[0])))
2173 		return -EFAULT;
2174 
2175 	radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);
2176 
2177 	COMMIT_RING();
2178 	return 0;
2179 }
2180 
2181 /* Not sure why this isn't set all the time:
2182  */
radeon_do_init_pageflip(struct drm_device * dev,struct drm_master * master)2183 static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
2184 {
2185 	drm_radeon_private_t *dev_priv = dev->dev_private;
2186 	struct drm_radeon_master_private *master_priv = master->driver_priv;
2187 	RING_LOCALS;
2188 
2189 	DRM_DEBUG("\n");
2190 
2191 	BEGIN_RING(6);
2192 	RADEON_WAIT_UNTIL_3D_IDLE();
2193 	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2194 	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2195 		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2196 	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2197 	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2198 		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2199 	ADVANCE_RING();
2200 
2201 	dev_priv->page_flipping = 1;
2202 
2203 	if (master_priv->sarea_priv->pfCurrentPage != 1)
2204 		master_priv->sarea_priv->pfCurrentPage = 0;
2205 
2206 	return 0;
2207 }
2208 
2209 /* Swapping and flipping are different operations, need different ioctls.
2210  * They can & should be intermixed to support multiple 3d windows.
2211  */
radeon_cp_flip(struct drm_device * dev,void * data,struct drm_file * file_priv)2212 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2213 {
2214 	drm_radeon_private_t *dev_priv = dev->dev_private;
2215 	DRM_DEBUG("\n");
2216 
2217 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2218 
2219 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2220 
2221 	if (!dev_priv->page_flipping)
2222 		radeon_do_init_pageflip(dev, file_priv->master);
2223 
2224 	radeon_cp_dispatch_flip(dev, file_priv->master);
2225 
2226 	COMMIT_RING();
2227 	return 0;
2228 }
2229 
radeon_cp_swap(struct drm_device * dev,void * data,struct drm_file * file_priv)2230 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2231 {
2232 	drm_radeon_private_t *dev_priv = dev->dev_private;
2233 	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2234 	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2235 
2236 	DRM_DEBUG("\n");
2237 
2238 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2239 
2240 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2241 
2242 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2243 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2244 
2245 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2246 		r600_cp_dispatch_swap(dev, file_priv);
2247 	else
2248 		radeon_cp_dispatch_swap(dev, file_priv->master);
2249 	sarea_priv->ctx_owner = 0;
2250 
2251 	COMMIT_RING();
2252 	return 0;
2253 }
2254 
radeon_cp_vertex(struct drm_device * dev,void * data,struct drm_file * file_priv)2255 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2256 {
2257 	drm_radeon_private_t *dev_priv = dev->dev_private;
2258 	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2259 	drm_radeon_sarea_t *sarea_priv;
2260 	struct drm_device_dma *dma = dev->dma;
2261 	struct drm_buf *buf;
2262 	drm_radeon_vertex_t *vertex = data;
2263 	drm_radeon_tcl_prim_t prim;
2264 
2265 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2266 
2267 	sarea_priv = master_priv->sarea_priv;
2268 
2269 	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2270 		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2271 
2272 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2273 		DRM_ERROR("buffer index %d (of %d max)\n",
2274 			  vertex->idx, dma->buf_count - 1);
2275 		return -EINVAL;
2276 	}
2277 	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2278 		DRM_ERROR("buffer prim %d\n", vertex->prim);
2279 		return -EINVAL;
2280 	}
2281 
2282 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2283 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2284 
2285 	buf = dma->buflist[vertex->idx];
2286 
2287 	if (buf->file_priv != file_priv) {
2288 		DRM_ERROR("process %d using buffer owned by %p\n",
2289 			  DRM_CURRENTPID, buf->file_priv);
2290 		return -EINVAL;
2291 	}
2292 	if (buf->pending) {
2293 		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2294 		return -EINVAL;
2295 	}
2296 
2297 	/* Build up a prim_t record:
2298 	 */
2299 	if (vertex->count) {
2300 		buf->used = vertex->count;	/* not used? */
2301 
2302 		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2303 			if (radeon_emit_state(dev_priv, file_priv,
2304 					      &sarea_priv->context_state,
2305 					      sarea_priv->tex_state,
2306 					      sarea_priv->dirty)) {
2307 				DRM_ERROR("radeon_emit_state failed\n");
2308 				return -EINVAL;
2309 			}
2310 
2311 			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2312 					       RADEON_UPLOAD_TEX1IMAGES |
2313 					       RADEON_UPLOAD_TEX2IMAGES |
2314 					       RADEON_REQUIRE_QUIESCENCE);
2315 		}
2316 
2317 		prim.start = 0;
2318 		prim.finish = vertex->count;	/* unused */
2319 		prim.prim = vertex->prim;
2320 		prim.numverts = vertex->count;
2321 		prim.vc_format = sarea_priv->vc_format;
2322 
2323 		radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
2324 	}
2325 
2326 	if (vertex->discard) {
2327 		radeon_cp_discard_buffer(dev, file_priv->master, buf);
2328 	}
2329 
2330 	COMMIT_RING();
2331 	return 0;
2332 }
2333 
radeon_cp_indices(struct drm_device * dev,void * data,struct drm_file * file_priv)2334 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2335 {
2336 	drm_radeon_private_t *dev_priv = dev->dev_private;
2337 	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2338 	drm_radeon_sarea_t *sarea_priv;
2339 	struct drm_device_dma *dma = dev->dma;
2340 	struct drm_buf *buf;
2341 	drm_radeon_indices_t *elts = data;
2342 	drm_radeon_tcl_prim_t prim;
2343 	int count;
2344 
2345 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2346 
2347 	sarea_priv = master_priv->sarea_priv;
2348 
2349 	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2350 		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2351 		  elts->discard);
2352 
2353 	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2354 		DRM_ERROR("buffer index %d (of %d max)\n",
2355 			  elts->idx, dma->buf_count - 1);
2356 		return -EINVAL;
2357 	}
2358 	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2359 		DRM_ERROR("buffer prim %d\n", elts->prim);
2360 		return -EINVAL;
2361 	}
2362 
2363 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2364 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2365 
2366 	buf = dma->buflist[elts->idx];
2367 
2368 	if (buf->file_priv != file_priv) {
2369 		DRM_ERROR("process %d using buffer owned by %p\n",
2370 			  DRM_CURRENTPID, buf->file_priv);
2371 		return -EINVAL;
2372 	}
2373 	if (buf->pending) {
2374 		DRM_ERROR("sending pending buffer %d\n", elts->idx);
2375 		return -EINVAL;
2376 	}
2377 
2378 	count = (elts->end - elts->start) / sizeof(u16);
2379 	elts->start -= RADEON_INDEX_PRIM_OFFSET;
2380 
2381 	if (elts->start & 0x7) {
2382 		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2383 		return -EINVAL;
2384 	}
2385 	if (elts->start < buf->used) {
2386 		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2387 		return -EINVAL;
2388 	}
2389 
2390 	buf->used = elts->end;
2391 
2392 	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2393 		if (radeon_emit_state(dev_priv, file_priv,
2394 				      &sarea_priv->context_state,
2395 				      sarea_priv->tex_state,
2396 				      sarea_priv->dirty)) {
2397 			DRM_ERROR("radeon_emit_state failed\n");
2398 			return -EINVAL;
2399 		}
2400 
2401 		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2402 				       RADEON_UPLOAD_TEX1IMAGES |
2403 				       RADEON_UPLOAD_TEX2IMAGES |
2404 				       RADEON_REQUIRE_QUIESCENCE);
2405 	}
2406 
2407 	/* Build up a prim_t record:
2408 	 */
2409 	prim.start = elts->start;
2410 	prim.finish = elts->end;
2411 	prim.prim = elts->prim;
2412 	prim.offset = 0;	/* offset from start of dma buffers */
2413 	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2414 	prim.vc_format = sarea_priv->vc_format;
2415 
2416 	radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
2417 	if (elts->discard) {
2418 		radeon_cp_discard_buffer(dev, file_priv->master, buf);
2419 	}
2420 
2421 	COMMIT_RING();
2422 	return 0;
2423 }
2424 
radeon_cp_texture(struct drm_device * dev,void * data,struct drm_file * file_priv)2425 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2426 {
2427 	drm_radeon_private_t *dev_priv = dev->dev_private;
2428 	drm_radeon_texture_t *tex = data;
2429 	drm_radeon_tex_image_t image;
2430 	int ret;
2431 
2432 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2433 
2434 	if (tex->image == NULL) {
2435 		DRM_ERROR("null texture image!\n");
2436 		return -EINVAL;
2437 	}
2438 
2439 	if (DRM_COPY_FROM_USER(&image,
2440 			       (drm_radeon_tex_image_t __user *) tex->image,
2441 			       sizeof(image)))
2442 		return -EFAULT;
2443 
2444 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2445 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2446 
2447 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2448 		ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2449 	else
2450 		ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2451 
2452 	return ret;
2453 }
2454 
radeon_cp_stipple(struct drm_device * dev,void * data,struct drm_file * file_priv)2455 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2456 {
2457 	drm_radeon_private_t *dev_priv = dev->dev_private;
2458 	drm_radeon_stipple_t *stipple = data;
2459 	u32 mask[32];
2460 
2461 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2462 
2463 	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2464 		return -EFAULT;
2465 
2466 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2467 
2468 	radeon_cp_dispatch_stipple(dev, mask);
2469 
2470 	COMMIT_RING();
2471 	return 0;
2472 }
2473 
radeon_cp_indirect(struct drm_device * dev,void * data,struct drm_file * file_priv)2474 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2475 {
2476 	drm_radeon_private_t *dev_priv = dev->dev_private;
2477 	struct drm_device_dma *dma = dev->dma;
2478 	struct drm_buf *buf;
2479 	drm_radeon_indirect_t *indirect = data;
2480 	RING_LOCALS;
2481 
2482 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2483 
2484 	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2485 		  indirect->idx, indirect->start, indirect->end,
2486 		  indirect->discard);
2487 
2488 	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2489 		DRM_ERROR("buffer index %d (of %d max)\n",
2490 			  indirect->idx, dma->buf_count - 1);
2491 		return -EINVAL;
2492 	}
2493 
2494 	buf = dma->buflist[indirect->idx];
2495 
2496 	if (buf->file_priv != file_priv) {
2497 		DRM_ERROR("process %d using buffer owned by %p\n",
2498 			  DRM_CURRENTPID, buf->file_priv);
2499 		return -EINVAL;
2500 	}
2501 	if (buf->pending) {
2502 		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2503 		return -EINVAL;
2504 	}
2505 
2506 	if (indirect->start < buf->used) {
2507 		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2508 			  indirect->start, buf->used);
2509 		return -EINVAL;
2510 	}
2511 
2512 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2513 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2514 
2515 	buf->used = indirect->end;
2516 
2517 	/* Dispatch the indirect buffer full of commands from the
2518 	 * X server.  This is insecure and is thus only available to
2519 	 * privileged clients.
2520 	 */
2521 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2522 		r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2523 	else {
2524 		/* Wait for the 3D stream to idle before the indirect buffer
2525 		 * containing 2D acceleration commands is processed.
2526 		 */
2527 		BEGIN_RING(2);
2528 		RADEON_WAIT_UNTIL_3D_IDLE();
2529 		ADVANCE_RING();
2530 		radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2531 	}
2532 
2533 	if (indirect->discard) {
2534 		radeon_cp_discard_buffer(dev, file_priv->master, buf);
2535 	}
2536 
2537 	COMMIT_RING();
2538 	return 0;
2539 }
2540 
radeon_cp_vertex2(struct drm_device * dev,void * data,struct drm_file * file_priv)2541 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2542 {
2543 	drm_radeon_private_t *dev_priv = dev->dev_private;
2544 	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2545 	drm_radeon_sarea_t *sarea_priv;
2546 	struct drm_device_dma *dma = dev->dma;
2547 	struct drm_buf *buf;
2548 	drm_radeon_vertex2_t *vertex = data;
2549 	int i;
2550 	unsigned char laststate;
2551 
2552 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2553 
2554 	sarea_priv = master_priv->sarea_priv;
2555 
2556 	DRM_DEBUG("pid=%d index=%d discard=%d\n",
2557 		  DRM_CURRENTPID, vertex->idx, vertex->discard);
2558 
2559 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2560 		DRM_ERROR("buffer index %d (of %d max)\n",
2561 			  vertex->idx, dma->buf_count - 1);
2562 		return -EINVAL;
2563 	}
2564 
2565 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2566 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2567 
2568 	buf = dma->buflist[vertex->idx];
2569 
2570 	if (buf->file_priv != file_priv) {
2571 		DRM_ERROR("process %d using buffer owned by %p\n",
2572 			  DRM_CURRENTPID, buf->file_priv);
2573 		return -EINVAL;
2574 	}
2575 
2576 	if (buf->pending) {
2577 		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2578 		return -EINVAL;
2579 	}
2580 
2581 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2582 		return -EINVAL;
2583 
2584 	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2585 		drm_radeon_prim_t prim;
2586 		drm_radeon_tcl_prim_t tclprim;
2587 
2588 		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2589 			return -EFAULT;
2590 
2591 		if (prim.stateidx != laststate) {
2592 			drm_radeon_state_t state;
2593 
2594 			if (DRM_COPY_FROM_USER(&state,
2595 					       &vertex->state[prim.stateidx],
2596 					       sizeof(state)))
2597 				return -EFAULT;
2598 
2599 			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2600 				DRM_ERROR("radeon_emit_state2 failed\n");
2601 				return -EINVAL;
2602 			}
2603 
2604 			laststate = prim.stateidx;
2605 		}
2606 
2607 		tclprim.start = prim.start;
2608 		tclprim.finish = prim.finish;
2609 		tclprim.prim = prim.prim;
2610 		tclprim.vc_format = prim.vc_format;
2611 
2612 		if (prim.prim & RADEON_PRIM_WALK_IND) {
2613 			tclprim.offset = prim.numverts * 64;
2614 			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2615 
2616 			radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
2617 		} else {
2618 			tclprim.numverts = prim.numverts;
2619 			tclprim.offset = 0;	/* not used */
2620 
2621 			radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
2622 		}
2623 
2624 		if (sarea_priv->nbox == 1)
2625 			sarea_priv->nbox = 0;
2626 	}
2627 
2628 	if (vertex->discard) {
2629 		radeon_cp_discard_buffer(dev, file_priv->master, buf);
2630 	}
2631 
2632 	COMMIT_RING();
2633 	return 0;
2634 }
2635 
/*
 * Emit one state packet from the command buffer.
 *
 * header.packet.packet_id selects an entry of the packet[] table, which
 * supplies the first register and the number of dwords.  The payload is
 * checked with radeon_check_and_fixup_packets() and then copied from the
 * command buffer to the ring behind a CP_PACKET0 header.
 *
 * Returns 0 on success, -EINVAL if the id is out of range, the command
 * buffer holds less data than the packet needs, or verification fails.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       struct drm_file *file_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	const int id = (int)header.packet.packet_id;
	int ndwords;
	int first_reg;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS) {
		return -EINVAL;
	}

	first_reg = packet[id].start;
	ndwords = packet[id].len;

	/* The whole payload must still be available in the buffer. */
	if (ndwords * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return -EINVAL;
	}

	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id,
					   cmdbuf->buffer)) {
		DRM_ERROR("Packet verification failed\n");
		return -EINVAL;
	}

	/* One header dword plus the payload. */
	BEGIN_RING(ndwords + 1);
	OUT_RING(CP_PACKET0(first_reg, (ndwords - 1)));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, ndwords);
	ADVANCE_RING();

	return 0;
}
2669 
/*
 * Copy header.scalars.count dwords from the command buffer into the TCL
 * scalar state.
 *
 * RADEON_SE_TCL_SCALAR_INDX_REG is programmed with header.scalars.offset
 * and header.scalars.stride, then the data is streamed to
 * RADEON_SE_TCL_SCALAR_DATA_REG as a table write.
 *
 * A count of zero is treated as a no-op: the table header is built from
 * "count - 1", so an empty upload would otherwise put a header with a
 * count field of -1 on the ring.  A count that exceeds the data still
 * unprocessed in the command buffer is rejected, matching the checks in
 * radeon_emit_packets() and radeon_emit_veclinear().
 *
 * Returns 0 on success, -EINVAL if the command buffer is too short.
 */
static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	if (!sz)
		return 0;
	if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
		DRM_ERROR("Scalar count larger than data provided\n");
		return -EINVAL;
	}

	/* Index header + index value + table header, then the payload. */
	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();
	return 0;
}
2687 
2688 /* God this is ugly
2689  */
radeon_emit_scalars2(drm_radeon_private_t * dev_priv,drm_radeon_cmd_header_t header,drm_radeon_kcmd_buffer_t * cmdbuf)2690 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2691 					   drm_radeon_cmd_header_t header,
2692 					   drm_radeon_kcmd_buffer_t *cmdbuf)
2693 {
2694 	int sz = header.scalars.count;
2695 	int start = ((unsigned int)header.scalars.offset) + 0x100;
2696 	int stride = header.scalars.stride;
2697 	RING_LOCALS;
2698 
2699 	BEGIN_RING(3 + sz);
2700 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2701 	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2702 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2703 	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2704 	ADVANCE_RING();
2705 	return 0;
2706 }
2707 
/*
 * Copy header.vectors.count dwords from the command buffer into the TCL
 * vector state.
 *
 * After a write of 0 to RADEON_SE_TCL_STATE_FLUSH,
 * RADEON_SE_TCL_VECTOR_INDX_REG is programmed with header.vectors.offset
 * and header.vectors.stride, and the data is streamed to
 * RADEON_SE_TCL_VECTOR_DATA_REG as a table write.
 *
 * A count of zero is treated as a no-op: the table header is built from
 * "count - 1", so an empty upload would otherwise put a header with a
 * count field of -1 on the ring.  A count that exceeds the data still
 * unprocessed in the command buffer is rejected, matching the checks in
 * radeon_emit_packets() and radeon_emit_veclinear().
 *
 * Returns 0 on success, -EINVAL if the command buffer is too short.
 */
static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	if (!sz)
		return 0;
	if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
		DRM_ERROR("Vector count larger than data provided\n");
		return -EINVAL;
	}

	/* Flush (2) + index header/value (2) + table header (1) + payload. */
	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();

	return 0;
}
2727 
/*
 * Emit a run of vector data with a fixed stride of 1.
 *
 * The header carries a 16-bit start index split into addr_lo/addr_hi and a
 * count; four dwords are copied from the command buffer per count unit.
 *
 * Returns 0 on success or when the count is zero, -EINVAL when the command
 * buffer does not hold enough data.
 */
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int ndwords = header.veclinear.count * 4;
	int first_vec = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	/* Empty upload: nothing to put on the ring. */
	if (ndwords == 0)
		return 0;

	/* ndwords is in dwords, the buffer accounting is in bytes. */
	if (drm_buffer_unprocessed(cmdbuf->buffer) < ndwords * 4)
		return -EINVAL;

	BEGIN_RING(5 + ndwords);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(first_vec | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (ndwords - 1)));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, ndwords);
	ADVANCE_RING();

	return 0;
}
2751 
/*
 * Validate the packet at the current command-buffer position with
 * radeon_check_and_fixup_packet3() and, if it passes, copy it to the ring.
 *
 * Returns 0 on success or the error reported by the checker.
 */
static int radeon_emit_packet3(struct drm_device * dev,
			       struct drm_file *file_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int ndwords;
	int err;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* The checker reports the packet length, in dwords, via ndwords. */
	err = radeon_check_and_fixup_packet3(dev_priv, file_priv, cmdbuf,
					     &ndwords);
	if (err != 0) {
		DRM_ERROR("Packet verification failed\n");
		return err;
	}

	BEGIN_RING(ndwords);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, ndwords);
	ADVANCE_RING();

	return 0;
}
2775 
/*
 * Validate the packet at the current command-buffer position and emit it
 * once per client cliprect, programming each cliprect before the packet.
 *
 * orig_nbox is the cliprect count the ioctl started with.  When it is zero
 * the packet is not emitted at all; the buffer is merely advanced past it.
 * When orig_nbox is non-zero but cmdbuf->nbox is zero, the do/while body
 * still runs once, so the packet is emitted a single time without touching
 * the clip rect.
 *
 * Returns 0 on success, the error from radeon_check_and_fixup_packet3(),
 * or -EFAULT if a cliprect cannot be copied from user space.
 *
 * NOTE(review): OUT_RING_DRM_BUFFER appears to consume cmdsz dwords from
 * cmdbuf->buffer (the skip path below has to advance the buffer by hand to
 * get the same effect).  If so, iterations after the first would copy the
 * data that follows the packet rather than the packet itself -- confirm
 * against the macro definition.
 */
static int radeon_emit_packet3_cliprect(struct drm_device *dev,
					struct drm_file *file_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect box;
	unsigned int cmdsz;
	int ret;
	struct drm_clip_rect __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* cmdsz receives the packet length in dwords. */
	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	/* No cliprects were supplied with the ioctl: skip the packet. */
	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return -EFAULT;
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	/* With a single cliprect, clear nbox so that later calls on the same
	 * command buffer take the "i < cmdbuf->nbox" branch no more and emit
	 * their packet without reprogramming the clip rect.
	 */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

	return 0;
      out:
	/* cmdsz is in dwords, drm_buffer_advance() is given bytes. */
	drm_buffer_advance(cmdbuf->buffer, cmdsz * 4);
	return 0;
}
2837 
radeon_emit_wait(struct drm_device * dev,int flags)2838 static int radeon_emit_wait(struct drm_device * dev, int flags)
2839 {
2840 	drm_radeon_private_t *dev_priv = dev->dev_private;
2841 	RING_LOCALS;
2842 
2843 	DRM_DEBUG("%x\n", flags);
2844 	switch (flags) {
2845 	case RADEON_WAIT_2D:
2846 		BEGIN_RING(2);
2847 		RADEON_WAIT_UNTIL_2D_IDLE();
2848 		ADVANCE_RING();
2849 		break;
2850 	case RADEON_WAIT_3D:
2851 		BEGIN_RING(2);
2852 		RADEON_WAIT_UNTIL_3D_IDLE();
2853 		ADVANCE_RING();
2854 		break;
2855 	case RADEON_WAIT_2D | RADEON_WAIT_3D:
2856 		BEGIN_RING(2);
2857 		RADEON_WAIT_UNTIL_IDLE();
2858 		ADVANCE_RING();
2859 		break;
2860 	default:
2861 		return -EINVAL;
2862 	}
2863 
2864 	return 0;
2865 }
2866 
/*
 * RADEON_CMDBUF ioctl handler.
 *
 * Copies the client's command buffer into a kernel drm_buffer, then walks
 * it one drm_radeon_cmd_header_t at a time, dispatching on cmd_type to the
 * matching emit helper.  When the microcode is UCODE_R300 the whole buffer
 * is handed to r300_do_cp_cmdbuf() instead.
 *
 * Returns 0 on success; -EINVAL for a buffer size outside 0..64KiB, for an
 * unknown cmd_type, or for any failing emit helper (the helper's own error
 * code is not propagated); the error from drm_buffer_alloc() or
 * drm_buffer_copy_from_user(); or the result of r300_do_cp_cmdbuf().
 * The error path frees the kernel copy and returns without COMMIT_RING().
 */
static int radeon_cp_cmdbuf(struct drm_device *dev, void *data,
		struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	drm_radeon_cmd_header_t stack_header;
	int idx;
	drm_radeon_kcmd_buffer_t *cmdbuf = data;
	int orig_nbox;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Bound the size before allocating a kernel copy of it. */
	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
		return -EINVAL;
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	if (cmdbuf->bufsz != 0) {
		int rv;
		/* Keep the user pointer: cmdbuf->buffer is overwritten with
		 * the kernel-side drm_buffer by drm_buffer_alloc().
		 */
		void __user *buffer = cmdbuf->buffer;
		rv = drm_buffer_alloc(&cmdbuf->buffer, cmdbuf->bufsz);
		if (rv)
			return rv;
		rv = drm_buffer_copy_from_user(cmdbuf->buffer, buffer,
						cmdbuf->bufsz);
		if (rv) {
			drm_buffer_free(cmdbuf->buffer);
			return rv;
		}
	} else
		/* Empty buffer: nothing was allocated, just commit. */
		goto done;

	/* Snapshot the cliprect count; radeon_emit_packet3_cliprect() may
	 * zero cmdbuf->nbox while processing.
	 */
	orig_nbox = cmdbuf->nbox;

	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);

		drm_buffer_free(cmdbuf->buffer);

		return temp;
	}

	/* microcode_version != r300 */
	/* Stop once fewer bytes than one header remain. */
	while (drm_buffer_unprocessed(cmdbuf->buffer) >= sizeof(stack_header)) {

		drm_radeon_cmd_header_t *header;
		/* stack_header is handed in as scratch storage (presumably
		 * used when the object cannot be returned in place -- confirm
		 * against drm_buffer_read_object()).
		 */
		header = drm_buffer_read_object(cmdbuf->buffer,
				sizeof(stack_header), &stack_header);

		switch (header->header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, file_priv, *header, cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, *header, cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, *header, cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header->dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			/* Only a buffer owned by this file that is not
			 * already pending may be discarded.
			 */
			buf = dma->buflist[idx];
			if (buf->file_priv != file_priv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->file_priv, file_priv,
					  buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, file_priv->master, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, file_priv, cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, *header, cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header->wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, *header, cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at byte %d\n",
				  header->header.cmd_type,
				  cmdbuf->buffer->iterator);
			goto err;
		}
	}

	drm_buffer_free(cmdbuf->buffer);

      done:
	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	/* Any failure above is reported as -EINVAL after releasing the
	 * kernel copy of the command buffer.
	 */
	drm_buffer_free(cmdbuf->buffer);
	return -EINVAL;
}
3029 
radeon_cp_getparam(struct drm_device * dev,void * data,struct drm_file * file_priv)3030 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3031 {
3032 	drm_radeon_private_t *dev_priv = dev->dev_private;
3033 	drm_radeon_getparam_t *param = data;
3034 	int value;
3035 
3036 	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3037 
3038 	switch (param->param) {
3039 	case RADEON_PARAM_GART_BUFFER_OFFSET:
3040 		value = dev_priv->gart_buffers_offset;
3041 		break;
3042 	case RADEON_PARAM_LAST_FRAME:
3043 		dev_priv->stats.last_frame_reads++;
3044 		value = GET_SCRATCH(dev_priv, 0);
3045 		break;
3046 	case RADEON_PARAM_LAST_DISPATCH:
3047 		value = GET_SCRATCH(dev_priv, 1);
3048 		break;
3049 	case RADEON_PARAM_LAST_CLEAR:
3050 		dev_priv->stats.last_clear_reads++;
3051 		value = GET_SCRATCH(dev_priv, 2);
3052 		break;
3053 	case RADEON_PARAM_IRQ_NR:
3054 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3055 			value = 0;
3056 		else
3057 			value = drm_dev_to_irq(dev);
3058 		break;
3059 	case RADEON_PARAM_GART_BASE:
3060 		value = dev_priv->gart_vm_start;
3061 		break;
3062 	case RADEON_PARAM_REGISTER_HANDLE:
3063 		value = dev_priv->mmio->offset;
3064 		break;
3065 	case RADEON_PARAM_STATUS_HANDLE:
3066 		value = dev_priv->ring_rptr_offset;
3067 		break;
3068 #if BITS_PER_LONG == 32
3069 		/*
3070 		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3071 		 * pointer which can't fit into an int-sized variable.  According to
3072 		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3073 		 * not supporting it shouldn't be a problem.  If the same functionality
3074 		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
3075 		 * so backwards-compatibility for the embedded platforms can be
3076 		 * maintained.  --davidm 4-Feb-2004.
3077 		 */
3078 	case RADEON_PARAM_SAREA_HANDLE:
3079 		/* The lock is the first dword in the sarea. */
3080 		/* no users of this parameter */
3081 		break;
3082 #endif
3083 	case RADEON_PARAM_GART_TEX_HANDLE:
3084 		value = dev_priv->gart_textures_offset;
3085 		break;
3086 	case RADEON_PARAM_SCRATCH_OFFSET:
3087 		if (!dev_priv->writeback_works)
3088 			return -EINVAL;
3089 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3090 			value = R600_SCRATCH_REG_OFFSET;
3091 		else
3092 			value = RADEON_SCRATCH_REG_OFFSET;
3093 		break;
3094 	case RADEON_PARAM_CARD_TYPE:
3095 		if (dev_priv->flags & RADEON_IS_PCIE)
3096 			value = RADEON_CARD_PCIE;
3097 		else if (dev_priv->flags & RADEON_IS_AGP)
3098 			value = RADEON_CARD_AGP;
3099 		else
3100 			value = RADEON_CARD_PCI;
3101 		break;
3102 	case RADEON_PARAM_VBLANK_CRTC:
3103 		value = radeon_vblank_crtc_get(dev);
3104 		break;
3105 	case RADEON_PARAM_FB_LOCATION:
3106 		value = radeon_read_fb_location(dev_priv);
3107 		break;
3108 	case RADEON_PARAM_NUM_GB_PIPES:
3109 		value = dev_priv->num_gb_pipes;
3110 		break;
3111 	case RADEON_PARAM_NUM_Z_PIPES:
3112 		value = dev_priv->num_z_pipes;
3113 		break;
3114 	default:
3115 		DRM_DEBUG("Invalid parameter %d\n", param->param);
3116 		return -EINVAL;
3117 	}
3118 
3119 	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3120 		DRM_ERROR("copy_to_user\n");
3121 		return -EFAULT;
3122 	}
3123 
3124 	return 0;
3125 }
3126 
radeon_cp_setparam(struct drm_device * dev,void * data,struct drm_file * file_priv)3127 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3128 {
3129 	drm_radeon_private_t *dev_priv = dev->dev_private;
3130 	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
3131 	drm_radeon_setparam_t *sp = data;
3132 	struct drm_radeon_driver_file_fields *radeon_priv;
3133 
3134 	switch (sp->param) {
3135 	case RADEON_SETPARAM_FB_LOCATION:
3136 		radeon_priv = file_priv->driver_priv;
3137 		radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3138 		    sp->value;
3139 		break;
3140 	case RADEON_SETPARAM_SWITCH_TILING:
3141 		if (sp->value == 0) {
3142 			DRM_DEBUG("color tiling disabled\n");
3143 			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3144 			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3145 			if (master_priv->sarea_priv)
3146 				master_priv->sarea_priv->tiling_enabled = 0;
3147 		} else if (sp->value == 1) {
3148 			DRM_DEBUG("color tiling enabled\n");
3149 			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3150 			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3151 			if (master_priv->sarea_priv)
3152 				master_priv->sarea_priv->tiling_enabled = 1;
3153 		}
3154 		break;
3155 	case RADEON_SETPARAM_PCIGART_LOCATION:
3156 		dev_priv->pcigart_offset = sp->value;
3157 		dev_priv->pcigart_offset_set = 1;
3158 		break;
3159 	case RADEON_SETPARAM_NEW_MEMMAP:
3160 		dev_priv->new_memmap = sp->value;
3161 		break;
3162 	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3163 		dev_priv->gart_info.table_size = sp->value;
3164 		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3165 			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3166 		break;
3167 	case RADEON_SETPARAM_VBLANK_CRTC:
3168 		return radeon_vblank_crtc_set(dev, sp->value);
3169 		break;
3170 	default:
3171 		DRM_DEBUG("Invalid parameter %d\n", sp->param);
3172 		return -EINVAL;
3173 	}
3174 
3175 	return 0;
3176 }
3177 
3178 /* When a client dies:
3179  *    - Check for and clean up flipped page state
3180  *    - Free any alloced GART memory.
3181  *    - Free any alloced radeon surfaces.
3182  *
3183  * DRM infrastructure takes care of reclaiming dma buffers.
3184  */
radeon_driver_preclose(struct drm_device * dev,struct drm_file * file_priv)3185 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3186 {
3187 	if (dev->dev_private) {
3188 		drm_radeon_private_t *dev_priv = dev->dev_private;
3189 		dev_priv->page_flipping = 0;
3190 		radeon_mem_release(file_priv, dev_priv->gart_heap);
3191 		radeon_mem_release(file_priv, dev_priv->fb_heap);
3192 		radeon_surfaces_release(file_priv, dev_priv);
3193 	}
3194 }
3195 
radeon_driver_lastclose(struct drm_device * dev)3196 void radeon_driver_lastclose(struct drm_device *dev)
3197 {
3198 	radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3199 	radeon_do_release(dev);
3200 }
3201 
radeon_driver_open(struct drm_device * dev,struct drm_file * file_priv)3202 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3203 {
3204 	drm_radeon_private_t *dev_priv = dev->dev_private;
3205 	struct drm_radeon_driver_file_fields *radeon_priv;
3206 
3207 	DRM_DEBUG("\n");
3208 	radeon_priv = kmalloc(sizeof(*radeon_priv), GFP_KERNEL);
3209 
3210 	if (!radeon_priv)
3211 		return -ENOMEM;
3212 
3213 	file_priv->driver_priv = radeon_priv;
3214 
3215 	if (dev_priv)
3216 		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3217 	else
3218 		radeon_priv->radeon_fb_delta = 0;
3219 	return 0;
3220 }
3221 
radeon_driver_postclose(struct drm_device * dev,struct drm_file * file_priv)3222 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3223 {
3224 	struct drm_radeon_driver_file_fields *radeon_priv =
3225 	    file_priv->driver_priv;
3226 
3227 	kfree(radeon_priv);
3228 }
3229 
/*
 * Driver ioctl table.  Each DRM_IOCTL_DEF_DRV() entry names an ioctl, the
 * function that handles it and a set of flags (presumably the access
 * restrictions applied by the DRM core -- confirm against the macro).
 *
 * Every entry carries DRM_AUTH.  Only the CP init/start/stop/reset,
 * RADEON_INDIRECT and RADEON_INIT_HEAP entries additionally carry
 * DRM_MASTER|DRM_ROOT_ONLY.
 */
struct drm_ioctl_desc radeon_ioctls[] = {
	DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
	DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
};
3260 
/* Number of entries in radeon_ioctls[], computed at compile time. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
3262