1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33 
34 #include "drmP.h"
35 #include "drm.h"
36 #include "drm_buffer.h"
37 #include "radeon_drm.h"
38 #include "radeon_drv.h"
39 #include "r300_reg.h"
40 
41 #include <asm/unaligned.h>
42 
43 #define R300_SIMULTANEOUS_CLIPRECTS		4
44 
45 /* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
46  */
/* Indexed by (number of cliprects - 1); see r300_emit_cliprects(). */
static const int r300_cliprect_cntl[4] = {
	0xAAAA,		/* 1 cliprect  */
	0xEEEE,		/* 2 cliprects */
	0xFEFE,		/* 3 cliprects */
	0xFFFE		/* 4 cliprects */
};
53 
54 /**
55  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
56  * buffer, starting with index n.
57  */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf, int n)
{
	struct drm_clip_rect box;
	int nr;
	int i;
	RING_LOCALS;

	/* Number of boxes still to emit, capped at what the hardware can
	 * track simultaneously. */
	nr = cmdbuf->nbox - n;
	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
		nr = R300_SIMULTANEOUS_CLIPRECTS;

	DRM_DEBUG("%i cliprects\n", nr);

	if (nr) {
		/* 1 dword packet0 header + 2 dwords (TL/BR) per rect,
		 * + 2 dwords for the CLIPRECT_CNTL write,
		 * + 3 dwords for the scissor packet below. */
		BEGIN_RING(6 + nr * 2);
		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));

		for (i = 0; i < nr; ++i) {
			/* The box array lives in userspace and may fault. */
			if (DRM_COPY_FROM_USER
			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
				DRM_ERROR("copy cliprect faulted\n");
				return -EFAULT;
			}

			box.x2--;	/* Hardware expects inclusive bottom-right corner */
			box.y2--;

			/* RV515 and newer take unbiased coordinates; older
			 * parts need the R300_CLIPRECT_OFFSET bias applied
			 * before masking. */
			if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
				box.x1 = (box.x1) &
					R300_CLIPRECT_MASK;
				box.y1 = (box.y1) &
					R300_CLIPRECT_MASK;
				box.x2 = (box.x2) &
					R300_CLIPRECT_MASK;
				box.y2 = (box.y2) &
					R300_CLIPRECT_MASK;
			} else {
				box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
				box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
					R300_CLIPRECT_MASK;
			}

			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
				 (box.y1 << R300_CLIPRECT_Y_SHIFT));
			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
				 (box.y2 << R300_CLIPRECT_Y_SHIFT));

		}

		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);

		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
		 * client might be able to trample over memory.
		 * The impact should be very limited, but I'd rather be safe than
		 * sorry.
		 */
		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
		OUT_RING(0);
		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
		ADVANCE_RING();
	} else {
		/* Why we allow zero cliprect rendering:
		 * There are some commands in a command buffer that must be submitted
		 * even when there are no cliprects, e.g. DMA buffer discard
		 * or state setting (though state setting could be avoided by
		 * simulating a loss of context).
		 *
		 * Now since the cmdbuf interface is so chaotic right now (and is
		 * bound to remain that way for a bit until things settle down),
		 * it is basically impossible to filter out the commands that are
		 * necessary and those that aren't.
		 *
		 * So I choose the safe way and don't do any filtering at all;
		 * instead, I simply set up the engine so that all rendering
		 * can't produce any fragments.
		 */
		BEGIN_RING(2);
		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
		ADVANCE_RING();
	}

	/* flush cache and wait idle clean after cliprect change */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	ADVANCE_RING();
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	return 0;
}
159 
/* One MARK_* flag byte per register dword in the 64 KiB register aperture;
 * populated by r300_init_reg_flags() and consulted by the packet0 checkers. */
static u8 r300_reg_flags[0x10000 >> 2];
161 
/**
 * Build the per-register permission table used to validate user-supplied
 * packet0 writes.  Registers not listed here remain 0 and are rejected;
 * MARK_CHECK_OFFSET registers take GPU addresses that must additionally
 * pass radeon_check_offset().  Ranges differ by chip family.
 */
void r300_init_reg_flags(struct drm_device *dev)
{
	int i;
	drm_radeon_private_t *dev_priv = dev->dev_private;

	memset(r300_reg_flags, 0, 0x10000 >> 2);
#define ADD_RANGE_MARK(reg, count,mark) \
		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
			r300_reg_flags[i]|=(mark);

#define MARK_SAFE		1
#define MARK_CHECK_OFFSET	2

#define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)

	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
	ADD_RANGE(R300_VAP_CNTL, 1);
	ADD_RANGE(R300_SE_VTE_CNTL, 2);
	ADD_RANGE(0x2134, 2);
	ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
	ADD_RANGE(0x21DC, 1);
	ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
	ADD_RANGE(R300_VAP_CLIP_X_0, 4);
	ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
	ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
	ADD_RANGE(R300_GB_ENABLE, 1);
	ADD_RANGE(R300_GB_MSPOS0, 5);
	ADD_RANGE(R300_TX_INVALTAGS, 1);
	ADD_RANGE(R300_TX_ENABLE, 1);
	ADD_RANGE(0x4200, 4);
	ADD_RANGE(0x4214, 1);
	ADD_RANGE(R300_RE_POINTSIZE, 1);
	ADD_RANGE(0x4230, 3);
	ADD_RANGE(R300_RE_LINE_CNT, 1);
	ADD_RANGE(R300_RE_UNK4238, 1);
	ADD_RANGE(0x4260, 3);
	ADD_RANGE(R300_RE_SHADE, 4);
	ADD_RANGE(R300_RE_POLYGON_MODE, 5);
	ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
	ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
	ADD_RANGE(R300_RE_CULL_CNTL, 1);
	ADD_RANGE(0x42C0, 2);
	ADD_RANGE(R300_RS_CNTL_0, 2);

	ADD_RANGE(R300_SU_REG_DEST, 1);
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)
		ADD_RANGE(RV530_FG_ZBREG_DEST, 1);

	ADD_RANGE(R300_SC_HYPERZ, 2);
	ADD_RANGE(0x43E8, 1);

	ADD_RANGE(0x46A4, 5);

	ADD_RANGE(R300_RE_FOG_STATE, 1);
	ADD_RANGE(R300_FOG_COLOR_R, 3);
	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
	ADD_RANGE(0x4BD8, 1);
	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
	ADD_RANGE(0x4E00, 1);
	ADD_RANGE(R300_RB3D_CBLEND, 2);
	ADD_RANGE(R300_RB3D_COLORMASK, 1);
	ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);	/* check offset */
	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
	ADD_RANGE(0x4E50, 9);
	ADD_RANGE(0x4E88, 1);
	ADD_RANGE(0x4EA0, 2);
	ADD_RANGE(R300_ZB_CNTL, 3);
	ADD_RANGE(R300_ZB_FORMAT, 4);
	ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */
	ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
	ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
	ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
	ADD_RANGE(R300_ZB_ZPASS_DATA, 2); /* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */

	ADD_RANGE(R300_TX_FILTER_0, 16);
	ADD_RANGE(R300_TX_FILTER1_0, 16);
	ADD_RANGE(R300_TX_SIZE_0, 16);
	ADD_RANGE(R300_TX_FORMAT_0, 16);
	ADD_RANGE(R300_TX_PITCH_0, 16);
	/* Texture offset is dangerous and needs more checking */
	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);

	/* Sporadic registers used as primitives are emitted */
	ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);

	/* R5xx exposes a different fragment-shader programming interface
	 * than R3xx/R4xx, so whitelist the matching register set only. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
		ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
		ADD_RANGE(R500_US_CONFIG, 2);
		ADD_RANGE(R500_US_CODE_ADDR, 3);
		ADD_RANGE(R500_US_FC_CTRL, 1);
		ADD_RANGE(R500_RS_IP_0, 16);
		ADD_RANGE(R500_RS_INST_0, 16);
		ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
		ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
		ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
	} else {
		ADD_RANGE(R300_PFS_CNTL_0, 3);
		ADD_RANGE(R300_PFS_NODE_0, 4);
		ADD_RANGE(R300_PFS_TEXI_0, 64);
		ADD_RANGE(R300_PFS_INSTR0_0, 64);
		ADD_RANGE(R300_PFS_INSTR1_0, 64);
		ADD_RANGE(R300_PFS_INSTR2_0, 64);
		ADD_RANGE(R300_PFS_INSTR3_0, 64);
		ADD_RANGE(R300_RS_INTERP_0, 8);
		ADD_RANGE(R300_RS_ROUTE_0, 8);

	}
}
281 
r300_check_range(unsigned reg,int count)282 static __inline__ int r300_check_range(unsigned reg, int count)
283 {
284 	int i;
285 	if (reg & ~0xffff)
286 		return -1;
287 	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
288 		if (r300_reg_flags[i] != MARK_SAFE)
289 			return 1;
290 	return 0;
291 }
292 
/*
 * Emit a packet0 whose range touched at least one non-MARK_SAFE register.
 * Each dword is validated individually against r300_reg_flags; values
 * destined for MARK_CHECK_OFFSET registers must be valid GPU offsets.
 * Returns 0 on success or -EINVAL if any register/value fails the check.
 */
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
							  dev_priv,
							  drm_radeon_kcmd_buffer_t
							  * cmdbuf,
							  drm_r300_cmd_header_t
							  header)
{
	int reg;
	int sz;
	int i;
	u32 *value;
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	/* The loop below assumes 0 <= sz <= 64 dwords. */
	if ((sz > 64) || (sz < 0)) {
		DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
			 reg, sz);
		return -EINVAL;
	}

	for (i = 0; i < sz; i++) {
		switch (r300_reg_flags[(reg >> 2) + i]) {
		case MARK_SAFE:
			break;
		case MARK_CHECK_OFFSET:
			/* This register takes a GPU address; reject values
			 * outside the ranges the client may reference. */
			value = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
			if (!radeon_check_offset(dev_priv, *value)) {
				DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n",
					 reg, sz);
				return -EINVAL;
			}
			break;
		default:
			/* Unlisted registers (flag 0) are never writable. */
			DRM_ERROR("Register %04x failed check as flag=%02x\n",
				reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
			return -EINVAL;
		}
	}

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();

	return 0;
}
341 
342 /**
343  * Emits a packet0 setting arbitrary registers.
344  * Called by r300_do_cp_cmdbuf.
345  *
346  * Note that checks are performed on contents and addresses of the registers
347  */
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int reg;
	int sz;
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	/* An empty packet is a no-op, not an error. */
	if (!sz)
		return 0;

	/* The command buffer must actually contain sz dwords of payload. */
	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	/* The whole range must fit in the 64 KiB register aperture. */
	if (reg + sz * 4 >= 0x10000) {
		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
			  sz);
		return -EINVAL;
	}

	if (r300_check_range(reg, sz)) {
		/* go and check everything */
		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
							   header);
	}
	/* the rest of the data is safe to emit, whatever the values the user passed */

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();

	return 0;
}
385 
386 /**
387  * Uploads user-supplied vertex program instructions or parameters onto
388  * the graphics card.
389  * Called by r300_do_cp_cmdbuf.
390  */
static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
				    drm_radeon_kcmd_buffer_t *cmdbuf,
				    drm_r300_cmd_header_t header)
{
	int sz;
	int addr;
	RING_LOCALS;

	/* One "count" unit is a 4-dword (16-byte) vector upload. */
	sz = header.vpu.count;
	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

	if (!sz)
		return 0;
	if (sz * 16 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	/* VAP is very sensitive so we purge cache before we program it
	 * and we also flush its state before & after */
	BEGIN_RING(6);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
	OUT_RING(0);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	/* Upload: set the start index, then stream sz vectors through the
	 * auto-incrementing UPLOAD_DATA port. */
	BEGIN_RING(3 + sz * 4);
	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * 4);
	ADVANCE_RING();

	/* Flush PVS state again after programming. */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
	OUT_RING(0);
	ADVANCE_RING();

	return 0;
}
433 
434 /**
435  * Emit a clear packet from userspace.
436  * Called by r300_emit_packet3.
437  */
static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
				      drm_radeon_kcmd_buffer_t *cmdbuf)
{
	RING_LOCALS;

	/* The clear payload is exactly 8 dwords of vertex data. */
	if (8 * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	/* Draw a single point with in-packet vertex data; the user-supplied
	 * 8 dwords are the vertex itself. */
	BEGIN_RING(10);
	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, 8);
	ADVANCE_RING();

	/* Flush the destination cache and wait for the 3D engine to go
	 * idle-clean before anything else touches the framebuffer. */
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(R300_RB3D_DC_FLUSH);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	return 0;
}
464 
/*
 * Validate and emit a 3D_LOAD_VBPNTR packet (vertex array pointers).
 * Every GPU offset in the packet is range-checked before emission; the
 * array count encoded in the first payload dword must be consistent with
 * the packet length.  Returns 0 on success, -EINVAL on malformed input.
 */
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
					       drm_radeon_kcmd_buffer_t *cmdbuf,
					       u32 header)
{
	int count, i, k;
#define MAX_ARRAY_PACKET  64
	u32 *data;
	u32 narrays;
	RING_LOCALS;

	count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16;

	if ((count + 1) > MAX_ARRAY_PACKET) {
		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
			  count);
		return -EINVAL;
	}
	/* carefully check packet contents */

	/* We have already read the header so advance the buffer. */
	drm_buffer_advance(cmdbuf->buffer, 4);

	/* First payload dword holds the number of vertex arrays; after it,
	 * arrays come in pairs of (attribute, offset, offset) triples,
	 * with a trailing single (attribute, offset) for an odd count. */
	narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	k = 0;
	i = 1;
	while ((k < narrays) && (i < (count + 1))) {
		i++;		/* skip attribute field */
		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
		if (!radeon_check_offset(dev_priv, *data)) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
		if (k == narrays)
			break;
		/* have one more to process, they come in pairs */
		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
		if (!radeon_check_offset(dev_priv, *data)) {
			DRM_ERROR
			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
			     k, i);
			return -EINVAL;
		}
		k++;
		i++;
	}
	/* do the counts match what we expect ? */
	if ((k != narrays) || (i != (count + 1))) {
		DRM_ERROR
		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
		     k, i, narrays, count + 1);
		return -EINVAL;
	}

	/* all clear, output packet */

	BEGIN_RING(count + 2);
	OUT_RING(header);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1);
	ADVANCE_RING();

	return 0;
}
531 
/*
 * Validate and emit a CNTL_BITBLT_MULTI packet.  When the host-data flag
 * (bit 15) is set, the GMC control dword may carry one or two pitch/offset
 * pairs; each encoded offset (value << 10) is range-checked before the
 * packet is forwarded to the ring.
 */
static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
					     drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	int count, ret;
	RING_LOCALS;


	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;

	if (*cmd & 0x8000) {
		u32 offset;
		u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
		if (*cmd1 & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {

			/* First pitch/offset dword follows the GMC control. */
			u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
			offset = *cmd2 << 10;
			ret = !radeon_check_offset(dev_priv, offset);
			if (ret) {
				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
				return -EINVAL;
			}
		}

		/* Both SRC and DST set means a second pitch/offset dword. */
		if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (*cmd1 & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
			offset = *cmd3 << 10;
			ret = !radeon_check_offset(dev_priv, offset);
			if (ret) {
				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
				return -EINVAL;
			}

		}
	}

	BEGIN_RING(count+2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
	ADVANCE_RING();

	return 0;
}
576 
/*
 * Validate and emit a 3D_DRAW_INDX_2 packet.  The index count encoded in
 * the VAP_VF_CNTL dword must match the packet size (indices are packed two
 * per dword for 16-bit index mode).  A zero-length draw must be followed
 * immediately by an INDX_BUFFER packet, which is validated and emitted as
 * well.  Returns 0 on success, -EINVAL on malformed input.
 */
static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
	int count;
	int expected_count;
	RING_LOCALS;

	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;

	/* High half of VAP_VF_CNTL is the vertex/index count. */
	expected_count = *cmd1 >> 16;
	if (!(*cmd1 & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
		expected_count = (expected_count+1)/2;	/* 16-bit: 2 per dword */

	if (count && count != expected_count) {
		DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
			count, expected_count);
		return -EINVAL;
	}

	BEGIN_RING(count+2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
	ADVANCE_RING();

	if (!count) {
		drm_r300_cmd_header_t stack_header, *header;
		u32 *cmd1, *cmd2, *cmd3;

		/* An empty draw pulls its indices from a separate buffer;
		 * the stream must continue with a 4-dword INDX_BUFFER
		 * packet plus its r300 command header. */
		if (drm_buffer_unprocessed(cmdbuf->buffer)
				< 4*4 + sizeof(stack_header)) {
			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
			return -EINVAL;
		}

		header = drm_buffer_read_object(cmdbuf->buffer,
				sizeof(stack_header), &stack_header);

		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
		cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
		cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
		cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);

		if (header->header.cmd_type != R300_CMD_PACKET3 ||
		    header->packet3.packet != R300_CMD_PACKET3_RAW ||
		    *cmd != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
			return -EINVAL;
		}

		/* The destination register dword must be exactly the
		 * expected indirect-buffer address encoding. */
		if ((*cmd1 & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n",
					*cmd1);
			return -EINVAL;
		}
		if (!radeon_check_offset(dev_priv, *cmd2)) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n",
					*cmd2);
			return -EINVAL;
		}
		if (*cmd3 != expected_count) {
			DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
				*cmd3, expected_count);
			return -EINVAL;
		}

		BEGIN_RING(4);
		OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4);
		ADVANCE_RING();
	}

	return 0;
}
650 
/*
 * Validate and emit a raw packet3 from the user command stream.  Only a
 * whitelist of packet types is accepted; pointer-carrying packets are
 * dispatched to their dedicated checkers, draw packets clear the
 * flush/purge tracking flags, and anything unrecognized is rejected.
 */
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *header;
	int count;
	RING_LOCALS;

	/* Need at least the packet3 header dword. */
	if (4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	/* Fixme !! This simply emits a packet without much checking.
	   We need to be smarter. */

	/* obtain first word - actual packet3 header */
	header = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);

	/* Is it packet 3 ? */
	if ((*header >> 30) != 0x3) {
		DRM_ERROR("Not a packet3 header (0x%08x)\n", *header);
		return -EINVAL;
	}

	count = (*header >> 16) & 0x3fff;

	/* Check again now that we know how much data to expect */
	if ((count + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) {
		DRM_ERROR
		    ("Expected packet3 of length %d but have only %d bytes left\n",
		     (count + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer));
		return -EINVAL;
	}

	/* Is it a packet type we know about ? */
	switch (*header & 0xff00) {
	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *header);

	case RADEON_CNTL_BITBLT_MULTI:
		return r300_emit_bitblt_multi(dev_priv, cmdbuf);

	case RADEON_CP_INDX_BUFFER:
		/* Only legal directly after 3D_DRAW_INDX_2, where it is
		 * consumed by r300_emit_draw_indx_2(). */
		DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
		return -EINVAL;
	case RADEON_CP_3D_DRAW_IMMD_2:
		/* triggers drawing using in-packet vertex data */
	case RADEON_CP_3D_DRAW_VBUF_2:
		/* triggers drawing of vertex buffers setup elsewhere */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
					   RADEON_PURGE_EMITED);
		break;
	case RADEON_CP_3D_DRAW_INDX_2:
		/* triggers drawing using indices to vertex buffer */
		/* whenever we send vertex we clear flush & purge */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
					   RADEON_PURGE_EMITED);
		return r300_emit_draw_indx_2(dev_priv, cmdbuf);
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
		/* these packets are safe */
		break;
	default:
		DRM_ERROR("Unknown packet3 header (0x%08x)\n", *header);
		return -EINVAL;
	}

	BEGIN_RING(count + 2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
	ADVANCE_RING();

	return 0;
}
722 
723 /**
724  * Emit a rendering packet3 from userspace.
725  * Called by r300_do_cp_cmdbuf.
726  */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int n;
	int ret;
	/* Remember where the packet payload starts so it can be replayed
	 * once per cliprect group. */
	int orig_iter = cmdbuf->buffer->iterator;

	/* This is a do-while-loop so that we run the interior at least once,
	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
	 */
	n = 0;
	do {
		/* Only re-emit cliprects when there are more boxes than the
		 * hardware handles at once; otherwise the caller's cliprect
		 * state is still valid. */
		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
			if (ret)
				return ret;

			/* Rewind so the same packet data is emitted again
			 * for this cliprect group. */
			cmdbuf->buffer->iterator = orig_iter;
		}

		switch (header.packet3.packet) {
		case R300_CMD_PACKET3_CLEAR:
			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
			ret = r300_emit_clear(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_clear failed\n");
				return ret;
			}
			break;

		case R300_CMD_PACKET3_RAW:
			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_raw_packet3 failed\n");
				return ret;
			}
			break;

		default:
			DRM_ERROR("bad packet3 type %i at byte %d\n",
				  header.packet3.packet,
				  cmdbuf->buffer->iterator - (int)sizeof(header));
			return -EINVAL;
		}

		n += R300_SIMULTANEOUS_CLIPRECTS;
	} while (n < cmdbuf->nbox);

	return 0;
}
779 
780 /* Some of the R300 chips seem to be extremely touchy about the two registers
781  * that are configured in r300_pacify.
782  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
783  * sends a command buffer that contains only state setting commands and a
784  * vertex program/parameter upload sequence, this will eventually lead to a
785  * lockup, unless the sequence is bracketed by calls to r300_pacify.
786  * So we should take great care to *always* call r300_pacify before
787  * *anything* 3D related, and again afterwards. This is what the
788  * call bracket in r300_do_cp_cmdbuf is for.
789  */
790 
791 /**
792  * Emit the sequence to pacify R300.
793  */
static void r300_pacify(drm_radeon_private_t *dev_priv)
{
	uint32_t cache_z, cache_3d, cache_2d;
	RING_LOCALS;

	cache_z = R300_ZC_FLUSH;
	cache_2d = R300_RB2D_DC_FLUSH;
	cache_3d = R300_RB3D_DC_FLUSH;
	if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
		/* Primitives were drawn since the last purge, so purge
		 * (free) the caches as well as flushing them. */
		cache_z |= R300_ZC_FREE;
		cache_2d |= R300_RB2D_DC_FREE;
		cache_3d |= R300_RB3D_DC_FREE;
	}

	/* flush & purge zbuffer */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
	OUT_RING(cache_z);
	ADVANCE_RING();
	/* flush & purge 3d */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	OUT_RING(cache_3d);
	ADVANCE_RING();
	/* flush & purge texture */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
	OUT_RING(0);
	ADVANCE_RING();
	/* FIXME: is this one really needed ? */
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
	OUT_RING(0);
	ADVANCE_RING();
	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
	ADVANCE_RING();
	/* flush & purge 2d through E2 as RB2D will trigger lockup */
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
	OUT_RING(cache_2d);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
		 RADEON_WAIT_HOST_IDLECLEAN);
	ADVANCE_RING();
	/* set flush & purge flags */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}
844 
845 /**
846  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
847  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
848  * be careful about how this function is called.
849  */
r300_discard_buffer(struct drm_device * dev,struct drm_master * master,struct drm_buf * buf)850 static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
851 {
852 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
853 	struct drm_radeon_master_private *master_priv = master->driver_priv;
854 
855 	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
856 	buf->pending = 1;
857 	buf->used = 0;
858 }
859 
/*
 * Translate a command-stream wait request into a WAIT_UNTIL register
 * write on the ring.  Empty or unrecognized flag combinations emit
 * nothing at all.
 */
static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
			  drm_r300_cmd_header_t header)
{
	u32 bits;
	RING_LOCALS;

	if (header.wait.flags == R300_WAIT_2D)
		bits = RADEON_WAIT_2D_IDLE;
	else if (header.wait.flags == R300_WAIT_3D)
		bits = RADEON_WAIT_3D_IDLE;
	else if (header.wait.flags == R300_NEW_WAIT_2D_3D)
		bits = RADEON_WAIT_2D_IDLE | RADEON_WAIT_3D_IDLE;
	else if (header.wait.flags == R300_NEW_WAIT_2D_2D_CLEAN)
		bits = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN;
	else if (header.wait.flags == R300_NEW_WAIT_3D_3D_CLEAN)
		bits = RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
	else if (header.wait.flags == R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN)
		bits = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN |
		       RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
	else
		return;		/* covers flags == 0 and unknown values */

	BEGIN_RING(2);
	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
	OUT_RING(bits);
	ADVANCE_RING();
}
900 
/*
 * Handle an R300_CMD_SCRATCH command: bump the kernel-side age for one of
 * the five tracked scratch registers, write that age into a userspace
 * ref-age array for each listed buffer, decrement the buffer's pending
 * count there, and finally emit the new age to the scratch register.
 * Returns 0 on success, -EINVAL on short input, bad register index, or a
 * faulting/invalid userspace ref-age array.
 */
static int r300_scratch(drm_radeon_private_t *dev_priv,
			drm_radeon_kcmd_buffer_t *cmdbuf,
			drm_r300_cmd_header_t header)
{
	u32 *ref_age_base;
	u32 i, *buf_idx, h_pending;
	u64 *ptr_addr;
	u64 stack_ptr_addr;
	RING_LOCALS;

	/* Payload: a 64-bit user pointer followed by n_bufs buffer indices. */
	if (drm_buffer_unprocessed(cmdbuf->buffer) <
	    (sizeof(u64) + header.scratch.n_bufs * sizeof(*buf_idx))) {
		return -EINVAL;
	}

	/* Only scratch registers 0..4 are tracked. */
	if (header.scratch.reg >= 5) {
		return -EINVAL;
	}

	dev_priv->scratch_ages[header.scratch.reg]++;

	/* The user pointer in the stream may be unaligned. */
	ptr_addr = drm_buffer_read_object(cmdbuf->buffer,
			sizeof(stack_ptr_addr), &stack_ptr_addr);
	ref_age_base = (u32 *)(unsigned long)get_unaligned(ptr_addr);

	for (i=0; i < header.scratch.n_bufs; i++) {
		buf_idx = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
		*buf_idx *= 2; /* 8 bytes per buf */

		/* Publish the new age for this buffer to userspace. */
		if (DRM_COPY_TO_USER(ref_age_base + *buf_idx,
				&dev_priv->scratch_ages[header.scratch.reg],
				sizeof(u32)))
			return -EINVAL;

		/* Read, validate, and decrement the pending counter. */
		if (DRM_COPY_FROM_USER(&h_pending,
				ref_age_base + *buf_idx + 1,
				sizeof(u32)))
			return -EINVAL;

		if (h_pending == 0)
			return -EINVAL;

		h_pending--;

		if (DRM_COPY_TO_USER(ref_age_base + *buf_idx + 1,
					&h_pending,
					sizeof(u32)))
			return -EINVAL;

		drm_buffer_advance(cmdbuf->buffer, sizeof(*buf_idx));
	}

	/* Emit the new age to the scratch register on the ring. */
	BEGIN_RING(2);
	OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
	OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
	ADVANCE_RING();

	return 0;
}
960 
961 /**
962  * Uploads user-supplied vertex program instructions or parameters onto
963  * the graphics card.
964  * Called by r300_do_cp_cmdbuf.
965  */
r300_emit_r500fp(drm_radeon_private_t * dev_priv,drm_radeon_kcmd_buffer_t * cmdbuf,drm_r300_cmd_header_t header)966 static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
967 				       drm_radeon_kcmd_buffer_t *cmdbuf,
968 				       drm_r300_cmd_header_t header)
969 {
970 	int sz;
971 	int addr;
972 	int type;
973 	int isclamp;
974 	int stride;
975 	RING_LOCALS;
976 
977 	sz = header.r500fp.count;
978 	/* address is 9 bits 0 - 8, bit 1 of flags is part of address */
979 	addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
980 
981 	type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
982 	isclamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
983 
984 	addr |= (type << 16);
985 	addr |= (isclamp << 17);
986 
987 	stride = type ? 4 : 6;
988 
989 	DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
990 	if (!sz)
991 		return 0;
992 	if (sz * stride * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
993 		return -EINVAL;
994 
995 	BEGIN_RING(3 + sz * stride);
996 	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
997 	OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
998 	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * stride);
999 
1000 	ADVANCE_RING();
1001 
1002 	return 0;
1003 }
1004 
1005 
1006 /**
1007  * Parses and validates a user-supplied command buffer and emits appropriate
1008  * commands on the DMA ring buffer.
1009  * Called by the ioctl handler function radeon_cp_cmdbuf.
1010  */
/**
 * Main r300 command-stream processor: walks cmdbuf, validating and
 * dispatching each drm_r300_cmd_header_t to its emission helper, bracketed
 * by r300_pacify() calls.  On any error it still pacifies and emits the
 * dispatch age so user space does not dead-lock waiting on buffers.
 *
 * Returns 0 on success or a negative errno from the first failing command.
 */
int r300_do_cp_cmdbuf(struct drm_device *dev,
		      struct drm_file *file_priv,
		      drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	int emit_dispatch_age = 0;	/* set once any buffer is discarded */
	int ret = 0;

	DRM_DEBUG("\n");

	/* pacify */
	r300_pacify(dev_priv);

	/* With few cliprects, emit them once up front; otherwise the
	 * per-packet emission paths handle iterating over them. */
	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
		if (ret)
			goto cleanup;
	}

	/* Consume one command header at a time until the buffer cannot
	 * hold another full header. */
	while (drm_buffer_unprocessed(cmdbuf->buffer)
			>= sizeof(drm_r300_cmd_header_t)) {
		int idx;
		drm_r300_cmd_header_t *header, stack_header;

		header = drm_buffer_read_object(cmdbuf->buffer,
				sizeof(stack_header), &stack_header);

		switch (header->header.cmd_type) {
		case R300_CMD_PACKET0:
			DRM_DEBUG("R300_CMD_PACKET0\n");
			ret = r300_emit_packet0(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_emit_packet0 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_VPU:
			DRM_DEBUG("R300_CMD_VPU\n");
			ret = r300_emit_vpu(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_emit_vpu failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_PACKET3:
			DRM_DEBUG("R300_CMD_PACKET3\n");
			ret = r300_emit_packet3(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_emit_packet3 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_END3D:
			DRM_DEBUG("R300_CMD_END3D\n");
			/* TODO:
			   Ideally userspace driver should not need to issue this call,
			   i.e. the drm driver should issue it automatically and prevent
			   lockups.

			   In practice, we do not understand why this call is needed and what
			   it does (except for some vague guesses that it has to do with cache
			   coherence) and so the user space driver does it.

			   Once we are sure which uses prevent lockups the code could be moved
			   into the kernel and the userspace driver will not
			   need to use this command.

			   Note that issuing this command does not hurt anything
			   except, possibly, performance */
			r300_pacify(dev_priv);
			break;

		case R300_CMD_CP_DELAY:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_CP_DELAY\n");
			{
				int i;
				RING_LOCALS;

				/* Pad the ring with no-op packets to stall
				 * the CP for header->delay.count dwords. */
				BEGIN_RING(header->delay.count);
				for (i = 0; i < header->delay.count; i++)
					OUT_RING(RADEON_CP_PACKET2);
				ADVANCE_RING();
			}
			break;

		case R300_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header->dma.buf_idx;
			/* Validate the index before touching buflist. */
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				ret = -EINVAL;
				goto cleanup;
			}

			/* Only the owning client may discard a buffer that
			 * is not already pending on the hardware. */
			buf = dma->buflist[idx];
			if (buf->file_priv != file_priv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->file_priv, file_priv,
					  buf->pending);
				ret = -EINVAL;
				goto cleanup;
			}

			emit_dispatch_age = 1;
			r300_discard_buffer(dev, file_priv->master, buf);
			break;

		case R300_CMD_WAIT:
			DRM_DEBUG("R300_CMD_WAIT\n");
			r300_cmd_wait(dev_priv, *header);
			break;

		case R300_CMD_SCRATCH:
			DRM_DEBUG("R300_CMD_SCRATCH\n");
			ret = r300_scratch(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_scratch failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_R500FP:
			/* r500fp uploads are only valid on RV515 and newer. */
			if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
				DRM_ERROR("Calling r500 command on r300 card\n");
				ret = -EINVAL;
				goto cleanup;
			}
			DRM_DEBUG("R300_CMD_R500FP\n");
			ret = r300_emit_r500fp(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_emit_r500fp failed\n");
				goto cleanup;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %i at byte %d\n",
				  header->header.cmd_type,
				  cmdbuf->buffer->iterator - (int)sizeof(*header));
			ret = -EINVAL;
			goto cleanup;
		}
	}

	DRM_DEBUG("END\n");

      cleanup:
	r300_pacify(dev_priv);

	/* We emit the vertex buffer age here, outside the pacifier "brackets"
	 * for two reasons:
	 *  (1) This may coalesce multiple age emissions into a single one and
	 *  (2) more importantly, some chips lock up hard when scratch registers
	 *      are written inside the pacifier bracket.
	 */
	if (emit_dispatch_age) {
		RING_LOCALS;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);
		RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);
		ADVANCE_RING();
	}

	COMMIT_RING();

	return ret;
}
1186