1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
27  *
28  */
29 
30 #include "drmP.h"
31 #include "drm.h"
32 #include "i915_drv.h"
33 #include "i915_drm.h"
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36 
37 /*
38  * 965+ support PIPE_CONTROL commands, which provide finer grained control
39  * over cache flushing.
40  */
41 struct pipe_control {
42 	struct drm_i915_gem_object *obj;
43 	volatile u32 *cpu_page;
44 	u32 gtt_offset;
45 };
46 
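/*
 * Space remaining in the ring, computed from the cached head and tail
 * pointers.  The 8-byte gap keeps the tail from fully catching up with
 * the head, since head == tail denotes an empty ring; e.g. a ring with
 * head == tail reports size - 8 bytes free rather than size.
 */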
static inline int ring_space(struct intel_ring_buffer *ring)
48 {
49 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
50 	if (space < 0)
51 		space += ring->size;
52 	return space;
53 }
54 
55 static int
render_ring_flush(struct intel_ring_buffer *ring,
57 		  u32	invalidate_domains,
58 		  u32	flush_domains)
59 {
60 	struct drm_device *dev = ring->dev;
61 	u32 cmd;
62 	int ret;
63 
64 	/*
65 	 * read/write caches:
66 	 *
67 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
68 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
69 	 * also flushed at 2d versus 3d pipeline switches.
70 	 *
71 	 * read-only caches:
72 	 *
73 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
74 	 * MI_READ_FLUSH is set, and is always flushed on 965.
75 	 *
76 	 * I915_GEM_DOMAIN_COMMAND may not exist?
77 	 *
78 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
79 	 * invalidated when MI_EXE_FLUSH is set.
80 	 *
81 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
82 	 * invalidated with every MI_FLUSH.
83 	 *
84 	 * TLBs:
85 	 *
86 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
88 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
89 	 * are flushed at any MI_FLUSH.
90 	 */
91 
92 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
93 	if ((invalidate_domains|flush_domains) &
94 	    I915_GEM_DOMAIN_RENDER)
95 		cmd &= ~MI_NO_WRITE_FLUSH;
96 	if (INTEL_INFO(dev)->gen < 4) {
		/*
		 * On 965 and later the sampler cache is always flushed
		 * and this bit is reserved, so MI_READ_FLUSH is only
		 * set here on pre-965 hardware.
		 */
101 		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
102 			cmd |= MI_READ_FLUSH;
103 	}
104 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
105 		cmd |= MI_EXE_FLUSH;
106 
107 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
108 	    (IS_G4X(dev) || IS_GEN5(dev)))
109 		cmd |= MI_INVALIDATE_ISP;
110 
111 	ret = intel_ring_begin(ring, 2);
112 	if (ret)
113 		return ret;
114 
115 	intel_ring_emit(ring, cmd);
116 	intel_ring_emit(ring, MI_NOOP);
117 	intel_ring_advance(ring);
118 
119 	return 0;
120 }
121 
122 /**
123  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
124  * implementing two workarounds on gen6.  From section 1.4.7.1
125  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
126  *
127  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
128  * produced by non-pipelined state commands), software needs to first
129  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
130  * 0.
131  *
132  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
133  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
134  *
135  * And the workaround for these two requires this workaround first:
136  *
137  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
138  * BEFORE the pipe-control with a post-sync op and no write-cache
139  * flushes.
140  *
141  * And this last workaround is tricky because of the requirements on
142  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
143  * volume 2 part 1:
144  *
145  *     "1 of the following must also be set:
146  *      - Render Target Cache Flush Enable ([12] of DW1)
147  *      - Depth Cache Flush Enable ([0] of DW1)
148  *      - Stall at Pixel Scoreboard ([1] of DW1)
149  *      - Depth Stall ([13] of DW1)
150  *      - Post-Sync Operation ([13] of DW1)
151  *      - Notify Enable ([8] of DW1)"
152  *
153  * The cache flushes require the workaround flush that triggered this
154  * one, so we can't use it.  Depth stall would trigger the same.
155  * Post-sync nonzero is what triggered this second workaround, so we
156  * can't use that one either.  Notify enable is IRQs, which aren't
157  * really our business.  That leaves only stall at scoreboard.
158  */
159 static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
161 {
162 	struct pipe_control *pc = ring->private;
163 	u32 scratch_addr = pc->gtt_offset + 128;
164 	int ret;
165 
166 
167 	ret = intel_ring_begin(ring, 6);
168 	if (ret)
169 		return ret;
170 
171 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
172 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
173 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
174 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
175 	intel_ring_emit(ring, 0); /* low dword */
176 	intel_ring_emit(ring, 0); /* high dword */
177 	intel_ring_emit(ring, MI_NOOP);
178 	intel_ring_advance(ring);
179 
180 	ret = intel_ring_begin(ring, 6);
181 	if (ret)
182 		return ret;
183 
184 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
185 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
186 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
187 	intel_ring_emit(ring, 0);
188 	intel_ring_emit(ring, 0);
189 	intel_ring_emit(ring, MI_NOOP);
190 	intel_ring_advance(ring);
191 
192 	return 0;
193 }
194 
195 static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
197                          u32 invalidate_domains, u32 flush_domains)
198 {
199 	u32 flags = 0;
200 	struct pipe_control *pc = ring->private;
201 	u32 scratch_addr = pc->gtt_offset + 128;
202 	int ret;
203 
204 	/* Force SNB workarounds for PIPE_CONTROL flushes */
205 	intel_emit_post_sync_nonzero_flush(ring);
206 
207 	/* Just flush everything.  Experiments have shown that reducing the
208 	 * number of bits based on the write domains has little performance
209 	 * impact.
210 	 */
211 	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
212 	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
213 	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
214 	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
215 	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
216 	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
217 	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
218 
219 	ret = intel_ring_begin(ring, 6);
220 	if (ret)
221 		return ret;
222 
223 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
224 	intel_ring_emit(ring, flags);
225 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
226 	intel_ring_emit(ring, 0); /* lower dword */
	intel_ring_emit(ring, 0); /* upper dword */
228 	intel_ring_emit(ring, MI_NOOP);
229 	intel_ring_advance(ring);
230 
231 	return 0;
232 }
233 
static void ring_write_tail(struct intel_ring_buffer *ring,
235 			    u32 value)
236 {
237 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
238 	I915_WRITE_TAIL(ring, value);
239 }
240 
u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
242 {
243 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
244 	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
245 			RING_ACTHD(ring->mmio_base) : ACTHD;
246 
247 	return I915_READ(acthd_reg);
248 }
249 
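/*
 * Common (re)initialisation for all rings: stop the ring, program its
 * start address, force the head back to zero (working around G45 parts
 * that fail to reset it), then re-enable the ring via the CTL register
 * and verify it actually came up before updating the software
 * head/tail/space bookkeeping.
 */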
static int init_ring_common(struct intel_ring_buffer *ring)
251 {
252 	struct drm_device *dev = ring->dev;
253 	drm_i915_private_t *dev_priv = dev->dev_private;
254 	struct drm_i915_gem_object *obj = ring->obj;
255 	int ret = 0;
256 	u32 head;
257 
258 	if (HAS_FORCE_WAKE(dev))
259 		gen6_gt_force_wake_get(dev_priv);
260 
261 	/* Stop the ring if it's running. */
262 	I915_WRITE_CTL(ring, 0);
263 	I915_WRITE_HEAD(ring, 0);
264 	ring->write_tail(ring, 0);
265 
266 	/* Initialize the ring. */
267 	I915_WRITE_START(ring, obj->gtt_offset);
268 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
269 
270 	/* G45 ring initialization fails to reset head to zero */
271 	if (head != 0) {
272 		DRM_DEBUG_KMS("%s head not reset to zero "
273 			      "ctl %08x head %08x tail %08x start %08x\n",
274 			      ring->name,
275 			      I915_READ_CTL(ring),
276 			      I915_READ_HEAD(ring),
277 			      I915_READ_TAIL(ring),
278 			      I915_READ_START(ring));
279 
280 		I915_WRITE_HEAD(ring, 0);
281 
282 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
283 			DRM_ERROR("failed to set %s head to zero "
284 				  "ctl %08x head %08x tail %08x start %08x\n",
285 				  ring->name,
286 				  I915_READ_CTL(ring),
287 				  I915_READ_HEAD(ring),
288 				  I915_READ_TAIL(ring),
289 				  I915_READ_START(ring));
290 		}
291 	}
292 
293 	I915_WRITE_CTL(ring,
294 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
295 			| RING_VALID);
296 
297 	/* If the head is still not zero, the ring is dead */
298 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
299 		     I915_READ_START(ring) == obj->gtt_offset &&
300 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
301 		DRM_ERROR("%s initialization failed "
302 				"ctl %08x head %08x tail %08x start %08x\n",
303 				ring->name,
304 				I915_READ_CTL(ring),
305 				I915_READ_HEAD(ring),
306 				I915_READ_TAIL(ring),
307 				I915_READ_START(ring));
308 		ret = -EIO;
309 		goto out;
310 	}
311 
312 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
313 		i915_kernel_lost_context(ring->dev);
314 	else {
315 		ring->head = I915_READ_HEAD(ring);
316 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
317 		ring->space = ring_space(ring);
318 		ring->last_retired_head = -1;
319 	}
320 
321 out:
322 	if (HAS_FORCE_WAKE(dev))
323 		gen6_gt_force_wake_put(dev_priv);
324 
325 	return ret;
326 }
327 
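/*
 * Allocate the per-ring PIPE_CONTROL scratch ("seqno") page: a single
 * LLC-cached page pinned into the GTT so the GPU can write to it at
 * pc->gtt_offset, and kmap()ed so the CPU can read the results back
 * through pc->cpu_page.
 */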
328 static int
init_pipe_control(struct intel_ring_buffer *ring)
330 {
331 	struct pipe_control *pc;
332 	struct drm_i915_gem_object *obj;
333 	int ret;
334 
335 	if (ring->private)
336 		return 0;
337 
338 	pc = kmalloc(sizeof(*pc), GFP_KERNEL);
339 	if (!pc)
340 		return -ENOMEM;
341 
342 	obj = i915_gem_alloc_object(ring->dev, 4096);
343 	if (obj == NULL) {
344 		DRM_ERROR("Failed to allocate seqno page\n");
345 		ret = -ENOMEM;
346 		goto err;
347 	}
348 
349 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
350 
351 	ret = i915_gem_object_pin(obj, 4096, true);
352 	if (ret)
353 		goto err_unref;
354 
355 	pc->gtt_offset = obj->gtt_offset;
356 	pc->cpu_page =  kmap(obj->pages[0]);
357 	if (pc->cpu_page == NULL)
358 		goto err_unpin;
359 
360 	pc->obj = obj;
361 	ring->private = pc;
362 	return 0;
363 
364 err_unpin:
365 	i915_gem_object_unpin(obj);
366 err_unref:
367 	drm_gem_object_unreference(&obj->base);
368 err:
369 	kfree(pc);
370 	return ret;
371 }
372 
373 static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
375 {
376 	struct pipe_control *pc = ring->private;
377 	struct drm_i915_gem_object *obj;
378 
379 	if (!ring->private)
380 		return;
381 
382 	obj = pc->obj;
383 	kunmap(obj->pages[0]);
384 	i915_gem_object_unpin(obj);
385 	drm_gem_object_unreference(&obj->base);
386 
387 	kfree(pc);
388 	ring->private = NULL;
389 }
390 
static int init_render_ring(struct intel_ring_buffer *ring)
392 {
393 	struct drm_device *dev = ring->dev;
394 	struct drm_i915_private *dev_priv = dev->dev_private;
395 	int ret = init_ring_common(ring);
396 
397 	if (INTEL_INFO(dev)->gen > 3) {
398 		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
399 		I915_WRITE(MI_MODE, mode);
400 		if (IS_GEN7(dev))
401 			I915_WRITE(GFX_MODE_GEN7,
402 				   GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
403 				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
404 	}
405 
406 	if (INTEL_INFO(dev)->gen >= 5) {
407 		ret = init_pipe_control(ring);
408 		if (ret)
409 			return ret;
410 	}
411 
412 
413 	if (IS_GEN6(dev)) {
414 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
415 		 * "If this bit is set, STCunit will have LRA as replacement
416 		 *  policy. [...] This bit must be reset.  LRA replacement
417 		 *  policy is not supported."
418 		 */
419 		I915_WRITE(CACHE_MODE_0,
420 			   CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
421 	}
422 
423 	if (INTEL_INFO(dev)->gen >= 6) {
424 		I915_WRITE(INSTPM,
425 			   INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
426 	}
427 
428 	return ret;
429 }
430 
static void render_ring_cleanup(struct intel_ring_buffer *ring)
432 {
433 	if (!ring->private)
434 		return;
435 
436 	cleanup_pipe_control(ring);
437 }
438 
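/*
 * Emit a semaphore mailbox update: write @seqno into another ring's
 * mailbox register at @mmio_offset, so that ring can later block on it
 * with an MI_SEMAPHORE_MBOX compare (see intel_ring_sync below).
 */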
439 static void
update_mboxes(struct intel_ring_buffer *ring,
441 	    u32 seqno,
442 	    u32 mmio_offset)
443 {
444 	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
445 			      MI_SEMAPHORE_GLOBAL_GTT |
446 			      MI_SEMAPHORE_REGISTER |
447 			      MI_SEMAPHORE_UPDATE);
448 	intel_ring_emit(ring, seqno);
449 	intel_ring_emit(ring, mmio_offset);
450 }
451 
452 /**
453  * gen6_add_request - Update the semaphore mailbox registers
454  *
455  * @ring - ring that is adding a request
456  * @seqno - return seqno stuck into the ring
457  *
458  * Update the mailbox registers in the *other* rings with the current seqno.
459  * This acts like a signal in the canonical semaphore.
460  */
461 static int
gen6_add_request(struct intel_ring_buffer *ring,
463 		 u32 *seqno)
464 {
465 	u32 mbox1_reg;
466 	u32 mbox2_reg;
467 	int ret;
468 
469 	ret = intel_ring_begin(ring, 10);
470 	if (ret)
471 		return ret;
472 
473 	mbox1_reg = ring->signal_mbox[0];
474 	mbox2_reg = ring->signal_mbox[1];
475 
476 	*seqno = i915_gem_next_request_seqno(ring);
477 
478 	update_mboxes(ring, *seqno, mbox1_reg);
479 	update_mboxes(ring, *seqno, mbox2_reg);
480 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
481 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
482 	intel_ring_emit(ring, *seqno);
483 	intel_ring_emit(ring, MI_USER_INTERRUPT);
484 	intel_ring_advance(ring);
485 
486 	return 0;
487 }
488 
489 /**
490  * intel_ring_sync - sync the waiter to the signaller on seqno
491  *
492  * @waiter - ring that is waiting
493  * @signaller - ring which has, or will signal
494  * @seqno - seqno which the waiter will block on
495  */
496 static int
intel_ring_sync(struct intel_ring_buffer *waiter,
498 		struct intel_ring_buffer *signaller,
499 		int ring,
500 		u32 seqno)
501 {
502 	int ret;
503 	u32 dw1 = MI_SEMAPHORE_MBOX |
504 		  MI_SEMAPHORE_COMPARE |
505 		  MI_SEMAPHORE_REGISTER;
506 
507 	ret = intel_ring_begin(waiter, 4);
508 	if (ret)
509 		return ret;
510 
511 	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
512 	intel_ring_emit(waiter, seqno);
513 	intel_ring_emit(waiter, 0);
514 	intel_ring_emit(waiter, MI_NOOP);
515 	intel_ring_advance(waiter);
516 
517 	return 0;
518 }
519 
520 /* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
521 int
render_ring_sync_to(struct intel_ring_buffer *waiter,
523 		    struct intel_ring_buffer *signaller,
524 		    u32 seqno)
525 {
526 	WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID);
527 	return intel_ring_sync(waiter,
528 			       signaller,
529 			       RCS,
530 			       seqno);
531 }
532 
533 /* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
534 int
gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
536 		      struct intel_ring_buffer *signaller,
537 		      u32 seqno)
538 {
539 	WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID);
540 	return intel_ring_sync(waiter,
541 			       signaller,
542 			       VCS,
543 			       seqno);
544 }
545 
546 /* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
547 int
gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
549 		      struct intel_ring_buffer *signaller,
550 		      u32 seqno)
551 {
552 	WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID);
553 	return intel_ring_sync(waiter,
554 			       signaller,
555 			       BCS,
556 			       seqno);
557 }
558 
559 
560 
561 #define PIPE_CONTROL_FLUSH(ring__, addr__)					\
562 do {									\
563 	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
564 		 PIPE_CONTROL_DEPTH_STALL);				\
565 	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
566 	intel_ring_emit(ring__, 0);							\
567 	intel_ring_emit(ring__, 0);							\
568 } while (0)
569 
570 static int
pc_render_add_request(struct intel_ring_buffer *ring,
572 		      u32 *result)
573 {
574 	u32 seqno = i915_gem_next_request_seqno(ring);
575 	struct pipe_control *pc = ring->private;
576 	u32 scratch_addr = pc->gtt_offset + 128;
577 	int ret;
578 
579 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
580 	 * incoherent with writes to memory, i.e. completely fubar,
581 	 * so we need to use PIPE_NOTIFY instead.
582 	 *
583 	 * However, we also need to workaround the qword write
584 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
585 	 * memory before requesting an interrupt.
586 	 */
587 	ret = intel_ring_begin(ring, 32);
588 	if (ret)
589 		return ret;
590 
591 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
592 			PIPE_CONTROL_WRITE_FLUSH |
593 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
594 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
595 	intel_ring_emit(ring, seqno);
596 	intel_ring_emit(ring, 0);
597 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
598 	scratch_addr += 128; /* write to separate cachelines */
599 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
600 	scratch_addr += 128;
601 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
602 	scratch_addr += 128;
603 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
604 	scratch_addr += 128;
605 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
606 	scratch_addr += 128;
607 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
608 
609 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
610 			PIPE_CONTROL_WRITE_FLUSH |
611 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
612 			PIPE_CONTROL_NOTIFY);
613 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
614 	intel_ring_emit(ring, seqno);
615 	intel_ring_emit(ring, 0);
616 	intel_ring_advance(ring);
617 
618 	*result = seqno;
619 	return 0;
620 }
621 
622 static int
render_ring_add_request(struct intel_ring_buffer *ring,
624 			u32 *result)
625 {
626 	u32 seqno = i915_gem_next_request_seqno(ring);
627 	int ret;
628 
629 	ret = intel_ring_begin(ring, 4);
630 	if (ret)
631 		return ret;
632 
633 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
634 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
635 	intel_ring_emit(ring, seqno);
636 	intel_ring_emit(ring, MI_USER_INTERRUPT);
637 	intel_ring_advance(ring);
638 
639 	*result = seqno;
640 	return 0;
641 }
642 
643 static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring)
645 {
646 	struct drm_device *dev = ring->dev;
647 
648 	/* Workaround to force correct ordering between irq and seqno writes on
649 	 * ivb (and maybe also on snb) by reading from a CS register (like
650 	 * ACTHD) before reading the status page. */
651 	if (IS_GEN6(dev) || IS_GEN7(dev))
652 		intel_ring_get_active_head(ring);
653 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
654 }
655 
656 static u32
ring_get_seqno(struct intel_ring_buffer *ring)
658 {
659 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
660 }
661 
662 static u32
pc_render_get_seqno(struct intel_ring_buffer *ring)
664 {
665 	struct pipe_control *pc = ring->private;
666 	return pc->cpu_page[0];
667 }
668 
669 static void
ironlake_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
671 {
672 	dev_priv->gt_irq_mask &= ~mask;
673 	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
674 	POSTING_READ(GTIMR);
675 }
676 
677 static void
ironlake_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
679 {
680 	dev_priv->gt_irq_mask |= mask;
681 	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
682 	POSTING_READ(GTIMR);
683 }
684 
685 static void
i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
687 {
688 	dev_priv->irq_mask &= ~mask;
689 	I915_WRITE(IMR, dev_priv->irq_mask);
690 	POSTING_READ(IMR);
691 }
692 
693 static void
i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
695 {
696 	dev_priv->irq_mask |= mask;
697 	I915_WRITE(IMR, dev_priv->irq_mask);
698 	POSTING_READ(IMR);
699 }
700 
701 static bool
render_ring_get_irq(struct intel_ring_buffer *ring)
703 {
704 	struct drm_device *dev = ring->dev;
705 	drm_i915_private_t *dev_priv = dev->dev_private;
706 
707 	if (!dev->irq_enabled)
708 		return false;
709 
710 	spin_lock(&ring->irq_lock);
711 	if (ring->irq_refcount++ == 0) {
712 		if (HAS_PCH_SPLIT(dev))
713 			ironlake_enable_irq(dev_priv,
714 					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
715 		else
716 			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
717 	}
718 	spin_unlock(&ring->irq_lock);
719 
720 	return true;
721 }
722 
723 static void
render_ring_put_irq(struct intel_ring_buffer *ring)
725 {
726 	struct drm_device *dev = ring->dev;
727 	drm_i915_private_t *dev_priv = dev->dev_private;
728 
729 	spin_lock(&ring->irq_lock);
730 	if (--ring->irq_refcount == 0) {
731 		if (HAS_PCH_SPLIT(dev))
732 			ironlake_disable_irq(dev_priv,
733 					     GT_USER_INTERRUPT |
734 					     GT_PIPE_NOTIFY);
735 		else
736 			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
737 	}
738 	spin_unlock(&ring->irq_lock);
739 }
740 
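/*
 * Point the hardware status page register at this ring's status page
 * and, on gen6+, invalidate the TLB entry for it so subsequent status
 * writes land in the newly programmed page.
 */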
void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
742 {
743 	struct drm_device *dev = ring->dev;
744 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
745 	u32 mmio = 0;
746 
747 	/* The ring status page addresses are no longer next to the rest of
748 	 * the ring registers as of gen7.
749 	 */
750 	if (IS_GEN7(dev)) {
751 		switch (ring->id) {
752 		case RCS:
753 			mmio = RENDER_HWS_PGA_GEN7;
754 			break;
755 		case BCS:
756 			mmio = BLT_HWS_PGA_GEN7;
757 			break;
758 		case VCS:
759 			mmio = BSD_HWS_PGA_GEN7;
760 			break;
761 		}
762 	} else if (IS_GEN6(ring->dev)) {
763 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
764 	} else {
765 		mmio = RING_HWS_PGA(ring->mmio_base);
766 	}
767 
768 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
769 	POSTING_READ(mmio);
770 
771 	/* Flush the TLB for this page */
772 	if (INTEL_INFO(dev)->gen >= 6) {
773 		u32 reg = RING_INSTPM(ring->mmio_base);
774 		I915_WRITE(reg,
775 			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
776 					      INSTPM_SYNC_FLUSH));
777 		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
778 			     1000))
779 			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
780 				  ring->name);
781 	}
782 }
783 
784 static int
bsd_ring_flush(struct intel_ring_buffer *ring,
786 	       u32     invalidate_domains,
787 	       u32     flush_domains)
788 {
789 	int ret;
790 
791 	ret = intel_ring_begin(ring, 2);
792 	if (ret)
793 		return ret;
794 
795 	intel_ring_emit(ring, MI_FLUSH);
796 	intel_ring_emit(ring, MI_NOOP);
797 	intel_ring_advance(ring);
798 	return 0;
799 }
800 
801 static int
ring_add_request(struct intel_ring_buffer *ring,
803 		 u32 *result)
804 {
805 	u32 seqno;
806 	int ret;
807 
808 	ret = intel_ring_begin(ring, 4);
809 	if (ret)
810 		return ret;
811 
812 	seqno = i915_gem_next_request_seqno(ring);
813 
814 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
815 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
816 	intel_ring_emit(ring, seqno);
817 	intel_ring_emit(ring, MI_USER_INTERRUPT);
818 	intel_ring_advance(ring);
819 
820 	*result = seqno;
821 	return 0;
822 }
823 
824 static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
826 {
827 	struct drm_device *dev = ring->dev;
828 	drm_i915_private_t *dev_priv = dev->dev_private;
829 
830 	if (!dev->irq_enabled)
831 	       return false;
832 
	/* It looks like we need to prevent the gt from suspending while waiting
	 * for a notify irq, otherwise irqs seem to get lost on at least the
	 * blt/bsd rings on ivb. */
836 	gen6_gt_force_wake_get(dev_priv);
837 
838 	spin_lock(&ring->irq_lock);
839 	if (ring->irq_refcount++ == 0) {
840 		ring->irq_mask &= ~rflag;
841 		I915_WRITE_IMR(ring, ring->irq_mask);
842 		ironlake_enable_irq(dev_priv, gflag);
843 	}
844 	spin_unlock(&ring->irq_lock);
845 
846 	return true;
847 }
848 
849 static void
gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
851 {
852 	struct drm_device *dev = ring->dev;
853 	drm_i915_private_t *dev_priv = dev->dev_private;
854 
855 	spin_lock(&ring->irq_lock);
856 	if (--ring->irq_refcount == 0) {
857 		ring->irq_mask |= rflag;
858 		I915_WRITE_IMR(ring, ring->irq_mask);
859 		ironlake_disable_irq(dev_priv, gflag);
860 	}
861 	spin_unlock(&ring->irq_lock);
862 
863 	gen6_gt_force_wake_put(dev_priv);
864 }
865 
866 static bool
bsd_ring_get_irq(struct intel_ring_buffer *ring)
868 {
869 	struct drm_device *dev = ring->dev;
870 	drm_i915_private_t *dev_priv = dev->dev_private;
871 
872 	if (!dev->irq_enabled)
873 		return false;
874 
875 	spin_lock(&ring->irq_lock);
876 	if (ring->irq_refcount++ == 0) {
877 		if (IS_G4X(dev))
878 			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
879 		else
880 			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
881 	}
882 	spin_unlock(&ring->irq_lock);
883 
884 	return true;
885 }
886 static void
bsd_ring_put_irq(struct intel_ring_buffer *ring)
888 {
889 	struct drm_device *dev = ring->dev;
890 	drm_i915_private_t *dev_priv = dev->dev_private;
891 
892 	spin_lock(&ring->irq_lock);
893 	if (--ring->irq_refcount == 0) {
894 		if (IS_G4X(dev))
895 			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
896 		else
897 			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
898 	}
899 	spin_unlock(&ring->irq_lock);
900 }
901 
902 static int
ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
904 {
905 	int ret;
906 
907 	ret = intel_ring_begin(ring, 2);
908 	if (ret)
909 		return ret;
910 
911 	intel_ring_emit(ring,
912 			MI_BATCH_BUFFER_START | (2 << 6) |
913 			MI_BATCH_NON_SECURE_I965);
914 	intel_ring_emit(ring, offset);
915 	intel_ring_advance(ring);
916 
917 	return 0;
918 }
919 
920 static int
render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
922 				u32 offset, u32 len)
923 {
924 	struct drm_device *dev = ring->dev;
925 	int ret;
926 
927 	if (IS_I830(dev) || IS_845G(dev)) {
928 		ret = intel_ring_begin(ring, 4);
929 		if (ret)
930 			return ret;
931 
932 		intel_ring_emit(ring, MI_BATCH_BUFFER);
933 		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
934 		intel_ring_emit(ring, offset + len - 8);
935 		intel_ring_emit(ring, 0);
936 	} else {
937 		ret = intel_ring_begin(ring, 2);
938 		if (ret)
939 			return ret;
940 
941 		if (INTEL_INFO(dev)->gen >= 4) {
942 			intel_ring_emit(ring,
943 					MI_BATCH_BUFFER_START | (2 << 6) |
944 					MI_BATCH_NON_SECURE_I965);
945 			intel_ring_emit(ring, offset);
946 		} else {
947 			intel_ring_emit(ring,
948 					MI_BATCH_BUFFER_START | (2 << 6));
949 			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
950 		}
951 	}
952 	intel_ring_advance(ring);
953 
954 	return 0;
955 }
956 
static void cleanup_status_page(struct intel_ring_buffer *ring)
958 {
959 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
960 	struct drm_i915_gem_object *obj;
961 
962 	obj = ring->status_page.obj;
963 	if (obj == NULL)
964 		return;
965 
966 	kunmap(obj->pages[0]);
967 	i915_gem_object_unpin(obj);
968 	drm_gem_object_unreference(&obj->base);
969 	ring->status_page.obj = NULL;
970 
971 	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
972 }
973 
static int init_status_page(struct intel_ring_buffer *ring)
975 {
976 	struct drm_device *dev = ring->dev;
977 	drm_i915_private_t *dev_priv = dev->dev_private;
978 	struct drm_i915_gem_object *obj;
979 	int ret;
980 
981 	obj = i915_gem_alloc_object(dev, 4096);
982 	if (obj == NULL) {
983 		DRM_ERROR("Failed to allocate status page\n");
984 		ret = -ENOMEM;
985 		goto err;
986 	}
987 
988 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
989 
990 	ret = i915_gem_object_pin(obj, 4096, true);
991 	if (ret != 0) {
992 		goto err_unref;
993 	}
994 
995 	ring->status_page.gfx_addr = obj->gtt_offset;
996 	ring->status_page.page_addr = kmap(obj->pages[0]);
997 	if (ring->status_page.page_addr == NULL) {
998 		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
999 		goto err_unpin;
1000 	}
1001 	ring->status_page.obj = obj;
1002 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1003 
1004 	intel_ring_setup_status_page(ring);
1005 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1006 			ring->name, ring->status_page.gfx_addr);
1007 
1008 	return 0;
1009 
1010 err_unpin:
1011 	i915_gem_object_unpin(obj);
1012 err_unref:
1013 	drm_gem_object_unreference(&obj->base);
1014 err:
1015 	return ret;
1016 }
1017 
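/*
 * Generic ring setup: allocate the hardware status page where needed,
 * allocate and pin the ring object, map it write-combined through the
 * GTT aperture, and finally call the ring's init hook (which performs
 * the hardware initialisation above).
 */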
int intel_init_ring_buffer(struct drm_device *dev,
1019 			   struct intel_ring_buffer *ring)
1020 {
1021 	struct drm_i915_gem_object *obj;
1022 	int ret;
1023 
1024 	ring->dev = dev;
1025 	INIT_LIST_HEAD(&ring->active_list);
1026 	INIT_LIST_HEAD(&ring->request_list);
1027 	INIT_LIST_HEAD(&ring->gpu_write_list);
1028 
1029 	init_waitqueue_head(&ring->irq_queue);
1030 	spin_lock_init(&ring->irq_lock);
1031 	ring->irq_mask = ~0;
1032 
1033 	if (I915_NEED_GFX_HWS(dev)) {
1034 		ret = init_status_page(ring);
1035 		if (ret)
1036 			return ret;
1037 	}
1038 
1039 	obj = i915_gem_alloc_object(dev, ring->size);
1040 	if (obj == NULL) {
1041 		DRM_ERROR("Failed to allocate ringbuffer\n");
1042 		ret = -ENOMEM;
1043 		goto err_hws;
1044 	}
1045 
1046 	ring->obj = obj;
1047 
1048 	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1049 	if (ret)
1050 		goto err_unref;
1051 
1052 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1053 	if (ret)
1054 		goto err_unpin;
1055 
1056 	ring->map.size = ring->size;
1057 	ring->map.offset = dev->agp->base + obj->gtt_offset;
1058 	ring->map.type = 0;
1059 	ring->map.flags = 0;
1060 	ring->map.mtrr = 0;
1061 
1062 	drm_core_ioremap_wc(&ring->map, dev);
1063 	if (ring->map.handle == NULL) {
1064 		DRM_ERROR("Failed to map ringbuffer.\n");
1065 		ret = -EINVAL;
1066 		goto err_unpin;
1067 	}
1068 
1069 	ring->virtual_start = ring->map.handle;
1070 	ret = ring->init(ring);
1071 	if (ret)
1072 		goto err_unmap;
1073 
1074 	/* Workaround an erratum on the i830 which causes a hang if
1075 	 * the TAIL pointer points to within the last 2 cachelines
1076 	 * of the buffer.
1077 	 */
1078 	ring->effective_size = ring->size;
1079 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1080 		ring->effective_size -= 128;
1081 
1082 	return 0;
1083 
1084 err_unmap:
1085 	drm_core_ioremapfree(&ring->map, dev);
1086 err_unpin:
1087 	i915_gem_object_unpin(obj);
1088 err_unref:
1089 	drm_gem_object_unreference(&obj->base);
1090 	ring->obj = NULL;
1091 err_hws:
1092 	cleanup_status_page(ring);
1093 	return ret;
1094 }
1095 
void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1097 {
1098 	struct drm_i915_private *dev_priv;
1099 	int ret;
1100 
1101 	if (ring->obj == NULL)
1102 		return;
1103 
1104 	/* Disable the ring buffer. The ring must be idle at this point */
1105 	dev_priv = ring->dev->dev_private;
1106 	ret = intel_wait_ring_idle(ring);
1107 	if (ret)
1108 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1109 			  ring->name, ret);
1110 
1111 	I915_WRITE_CTL(ring, 0);
1112 
1113 	drm_core_ioremapfree(&ring->map, ring->dev);
1114 
1115 	i915_gem_object_unpin(ring->obj);
1116 	drm_gem_object_unreference(&ring->obj->base);
1117 	ring->obj = NULL;
1118 
1119 	if (ring->cleanup)
1120 		ring->cleanup(ring);
1121 
1122 	cleanup_status_page(ring);
1123 }
1124 
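/*
 * Pad the rest of the ring with MI_NOOPs and wrap the tail back to the
 * start, first waiting for the unexecuted tail end of the ring to drain
 * if there is not yet enough free space to do so.
 */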
static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1126 {
1127 	unsigned int *virt;
1128 	int rem = ring->size - ring->tail;
1129 
1130 	if (ring->space < rem) {
1131 		int ret = intel_wait_ring_buffer(ring, rem);
1132 		if (ret)
1133 			return ret;
1134 	}
1135 
1136 	virt = (unsigned int *)(ring->virtual_start + ring->tail);
1137 	rem /= 8;
1138 	while (rem--) {
1139 		*virt++ = MI_NOOP;
1140 		*virt++ = MI_NOOP;
1141 	}
1142 
1143 	ring->tail = 0;
1144 	ring->space = ring_space(ring);
1145 
1146 	return 0;
1147 }
1148 
static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1150 {
1151 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1152 	bool was_interruptible;
1153 	int ret;
1154 
1155 	/* XXX As we have not yet audited all the paths to check that
1156 	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
1157 	 * allow us to be interruptible by a signal.
1158 	 */
1159 	was_interruptible = dev_priv->mm.interruptible;
1160 	dev_priv->mm.interruptible = false;
1161 
1162 	ret = i915_wait_request(ring, seqno, true);
1163 
1164 	dev_priv->mm.interruptible = was_interruptible;
1165 
1166 	return ret;
1167 }
1168 
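/*
 * Try to free ring space by waiting on outstanding requests instead of
 * polling the HEAD register: retire whatever has already completed,
 * then wait for the seqno of the oldest request whose completion frees
 * at least n bytes.  Returns -ENOSPC if no such request exists, in
 * which case the caller falls back to busy-waiting on the ring
 * registers.
 */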
static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1170 {
1171 	struct drm_i915_gem_request *request;
1172 	u32 seqno = 0;
1173 	int ret;
1174 
1175 	i915_gem_retire_requests_ring(ring);
1176 
1177 	if (ring->last_retired_head != -1) {
1178 		ring->head = ring->last_retired_head;
1179 		ring->last_retired_head = -1;
1180 		ring->space = ring_space(ring);
1181 		if (ring->space >= n)
1182 			return 0;
1183 	}
1184 
1185 	list_for_each_entry(request, &ring->request_list, list) {
1186 		int space;
1187 
1188 		if (request->tail == -1)
1189 			continue;
1190 
1191 		space = request->tail - (ring->tail + 8);
1192 		if (space < 0)
1193 			space += ring->size;
1194 		if (space >= n) {
1195 			seqno = request->seqno;
1196 			break;
1197 		}
1198 
1199 		/* Consume this request in case we need more space than
1200 		 * is available and so need to prevent a race between
1201 		 * updating last_retired_head and direct reads of
1202 		 * I915_RING_HEAD. It also provides a nice sanity check.
1203 		 */
1204 		request->tail = -1;
1205 	}
1206 
1207 	if (seqno == 0)
1208 		return -ENOSPC;
1209 
1210 	ret = intel_ring_wait_seqno(ring, seqno);
1211 	if (ret)
1212 		return ret;
1213 
1214 	if (WARN_ON(ring->last_retired_head == -1))
1215 		return -ENOSPC;
1216 
1217 	ring->head = ring->last_retired_head;
1218 	ring->last_retired_head = -1;
1219 	ring->space = ring_space(ring);
1220 	if (WARN_ON(ring->space < n))
1221 		return -ENOSPC;
1222 
1223 	return 0;
1224 }
1225 
int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1227 {
1228 	struct drm_device *dev = ring->dev;
1229 	struct drm_i915_private *dev_priv = dev->dev_private;
1230 	unsigned long end;
1231 	int ret;
1232 
1233 	ret = intel_ring_wait_request(ring, n);
1234 	if (ret != -ENOSPC)
1235 		return ret;
1236 
1237 	trace_i915_ring_wait_begin(ring);
1238 	if (drm_core_check_feature(dev, DRIVER_GEM))
1239 		/* With GEM the hangcheck timer should kick us out of the loop,
1240 		 * leaving it early runs the risk of corrupting GEM state (due
1241 		 * to running on almost untested codepaths). But on resume
1242 		 * timers don't work yet, so prevent a complete hang in that
1243 		 * case by choosing an insanely large timeout. */
1244 		end = jiffies + 60 * HZ;
1245 	else
1246 		end = jiffies + 3 * HZ;
1247 
1248 	do {
1249 		ring->head = I915_READ_HEAD(ring);
1250 		ring->space = ring_space(ring);
1251 		if (ring->space >= n) {
1252 			trace_i915_ring_wait_end(ring);
1253 			return 0;
1254 		}
1255 
1256 		if (dev->primary->master) {
1257 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1258 			if (master_priv->sarea_priv)
1259 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1260 		}
1261 
1262 		msleep(1);
1263 		if (atomic_read(&dev_priv->mm.wedged))
1264 			return -EAGAIN;
1265 	} while (!time_after(jiffies, end));
1266 	trace_i915_ring_wait_end(ring);
1267 	return -EBUSY;
1268 }
1269 
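/*
 * Reserve space in the ring for num_dwords dwords of commands.  A
 * minimal usage sketch, following the pattern used throughout this
 * file:
 *
 *	ret = intel_ring_begin(ring, 2);
 *	if (ret)
 *		return ret;
 *	intel_ring_emit(ring, MI_FLUSH);
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);
 *
 * Fails with -EIO if the GPU is wedged, or propagates whatever error
 * the space-waiting paths return.
 */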
int intel_ring_begin(struct intel_ring_buffer *ring,
1271 		     int num_dwords)
1272 {
1273 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1274 	int n = 4*num_dwords;
1275 	int ret;
1276 
1277 	if (unlikely(atomic_read(&dev_priv->mm.wedged)))
1278 		return -EIO;
1279 
1280 	if (unlikely(ring->tail + n > ring->effective_size)) {
1281 		ret = intel_wrap_ring_buffer(ring);
1282 		if (unlikely(ret))
1283 			return ret;
1284 	}
1285 
1286 	if (unlikely(ring->space < n)) {
1287 		ret = intel_wait_ring_buffer(ring, n);
1288 		if (unlikely(ret))
1289 			return ret;
1290 	}
1291 
1292 	ring->space -= n;
1293 	return 0;
1294 }
1295 
void intel_ring_advance(struct intel_ring_buffer *ring)
1297 {
1298 	ring->tail &= ring->size - 1;
1299 	ring->write_tail(ring, ring->tail);
1300 }
1301 
1302 static const struct intel_ring_buffer render_ring = {
1303 	.name			= "render ring",
1304 	.id			= RCS,
1305 	.mmio_base		= RENDER_RING_BASE,
1306 	.size			= 32 * PAGE_SIZE,
1307 	.init			= init_render_ring,
1308 	.write_tail		= ring_write_tail,
1309 	.flush			= render_ring_flush,
1310 	.add_request		= render_ring_add_request,
1311 	.get_seqno		= ring_get_seqno,
1312 	.irq_get		= render_ring_get_irq,
1313 	.irq_put		= render_ring_put_irq,
1314 	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
1315 	.cleanup		= render_ring_cleanup,
1316 	.sync_to		= render_ring_sync_to,
1317 	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
1318 				   MI_SEMAPHORE_SYNC_RV,
1319 				   MI_SEMAPHORE_SYNC_RB},
1320 	.signal_mbox		= {GEN6_VRSYNC, GEN6_BRSYNC},
1321 };
1322 
1323 /* ring buffer for bit-stream decoder */
1324 
1325 static const struct intel_ring_buffer bsd_ring = {
1326 	.name                   = "bsd ring",
1327 	.id			= VCS,
1328 	.mmio_base		= BSD_RING_BASE,
1329 	.size			= 32 * PAGE_SIZE,
1330 	.init			= init_ring_common,
1331 	.write_tail		= ring_write_tail,
1332 	.flush			= bsd_ring_flush,
1333 	.add_request		= ring_add_request,
1334 	.get_seqno		= ring_get_seqno,
1335 	.irq_get		= bsd_ring_get_irq,
1336 	.irq_put		= bsd_ring_put_irq,
1337 	.dispatch_execbuffer	= ring_dispatch_execbuffer,
1338 };
1339 
1340 
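/*
 * Gen6 BSD tail writes are wrapped in a PSMI sleep-message sequence:
 * disable the sleep message, wait for the idle indicator, write the new
 * tail, then re-enable the sleep message, presumably so the tail update
 * is not missed while the ring is in a low-power state.
 */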
static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1342 				     u32 value)
1343 {
1344 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1345 
	/* Every tail move must follow the sequence below */
1347 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1348 		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1349 		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
1350 	I915_WRITE(GEN6_BSD_RNCID, 0x0);
1351 
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for IDLE Indicator\n");
1356 
1357 	I915_WRITE_TAIL(ring, value);
1358 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1359 		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1360 		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
1361 }
1362 
static int gen6_ring_flush(struct intel_ring_buffer *ring,
1364 			   u32 invalidate, u32 flush)
1365 {
1366 	uint32_t cmd;
1367 	int ret;
1368 
1369 	ret = intel_ring_begin(ring, 4);
1370 	if (ret)
1371 		return ret;
1372 
1373 	cmd = MI_FLUSH_DW;
1374 	if (invalidate & I915_GEM_GPU_DOMAINS)
1375 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1376 	intel_ring_emit(ring, cmd);
1377 	intel_ring_emit(ring, 0);
1378 	intel_ring_emit(ring, 0);
1379 	intel_ring_emit(ring, MI_NOOP);
1380 	intel_ring_advance(ring);
1381 	return 0;
1382 }
1383 
1384 static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1386 			      u32 offset, u32 len)
1387 {
1388 	int ret;
1389 
1390 	ret = intel_ring_begin(ring, 2);
1391 	if (ret)
1392 		return ret;
1393 
1394 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1395 	/* bit0-7 is the length on GEN6+ */
1396 	intel_ring_emit(ring, offset);
1397 	intel_ring_advance(ring);
1398 
1399 	return 0;
1400 }
1401 
1402 static bool
gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
1404 {
1405 	return gen6_ring_get_irq(ring,
1406 				 GT_USER_INTERRUPT,
1407 				 GEN6_RENDER_USER_INTERRUPT);
1408 }
1409 
1410 static void
gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
1412 {
1413 	return gen6_ring_put_irq(ring,
1414 				 GT_USER_INTERRUPT,
1415 				 GEN6_RENDER_USER_INTERRUPT);
1416 }
1417 
1418 static bool
gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
1420 {
1421 	return gen6_ring_get_irq(ring,
1422 				 GT_GEN6_BSD_USER_INTERRUPT,
1423 				 GEN6_BSD_USER_INTERRUPT);
1424 }
1425 
1426 static void
gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
1428 {
1429 	return gen6_ring_put_irq(ring,
1430 				 GT_GEN6_BSD_USER_INTERRUPT,
1431 				 GEN6_BSD_USER_INTERRUPT);
1432 }
1433 
1434 /* ring buffer for Video Codec for Gen6+ */
1435 static const struct intel_ring_buffer gen6_bsd_ring = {
1436 	.name			= "gen6 bsd ring",
1437 	.id			= VCS,
1438 	.mmio_base		= GEN6_BSD_RING_BASE,
1439 	.size			= 32 * PAGE_SIZE,
1440 	.init			= init_ring_common,
1441 	.write_tail		= gen6_bsd_ring_write_tail,
1442 	.flush			= gen6_ring_flush,
1443 	.add_request		= gen6_add_request,
1444 	.get_seqno		= gen6_ring_get_seqno,
1445 	.irq_get		= gen6_bsd_ring_get_irq,
1446 	.irq_put		= gen6_bsd_ring_put_irq,
1447 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1448 	.sync_to		= gen6_bsd_ring_sync_to,
1449 	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
1450 				   MI_SEMAPHORE_SYNC_INVALID,
1451 				   MI_SEMAPHORE_SYNC_VB},
1452 	.signal_mbox		= {GEN6_RVSYNC, GEN6_BVSYNC},
1453 };
1454 
1455 /* Blitter support (SandyBridge+) */
1456 
1457 static bool
blt_ring_get_irq(struct intel_ring_buffer *ring)
1459 {
1460 	return gen6_ring_get_irq(ring,
1461 				 GT_BLT_USER_INTERRUPT,
1462 				 GEN6_BLITTER_USER_INTERRUPT);
1463 }
1464 
1465 static void
blt_ring_put_irq(struct intel_ring_buffer *ring)
1467 {
1468 	gen6_ring_put_irq(ring,
1469 			  GT_BLT_USER_INTERRUPT,
1470 			  GEN6_BLITTER_USER_INTERRUPT);
1471 }
1472 
static int blt_ring_flush(struct intel_ring_buffer *ring,
1474 			  u32 invalidate, u32 flush)
1475 {
1476 	uint32_t cmd;
1477 	int ret;
1478 
1479 	ret = intel_ring_begin(ring, 4);
1480 	if (ret)
1481 		return ret;
1482 
1483 	cmd = MI_FLUSH_DW;
1484 	if (invalidate & I915_GEM_DOMAIN_RENDER)
1485 		cmd |= MI_INVALIDATE_TLB;
1486 	intel_ring_emit(ring, cmd);
1487 	intel_ring_emit(ring, 0);
1488 	intel_ring_emit(ring, 0);
1489 	intel_ring_emit(ring, MI_NOOP);
1490 	intel_ring_advance(ring);
1491 	return 0;
1492 }
1493 
1494 static const struct intel_ring_buffer gen6_blt_ring = {
1495 	.name			= "blt ring",
1496 	.id			= BCS,
1497 	.mmio_base		= BLT_RING_BASE,
1498 	.size			= 32 * PAGE_SIZE,
1499 	.init			= init_ring_common,
1500 	.write_tail		= ring_write_tail,
1501 	.flush			= blt_ring_flush,
1502 	.add_request		= gen6_add_request,
1503 	.get_seqno		= gen6_ring_get_seqno,
1504 	.irq_get		= blt_ring_get_irq,
1505 	.irq_put		= blt_ring_put_irq,
1506 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1507 	.sync_to		= gen6_blt_ring_sync_to,
1508 	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
1509 				   MI_SEMAPHORE_SYNC_BV,
1510 				   MI_SEMAPHORE_SYNC_INVALID},
1511 	.signal_mbox		= {GEN6_RBSYNC, GEN6_VBSYNC},
1512 };
1513 
int intel_init_render_ring_buffer(struct drm_device *dev)
1515 {
1516 	drm_i915_private_t *dev_priv = dev->dev_private;
1517 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1518 
1519 	*ring = render_ring;
1520 	if (INTEL_INFO(dev)->gen >= 6) {
1521 		ring->add_request = gen6_add_request;
1522 		ring->flush = gen6_render_ring_flush;
1523 		ring->irq_get = gen6_render_ring_get_irq;
1524 		ring->irq_put = gen6_render_ring_put_irq;
1525 		ring->get_seqno = gen6_ring_get_seqno;
1526 	} else if (IS_GEN5(dev)) {
1527 		ring->add_request = pc_render_add_request;
1528 		ring->get_seqno = pc_render_get_seqno;
1529 	}
1530 
1531 	if (!I915_NEED_GFX_HWS(dev)) {
1532 		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1533 		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1534 	}
1535 
1536 	return intel_init_ring_buffer(dev, ring);
1537 }
1538 
int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1540 {
1541 	drm_i915_private_t *dev_priv = dev->dev_private;
1542 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1543 
1544 	*ring = render_ring;
1545 	if (INTEL_INFO(dev)->gen >= 6) {
1546 		ring->add_request = gen6_add_request;
1547 		ring->irq_get = gen6_render_ring_get_irq;
1548 		ring->irq_put = gen6_render_ring_put_irq;
1549 	} else if (IS_GEN5(dev)) {
1550 		ring->add_request = pc_render_add_request;
1551 		ring->get_seqno = pc_render_get_seqno;
1552 	}
1553 
1554 	if (!I915_NEED_GFX_HWS(dev))
1555 		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1556 
1557 	ring->dev = dev;
1558 	INIT_LIST_HEAD(&ring->active_list);
1559 	INIT_LIST_HEAD(&ring->request_list);
1560 	INIT_LIST_HEAD(&ring->gpu_write_list);
1561 
1562 	ring->size = size;
1563 	ring->effective_size = ring->size;
1564 	if (IS_I830(ring->dev))
1565 		ring->effective_size -= 128;
1566 
1567 	ring->map.offset = start;
1568 	ring->map.size = size;
1569 	ring->map.type = 0;
1570 	ring->map.flags = 0;
1571 	ring->map.mtrr = 0;
1572 
1573 	drm_core_ioremap_wc(&ring->map, dev);
1574 	if (ring->map.handle == NULL) {
1575 		DRM_ERROR("can not ioremap virtual address for"
1576 			  " ring buffer\n");
1577 		return -ENOMEM;
1578 	}
1579 
1580 	ring->virtual_start = (void __force __iomem *)ring->map.handle;
1581 	return 0;
1582 }
1583 
int intel_init_bsd_ring_buffer(struct drm_device *dev)
1585 {
1586 	drm_i915_private_t *dev_priv = dev->dev_private;
1587 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1588 
1589 	if (IS_GEN6(dev) || IS_GEN7(dev))
1590 		*ring = gen6_bsd_ring;
1591 	else
1592 		*ring = bsd_ring;
1593 
1594 	return intel_init_ring_buffer(dev, ring);
1595 }
1596 
int intel_init_blt_ring_buffer(struct drm_device *dev)
1598 {
1599 	drm_i915_private_t *dev_priv = dev->dev_private;
1600 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1601 
1602 	*ring = gen6_blt_ring;
1603 
1604 	return intel_init_ring_buffer(dev, ring);
1605 }
1606