1 /*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 * Zou Nan hai <nanhai.zou@intel.com>
26 * Xiang Hai hao <haihao.xiang@intel.com>
27 *
28 */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "i915_drv.h"
33 #include "i915_drm.h"
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36
37 /*
38 * 965+ support PIPE_CONTROL commands, which provide finer grained control
39 * over cache flushing.
40 */
41 struct pipe_control {
42 struct drm_i915_gem_object *obj;
43 volatile u32 *cpu_page;
44 u32 gtt_offset;
45 };
46
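/*
 * Bytes of free space between the CPU tail and the GPU head, modulo the
 * ring size.  An extra 8-byte gap is kept so that a completely full ring
 * is never confused with an empty one (head == tail).
 */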
47 static inline int ring_space(struct intel_ring_buffer *ring)
48 {
49 int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
50 if (space < 0)
51 space += ring->size;
52 return space;
53 }
54
55 static int
56 render_ring_flush(struct intel_ring_buffer *ring,
57 u32 invalidate_domains,
58 u32 flush_domains)
59 {
60 struct drm_device *dev = ring->dev;
61 u32 cmd;
62 int ret;
63
64 /*
65 * read/write caches:
66 *
67 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
68 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
69 * also flushed at 2d versus 3d pipeline switches.
70 *
71 * read-only caches:
72 *
73 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
74 * MI_READ_FLUSH is set, and is always flushed on 965.
75 *
76 * I915_GEM_DOMAIN_COMMAND may not exist?
77 *
78 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
79 * invalidated when MI_EXE_FLUSH is set.
80 *
81 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
82 * invalidated with every MI_FLUSH.
83 *
84 * TLBs:
85 *
86 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
87 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
88 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
89 * are flushed at any MI_FLUSH.
90 */
91
92 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
93 if ((invalidate_domains|flush_domains) &
94 I915_GEM_DOMAIN_RENDER)
95 cmd &= ~MI_NO_WRITE_FLUSH;
96 if (INTEL_INFO(dev)->gen < 4) {
97 /*
98 * On the 965, the sampler cache always gets flushed
99 * and this bit is reserved.
100 */
101 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
102 cmd |= MI_READ_FLUSH;
103 }
104 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
105 cmd |= MI_EXE_FLUSH;
106
107 if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
108 (IS_G4X(dev) || IS_GEN5(dev)))
109 cmd |= MI_INVALIDATE_ISP;
110
111 ret = intel_ring_begin(ring, 2);
112 if (ret)
113 return ret;
114
115 intel_ring_emit(ring, cmd);
116 intel_ring_emit(ring, MI_NOOP);
117 intel_ring_advance(ring);
118
119 return 0;
120 }
121
122 /**
123 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
124 * implementing two workarounds on gen6. From section 1.4.7.1
125 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
126 *
127 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
128 * produced by non-pipelined state commands), software needs to first
129 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
130 * 0.
131 *
132 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
133 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
134 *
135 * And the workaround for these two requires this workaround first:
136 *
137 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
138 * BEFORE the pipe-control with a post-sync op and no write-cache
139 * flushes.
140 *
141 * And this last workaround is tricky because of the requirements on
142 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
143 * volume 2 part 1:
144 *
145 * "1 of the following must also be set:
146 * - Render Target Cache Flush Enable ([12] of DW1)
147 * - Depth Cache Flush Enable ([0] of DW1)
148 * - Stall at Pixel Scoreboard ([1] of DW1)
149 * - Depth Stall ([13] of DW1)
150 * - Post-Sync Operation ([13] of DW1)
151 * - Notify Enable ([8] of DW1)"
152 *
153 * The cache flushes require the workaround flush that triggered this
154 * one, so we can't use it. Depth stall would trigger the same.
155 * Post-sync nonzero is what triggered this second workaround, so we
156 * can't use that one either. Notify enable is IRQs, which aren't
157 * really our business. That leaves only stall at scoreboard.
158 */
159 static int
160 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
161 {
162 struct pipe_control *pc = ring->private;
163 u32 scratch_addr = pc->gtt_offset + 128;
164 int ret;
165
166
167 ret = intel_ring_begin(ring, 6);
168 if (ret)
169 return ret;
170
171 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
172 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
173 PIPE_CONTROL_STALL_AT_SCOREBOARD);
174 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
175 intel_ring_emit(ring, 0); /* low dword */
176 intel_ring_emit(ring, 0); /* high dword */
177 intel_ring_emit(ring, MI_NOOP);
178 intel_ring_advance(ring);
179
180 ret = intel_ring_begin(ring, 6);
181 if (ret)
182 return ret;
183
184 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
185 intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
186 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
187 intel_ring_emit(ring, 0);
188 intel_ring_emit(ring, 0);
189 intel_ring_emit(ring, MI_NOOP);
190 intel_ring_advance(ring);
191
192 return 0;
193 }
194
195 static int
196 gen6_render_ring_flush(struct intel_ring_buffer *ring,
197 u32 invalidate_domains, u32 flush_domains)
198 {
199 u32 flags = 0;
200 struct pipe_control *pc = ring->private;
201 u32 scratch_addr = pc->gtt_offset + 128;
202 int ret;
203
204 /* Force SNB workarounds for PIPE_CONTROL flushes */
205 intel_emit_post_sync_nonzero_flush(ring);
206
207 /* Just flush everything. Experiments have shown that reducing the
208 * number of bits based on the write domains has little performance
209 * impact.
210 */
211 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
212 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
213 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
214 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
215 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
216 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
217 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
218
219 ret = intel_ring_begin(ring, 6);
220 if (ret)
221 return ret;
222
223 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
224 intel_ring_emit(ring, flags);
225 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
226 intel_ring_emit(ring, 0); /* lower dword */
227 intel_ring_emit(ring, 0); /* upper dword */
228 intel_ring_emit(ring, MI_NOOP);
229 intel_ring_advance(ring);
230
231 return 0;
232 }
233
234 static void ring_write_tail(struct intel_ring_buffer *ring,
235 u32 value)
236 {
237 drm_i915_private_t *dev_priv = ring->dev->dev_private;
238 I915_WRITE_TAIL(ring, value);
239 }
240
241 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
242 {
243 drm_i915_private_t *dev_priv = ring->dev->dev_private;
244 u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
245 RING_ACTHD(ring->mmio_base) : ACTHD;
246
247 return I915_READ(acthd_reg);
248 }
249
250 static int init_ring_common(struct intel_ring_buffer *ring)
251 {
252 struct drm_device *dev = ring->dev;
253 drm_i915_private_t *dev_priv = dev->dev_private;
254 struct drm_i915_gem_object *obj = ring->obj;
255 int ret = 0;
256 u32 head;
257
258 if (HAS_FORCE_WAKE(dev))
259 gen6_gt_force_wake_get(dev_priv);
260
261 /* Stop the ring if it's running. */
262 I915_WRITE_CTL(ring, 0);
263 I915_WRITE_HEAD(ring, 0);
264 ring->write_tail(ring, 0);
265
266 /* Initialize the ring. */
267 I915_WRITE_START(ring, obj->gtt_offset);
268 head = I915_READ_HEAD(ring) & HEAD_ADDR;
269
270 /* G45 ring initialization fails to reset head to zero */
271 if (head != 0) {
272 DRM_DEBUG_KMS("%s head not reset to zero "
273 "ctl %08x head %08x tail %08x start %08x\n",
274 ring->name,
275 I915_READ_CTL(ring),
276 I915_READ_HEAD(ring),
277 I915_READ_TAIL(ring),
278 I915_READ_START(ring));
279
280 I915_WRITE_HEAD(ring, 0);
281
282 if (I915_READ_HEAD(ring) & HEAD_ADDR) {
283 DRM_ERROR("failed to set %s head to zero "
284 "ctl %08x head %08x tail %08x start %08x\n",
285 ring->name,
286 I915_READ_CTL(ring),
287 I915_READ_HEAD(ring),
288 I915_READ_TAIL(ring),
289 I915_READ_START(ring));
290 }
291 }
292
293 I915_WRITE_CTL(ring,
294 ((ring->size - PAGE_SIZE) & RING_NR_PAGES)
295 | RING_VALID);
296
297 /* If the head is still not zero, the ring is dead */
298 if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
299 I915_READ_START(ring) == obj->gtt_offset &&
300 (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
301 DRM_ERROR("%s initialization failed "
302 "ctl %08x head %08x tail %08x start %08x\n",
303 ring->name,
304 I915_READ_CTL(ring),
305 I915_READ_HEAD(ring),
306 I915_READ_TAIL(ring),
307 I915_READ_START(ring));
308 ret = -EIO;
309 goto out;
310 }
311
312 if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
313 i915_kernel_lost_context(ring->dev);
314 else {
315 ring->head = I915_READ_HEAD(ring);
316 ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
317 ring->space = ring_space(ring);
318 ring->last_retired_head = -1;
319 }
320
321 out:
322 if (HAS_FORCE_WAKE(dev))
323 gen6_gt_force_wake_put(dev_priv);
324
325 return ret;
326 }
327
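/*
 * Allocate, pin and kmap a 4096-byte scratch object for PIPE_CONTROL
 * writes.  pc->gtt_offset is handed to the GPU as the write target and
 * pc->cpu_page lets pc_render_get_seqno() read the result back.
 */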
328 static int
329 init_pipe_control(struct intel_ring_buffer *ring)
330 {
331 struct pipe_control *pc;
332 struct drm_i915_gem_object *obj;
333 int ret;
334
335 if (ring->private)
336 return 0;
337
338 pc = kmalloc(sizeof(*pc), GFP_KERNEL);
339 if (!pc)
340 return -ENOMEM;
341
342 obj = i915_gem_alloc_object(ring->dev, 4096);
343 if (obj == NULL) {
344 DRM_ERROR("Failed to allocate seqno page\n");
345 ret = -ENOMEM;
346 goto err;
347 }
348
349 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
350
351 ret = i915_gem_object_pin(obj, 4096, true);
352 if (ret)
353 goto err_unref;
354
355 pc->gtt_offset = obj->gtt_offset;
356 pc->cpu_page = kmap(obj->pages[0]);
357 if (pc->cpu_page == NULL)
358 goto err_unpin;
359
360 pc->obj = obj;
361 ring->private = pc;
362 return 0;
363
364 err_unpin:
365 i915_gem_object_unpin(obj);
366 err_unref:
367 drm_gem_object_unreference(&obj->base);
368 err:
369 kfree(pc);
370 return ret;
371 }
372
373 static void
374 cleanup_pipe_control(struct intel_ring_buffer *ring)
375 {
376 struct pipe_control *pc = ring->private;
377 struct drm_i915_gem_object *obj;
378
379 if (!ring->private)
380 return;
381
382 obj = pc->obj;
383 kunmap(obj->pages[0]);
384 i915_gem_object_unpin(obj);
385 drm_gem_object_unreference(&obj->base);
386
387 kfree(pc);
388 ring->private = NULL;
389 }
390
391 static int init_render_ring(struct intel_ring_buffer *ring)
392 {
393 struct drm_device *dev = ring->dev;
394 struct drm_i915_private *dev_priv = dev->dev_private;
395 int ret = init_ring_common(ring);
396
397 if (INTEL_INFO(dev)->gen > 3) {
398 int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
399 I915_WRITE(MI_MODE, mode);
400 if (IS_GEN7(dev))
401 I915_WRITE(GFX_MODE_GEN7,
402 GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
403 GFX_MODE_ENABLE(GFX_REPLAY_MODE));
404 }
405
406 if (INTEL_INFO(dev)->gen >= 5) {
407 ret = init_pipe_control(ring);
408 if (ret)
409 return ret;
410 }
411
412
413 if (IS_GEN6(dev)) {
414 /* From the Sandybridge PRM, volume 1 part 3, page 24:
415 * "If this bit is set, STCunit will have LRA as replacement
416 * policy. [...] This bit must be reset. LRA replacement
417 * policy is not supported."
418 */
419 I915_WRITE(CACHE_MODE_0,
420 CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
421 }
422
423 if (INTEL_INFO(dev)->gen >= 6) {
424 I915_WRITE(INSTPM,
425 INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
426 }
427
428 return ret;
429 }
430
431 static void render_ring_cleanup(struct intel_ring_buffer *ring)
432 {
433 if (!ring->private)
434 return;
435
436 cleanup_pipe_control(ring);
437 }
438
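/*
 * Emit a semaphore mailbox update (MI_SEMAPHORE_MBOX | UPDATE) into this
 * ring so that the ring behind @mmio_offset can later compare-and-wait
 * on @seqno via intel_ring_sync().
 */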
439 static void
440 update_mboxes(struct intel_ring_buffer *ring,
441 u32 seqno,
442 u32 mmio_offset)
443 {
444 intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
445 MI_SEMAPHORE_GLOBAL_GTT |
446 MI_SEMAPHORE_REGISTER |
447 MI_SEMAPHORE_UPDATE);
448 intel_ring_emit(ring, seqno);
449 intel_ring_emit(ring, mmio_offset);
450 }
451
452 /**
453 * gen6_add_request - Update the semaphore mailbox registers
454 *
455 * @ring - ring that is adding a request
456 * @seqno - return location for the seqno stuck into the ring
457 *
458 * Update the mailbox registers in the *other* rings with the current seqno.
459 * This acts like a signal in the canonical semaphore.
460 */
461 static int
462 gen6_add_request(struct intel_ring_buffer *ring,
463 u32 *seqno)
464 {
465 u32 mbox1_reg;
466 u32 mbox2_reg;
467 int ret;
468
469 ret = intel_ring_begin(ring, 10);
470 if (ret)
471 return ret;
472
473 mbox1_reg = ring->signal_mbox[0];
474 mbox2_reg = ring->signal_mbox[1];
475
476 *seqno = i915_gem_next_request_seqno(ring);
477
478 update_mboxes(ring, *seqno, mbox1_reg);
479 update_mboxes(ring, *seqno, mbox2_reg);
480 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
481 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
482 intel_ring_emit(ring, *seqno);
483 intel_ring_emit(ring, MI_USER_INTERRUPT);
484 intel_ring_advance(ring);
485
486 return 0;
487 }
488
489 /**
490 * intel_ring_sync - sync the waiter to the signaller on seqno
491 *
492 * @waiter - ring that is waiting
493 * @signaller - ring which has signalled, or will signal
494 * @seqno - seqno which the waiter will block on
495 */
496 static int
497 intel_ring_sync(struct intel_ring_buffer *waiter,
498 struct intel_ring_buffer *signaller,
499 int ring,
500 u32 seqno)
501 {
502 int ret;
503 u32 dw1 = MI_SEMAPHORE_MBOX |
504 MI_SEMAPHORE_COMPARE |
505 MI_SEMAPHORE_REGISTER;
506
507 ret = intel_ring_begin(waiter, 4);
508 if (ret)
509 return ret;
510
511 intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
512 intel_ring_emit(waiter, seqno);
513 intel_ring_emit(waiter, 0);
514 intel_ring_emit(waiter, MI_NOOP);
515 intel_ring_advance(waiter);
516
517 return 0;
518 }
519
520 /* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
521 int
522 render_ring_sync_to(struct intel_ring_buffer *waiter,
523 struct intel_ring_buffer *signaller,
524 u32 seqno)
525 {
526 WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID);
527 return intel_ring_sync(waiter,
528 signaller,
529 RCS,
530 seqno);
531 }
532
533 /* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
534 int
535 gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
536 struct intel_ring_buffer *signaller,
537 u32 seqno)
538 {
539 WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID);
540 return intel_ring_sync(waiter,
541 signaller,
542 VCS,
543 seqno);
544 }
545
546 /* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
547 int
548 gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
549 struct intel_ring_buffer *signaller,
550 u32 seqno)
551 {
552 WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID);
553 return intel_ring_sync(waiter,
554 signaller,
555 BCS,
556 seqno);
557 }
558
559
560
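/*
 * Emit a 4-dword PIPE_CONTROL that performs a depth-stalled qword write
 * to addr__; pc_render_add_request() chains several of these, each to a
 * different scratch cacheline, to flush the writes out to memory before
 * raising PIPE_NOTIFY.
 */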
561 #define PIPE_CONTROL_FLUSH(ring__, addr__) \
562 do { \
563 intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
564 PIPE_CONTROL_DEPTH_STALL); \
565 intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \
566 intel_ring_emit(ring__, 0); \
567 intel_ring_emit(ring__, 0); \
568 } while (0)
569
570 static int
571 pc_render_add_request(struct intel_ring_buffer *ring,
572 u32 *result)
573 {
574 u32 seqno = i915_gem_next_request_seqno(ring);
575 struct pipe_control *pc = ring->private;
576 u32 scratch_addr = pc->gtt_offset + 128;
577 int ret;
578
579 /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
580 * incoherent with writes to memory, i.e. completely fubar,
581 * so we need to use PIPE_NOTIFY instead.
582 *
583 * However, we also need to workaround the qword write
584 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
585 * memory before requesting an interrupt.
586 */
587 ret = intel_ring_begin(ring, 32);
588 if (ret)
589 return ret;
590
591 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
592 PIPE_CONTROL_WRITE_FLUSH |
593 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
594 intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
595 intel_ring_emit(ring, seqno);
596 intel_ring_emit(ring, 0);
597 PIPE_CONTROL_FLUSH(ring, scratch_addr);
598 scratch_addr += 128; /* write to separate cachelines */
599 PIPE_CONTROL_FLUSH(ring, scratch_addr);
600 scratch_addr += 128;
601 PIPE_CONTROL_FLUSH(ring, scratch_addr);
602 scratch_addr += 128;
603 PIPE_CONTROL_FLUSH(ring, scratch_addr);
604 scratch_addr += 128;
605 PIPE_CONTROL_FLUSH(ring, scratch_addr);
606 scratch_addr += 128;
607 PIPE_CONTROL_FLUSH(ring, scratch_addr);
608
609 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
610 PIPE_CONTROL_WRITE_FLUSH |
611 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
612 PIPE_CONTROL_NOTIFY);
613 intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
614 intel_ring_emit(ring, seqno);
615 intel_ring_emit(ring, 0);
616 intel_ring_advance(ring);
617
618 *result = seqno;
619 return 0;
620 }
621
622 static int
623 render_ring_add_request(struct intel_ring_buffer *ring,
624 u32 *result)
625 {
626 u32 seqno = i915_gem_next_request_seqno(ring);
627 int ret;
628
629 ret = intel_ring_begin(ring, 4);
630 if (ret)
631 return ret;
632
633 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
634 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
635 intel_ring_emit(ring, seqno);
636 intel_ring_emit(ring, MI_USER_INTERRUPT);
637 intel_ring_advance(ring);
638
639 *result = seqno;
640 return 0;
641 }
642
643 static u32
644 gen6_ring_get_seqno(struct intel_ring_buffer *ring)
645 {
646 struct drm_device *dev = ring->dev;
647
648 /* Workaround to force correct ordering between irq and seqno writes on
649 * ivb (and maybe also on snb) by reading from a CS register (like
650 * ACTHD) before reading the status page. */
651 if (IS_GEN6(dev) || IS_GEN7(dev))
652 intel_ring_get_active_head(ring);
653 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
654 }
655
656 static u32
657 ring_get_seqno(struct intel_ring_buffer *ring)
658 {
659 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
660 }
661
662 static u32
663 pc_render_get_seqno(struct intel_ring_buffer *ring)
664 {
665 struct pipe_control *pc = ring->private;
666 return pc->cpu_page[0];
667 }
668
669 static void
670 ironlake_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
671 {
672 dev_priv->gt_irq_mask &= ~mask;
673 I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
674 POSTING_READ(GTIMR);
675 }
676
677 static void
678 ironlake_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
679 {
680 dev_priv->gt_irq_mask |= mask;
681 I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
682 POSTING_READ(GTIMR);
683 }
684
685 static void
686 i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
687 {
688 dev_priv->irq_mask &= ~mask;
689 I915_WRITE(IMR, dev_priv->irq_mask);
690 POSTING_READ(IMR);
691 }
692
693 static void
694 i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask)
695 {
696 dev_priv->irq_mask |= mask;
697 I915_WRITE(IMR, dev_priv->irq_mask);
698 POSTING_READ(IMR);
699 }
700
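/*
 * Refcounted enable of the render ring's user-interrupt source: only the
 * first get and the last put under irq_lock actually touch the hardware
 * interrupt mask registers.
 */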
701 static bool
702 render_ring_get_irq(struct intel_ring_buffer *ring)
703 {
704 struct drm_device *dev = ring->dev;
705 drm_i915_private_t *dev_priv = dev->dev_private;
706
707 if (!dev->irq_enabled)
708 return false;
709
710 spin_lock(&ring->irq_lock);
711 if (ring->irq_refcount++ == 0) {
712 if (HAS_PCH_SPLIT(dev))
713 ironlake_enable_irq(dev_priv,
714 GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
715 else
716 i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
717 }
718 spin_unlock(&ring->irq_lock);
719
720 return true;
721 }
722
723 static void
724 render_ring_put_irq(struct intel_ring_buffer *ring)
725 {
726 struct drm_device *dev = ring->dev;
727 drm_i915_private_t *dev_priv = dev->dev_private;
728
729 spin_lock(&ring->irq_lock);
730 if (--ring->irq_refcount == 0) {
731 if (HAS_PCH_SPLIT(dev))
732 ironlake_disable_irq(dev_priv,
733 GT_USER_INTERRUPT |
734 GT_PIPE_NOTIFY);
735 else
736 i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
737 }
738 spin_unlock(&ring->irq_lock);
739 }
740
741 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
742 {
743 struct drm_device *dev = ring->dev;
744 drm_i915_private_t *dev_priv = ring->dev->dev_private;
745 u32 mmio = 0;
746
747 /* The ring status page addresses are no longer next to the rest of
748 * the ring registers as of gen7.
749 */
750 if (IS_GEN7(dev)) {
751 switch (ring->id) {
752 case RCS:
753 mmio = RENDER_HWS_PGA_GEN7;
754 break;
755 case BCS:
756 mmio = BLT_HWS_PGA_GEN7;
757 break;
758 case VCS:
759 mmio = BSD_HWS_PGA_GEN7;
760 break;
761 }
762 } else if (IS_GEN6(ring->dev)) {
763 mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
764 } else {
765 mmio = RING_HWS_PGA(ring->mmio_base);
766 }
767
768 I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
769 POSTING_READ(mmio);
770
771 /* Flush the TLB for this page */
772 if (INTEL_INFO(dev)->gen >= 6) {
773 u32 reg = RING_INSTPM(ring->mmio_base);
774 I915_WRITE(reg,
775 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
776 INSTPM_SYNC_FLUSH));
777 if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
778 1000))
779 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
780 ring->name);
781 }
782 }
783
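/*
 * Pre-gen6 BSD ring flush: only a plain MI_FLUSH is emitted here, so the
 * invalidate/flush domain arguments are ignored.
 */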
784 static int
785 bsd_ring_flush(struct intel_ring_buffer *ring,
786 u32 invalidate_domains,
787 u32 flush_domains)
788 {
789 int ret;
790
791 ret = intel_ring_begin(ring, 2);
792 if (ret)
793 return ret;
794
795 intel_ring_emit(ring, MI_FLUSH);
796 intel_ring_emit(ring, MI_NOOP);
797 intel_ring_advance(ring);
798 return 0;
799 }
800
801 static int
802 ring_add_request(struct intel_ring_buffer *ring,
803 u32 *result)
804 {
805 u32 seqno;
806 int ret;
807
808 ret = intel_ring_begin(ring, 4);
809 if (ret)
810 return ret;
811
812 seqno = i915_gem_next_request_seqno(ring);
813
814 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
815 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
816 intel_ring_emit(ring, seqno);
817 intel_ring_emit(ring, MI_USER_INTERRUPT);
818 intel_ring_advance(ring);
819
820 *result = seqno;
821 return 0;
822 }
823
824 static bool
825 gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
826 {
827 struct drm_device *dev = ring->dev;
828 drm_i915_private_t *dev_priv = dev->dev_private;
829
830 if (!dev->irq_enabled)
831 return false;
832
833 /* It looks like we need to prevent the gt from suspending while waiting
834 * for a notify irq, otherwise irqs seem to get lost on at least the
835 * blt/bsd rings on ivb. */
836 gen6_gt_force_wake_get(dev_priv);
837
838 spin_lock(&ring->irq_lock);
839 if (ring->irq_refcount++ == 0) {
840 ring->irq_mask &= ~rflag;
841 I915_WRITE_IMR(ring, ring->irq_mask);
842 ironlake_enable_irq(dev_priv, gflag);
843 }
844 spin_unlock(&ring->irq_lock);
845
846 return true;
847 }
848
849 static void
850 gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
851 {
852 struct drm_device *dev = ring->dev;
853 drm_i915_private_t *dev_priv = dev->dev_private;
854
855 spin_lock(&ring->irq_lock);
856 if (--ring->irq_refcount == 0) {
857 ring->irq_mask |= rflag;
858 I915_WRITE_IMR(ring, ring->irq_mask);
859 ironlake_disable_irq(dev_priv, gflag);
860 }
861 spin_unlock(&ring->irq_lock);
862
863 gen6_gt_force_wake_put(dev_priv);
864 }
865
866 static bool
867 bsd_ring_get_irq(struct intel_ring_buffer *ring)
868 {
869 struct drm_device *dev = ring->dev;
870 drm_i915_private_t *dev_priv = dev->dev_private;
871
872 if (!dev->irq_enabled)
873 return false;
874
875 spin_lock(&ring->irq_lock);
876 if (ring->irq_refcount++ == 0) {
877 if (IS_G4X(dev))
878 i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
879 else
880 ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
881 }
882 spin_unlock(&ring->irq_lock);
883
884 return true;
885 }
886 static void
887 bsd_ring_put_irq(struct intel_ring_buffer *ring)
888 {
889 struct drm_device *dev = ring->dev;
890 drm_i915_private_t *dev_priv = dev->dev_private;
891
892 spin_lock(&ring->irq_lock);
893 if (--ring->irq_refcount == 0) {
894 if (IS_G4X(dev))
895 i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
896 else
897 ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
898 }
899 spin_unlock(&ring->irq_lock);
900 }
901
902 static int
903 ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
904 {
905 int ret;
906
907 ret = intel_ring_begin(ring, 2);
908 if (ret)
909 return ret;
910
911 intel_ring_emit(ring,
912 MI_BATCH_BUFFER_START | (2 << 6) |
913 MI_BATCH_NON_SECURE_I965);
914 intel_ring_emit(ring, offset);
915 intel_ring_advance(ring);
916
917 return 0;
918 }
919
920 static int
921 render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
922 u32 offset, u32 len)
923 {
924 struct drm_device *dev = ring->dev;
925 int ret;
926
927 if (IS_I830(dev) || IS_845G(dev)) {
928 ret = intel_ring_begin(ring, 4);
929 if (ret)
930 return ret;
931
932 intel_ring_emit(ring, MI_BATCH_BUFFER);
933 intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
934 intel_ring_emit(ring, offset + len - 8);
935 intel_ring_emit(ring, 0);
936 } else {
937 ret = intel_ring_begin(ring, 2);
938 if (ret)
939 return ret;
940
941 if (INTEL_INFO(dev)->gen >= 4) {
942 intel_ring_emit(ring,
943 MI_BATCH_BUFFER_START | (2 << 6) |
944 MI_BATCH_NON_SECURE_I965);
945 intel_ring_emit(ring, offset);
946 } else {
947 intel_ring_emit(ring,
948 MI_BATCH_BUFFER_START | (2 << 6));
949 intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
950 }
951 }
952 intel_ring_advance(ring);
953
954 return 0;
955 }
956
957 static void cleanup_status_page(struct intel_ring_buffer *ring)
958 {
959 drm_i915_private_t *dev_priv = ring->dev->dev_private;
960 struct drm_i915_gem_object *obj;
961
962 obj = ring->status_page.obj;
963 if (obj == NULL)
964 return;
965
966 kunmap(obj->pages[0]);
967 i915_gem_object_unpin(obj);
968 drm_gem_object_unreference(&obj->base);
969 ring->status_page.obj = NULL;
970
971 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
972 }
973
974 static int init_status_page(struct intel_ring_buffer *ring)
975 {
976 struct drm_device *dev = ring->dev;
977 drm_i915_private_t *dev_priv = dev->dev_private;
978 struct drm_i915_gem_object *obj;
979 int ret;
980
981 obj = i915_gem_alloc_object(dev, 4096);
982 if (obj == NULL) {
983 DRM_ERROR("Failed to allocate status page\n");
984 ret = -ENOMEM;
985 goto err;
986 }
987
988 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
989
990 ret = i915_gem_object_pin(obj, 4096, true);
991 if (ret != 0) {
992 goto err_unref;
993 }
994
995 ring->status_page.gfx_addr = obj->gtt_offset;
996 ring->status_page.page_addr = kmap(obj->pages[0]);
997 if (ring->status_page.page_addr == NULL) {
998 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
999 goto err_unpin;
1000 }
1001 ring->status_page.obj = obj;
1002 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1003
1004 intel_ring_setup_status_page(ring);
1005 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1006 ring->name, ring->status_page.gfx_addr);
1007
1008 return 0;
1009
1010 err_unpin:
1011 i915_gem_object_unpin(obj);
1012 err_unref:
1013 drm_gem_object_unreference(&obj->base);
1014 err:
1015 return ret;
1016 }
1017
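/*
 * Common ring setup: create the (optional) hardware status page, allocate
 * and pin the ring object, map it write-combined through the GTT and then
 * let the ring's init hook program the hardware.  The i830/845G erratum
 * handling below trims the usable ("effective") size of the buffer.
 */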
1018 int intel_init_ring_buffer(struct drm_device *dev,
1019 struct intel_ring_buffer *ring)
1020 {
1021 struct drm_i915_gem_object *obj;
1022 int ret;
1023
1024 ring->dev = dev;
1025 INIT_LIST_HEAD(&ring->active_list);
1026 INIT_LIST_HEAD(&ring->request_list);
1027 INIT_LIST_HEAD(&ring->gpu_write_list);
1028
1029 init_waitqueue_head(&ring->irq_queue);
1030 spin_lock_init(&ring->irq_lock);
1031 ring->irq_mask = ~0;
1032
1033 if (I915_NEED_GFX_HWS(dev)) {
1034 ret = init_status_page(ring);
1035 if (ret)
1036 return ret;
1037 }
1038
1039 obj = i915_gem_alloc_object(dev, ring->size);
1040 if (obj == NULL) {
1041 DRM_ERROR("Failed to allocate ringbuffer\n");
1042 ret = -ENOMEM;
1043 goto err_hws;
1044 }
1045
1046 ring->obj = obj;
1047
1048 ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1049 if (ret)
1050 goto err_unref;
1051
1052 ret = i915_gem_object_set_to_gtt_domain(obj, true);
1053 if (ret)
1054 goto err_unpin;
1055
1056 ring->map.size = ring->size;
1057 ring->map.offset = dev->agp->base + obj->gtt_offset;
1058 ring->map.type = 0;
1059 ring->map.flags = 0;
1060 ring->map.mtrr = 0;
1061
1062 drm_core_ioremap_wc(&ring->map, dev);
1063 if (ring->map.handle == NULL) {
1064 DRM_ERROR("Failed to map ringbuffer.\n");
1065 ret = -EINVAL;
1066 goto err_unpin;
1067 }
1068
1069 ring->virtual_start = ring->map.handle;
1070 ret = ring->init(ring);
1071 if (ret)
1072 goto err_unmap;
1073
1074 /* Work around an erratum on the i830 which causes a hang if
1075 * the TAIL pointer points to within the last 2 cachelines
1076 * of the buffer.
1077 */
1078 ring->effective_size = ring->size;
1079 if (IS_I830(ring->dev) || IS_845G(ring->dev))
1080 ring->effective_size -= 128;
1081
1082 return 0;
1083
1084 err_unmap:
1085 drm_core_ioremapfree(&ring->map, dev);
1086 err_unpin:
1087 i915_gem_object_unpin(obj);
1088 err_unref:
1089 drm_gem_object_unreference(&obj->base);
1090 ring->obj = NULL;
1091 err_hws:
1092 cleanup_status_page(ring);
1093 return ret;
1094 }
1095
1096 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1097 {
1098 struct drm_i915_private *dev_priv;
1099 int ret;
1100
1101 if (ring->obj == NULL)
1102 return;
1103
1104 /* Disable the ring buffer. The ring must be idle at this point */
1105 dev_priv = ring->dev->dev_private;
1106 ret = intel_wait_ring_idle(ring);
1107 if (ret)
1108 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1109 ring->name, ret);
1110
1111 I915_WRITE_CTL(ring, 0);
1112
1113 drm_core_ioremapfree(&ring->map, ring->dev);
1114
1115 i915_gem_object_unpin(ring->obj);
1116 drm_gem_object_unreference(&ring->obj->base);
1117 ring->obj = NULL;
1118
1119 if (ring->cleanup)
1120 ring->cleanup(ring);
1121
1122 cleanup_status_page(ring);
1123 }
1124
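/*
 * Called when a request would run past the end of the buffer: wait until
 * the remaining bytes are free, fill them with MI_NOOPs and restart
 * emission at the top of the ring.
 */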
1125 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1126 {
1127 unsigned int *virt;
1128 int rem = ring->size - ring->tail;
1129
1130 if (ring->space < rem) {
1131 int ret = intel_wait_ring_buffer(ring, rem);
1132 if (ret)
1133 return ret;
1134 }
1135
1136 virt = (unsigned int *)(ring->virtual_start + ring->tail);
1137 rem /= 8;
1138 while (rem--) {
1139 *virt++ = MI_NOOP;
1140 *virt++ = MI_NOOP;
1141 }
1142
1143 ring->tail = 0;
1144 ring->space = ring_space(ring);
1145
1146 return 0;
1147 }
1148
1149 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1150 {
1151 struct drm_i915_private *dev_priv = ring->dev->dev_private;
1152 bool was_interruptible;
1153 int ret;
1154
1155 /* XXX As we have not yet audited all the paths to check that
1156 * they are ready for ERESTARTSYS from intel_ring_begin, do not
1157 * allow us to be interruptible by a signal.
1158 */
1159 was_interruptible = dev_priv->mm.interruptible;
1160 dev_priv->mm.interruptible = false;
1161
1162 ret = i915_wait_request(ring, seqno, true);
1163
1164 dev_priv->mm.interruptible = was_interruptible;
1165
1166 return ret;
1167 }
1168
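/*
 * Try to make @n bytes available by retiring completed requests; failing
 * that, find the oldest outstanding request whose completion frees enough
 * space, wait for it, and recompute head/space from last_retired_head.
 */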
1169 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1170 {
1171 struct drm_i915_gem_request *request;
1172 u32 seqno = 0;
1173 int ret;
1174
1175 i915_gem_retire_requests_ring(ring);
1176
1177 if (ring->last_retired_head != -1) {
1178 ring->head = ring->last_retired_head;
1179 ring->last_retired_head = -1;
1180 ring->space = ring_space(ring);
1181 if (ring->space >= n)
1182 return 0;
1183 }
1184
1185 list_for_each_entry(request, &ring->request_list, list) {
1186 int space;
1187
1188 if (request->tail == -1)
1189 continue;
1190
1191 space = request->tail - (ring->tail + 8);
1192 if (space < 0)
1193 space += ring->size;
1194 if (space >= n) {
1195 seqno = request->seqno;
1196 break;
1197 }
1198
1199 /* Consume this request in case we need more space than
1200 * is available and so need to prevent a race between
1201 * updating last_retired_head and direct reads of
1202 * I915_RING_HEAD. It also provides a nice sanity check.
1203 */
1204 request->tail = -1;
1205 }
1206
1207 if (seqno == 0)
1208 return -ENOSPC;
1209
1210 ret = intel_ring_wait_seqno(ring, seqno);
1211 if (ret)
1212 return ret;
1213
1214 if (WARN_ON(ring->last_retired_head == -1))
1215 return -ENOSPC;
1216
1217 ring->head = ring->last_retired_head;
1218 ring->last_retired_head = -1;
1219 ring->space = ring_space(ring);
1220 if (WARN_ON(ring->space < n))
1221 return -ENOSPC;
1222
1223 return 0;
1224 }
1225
1226 int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1227 {
1228 struct drm_device *dev = ring->dev;
1229 struct drm_i915_private *dev_priv = dev->dev_private;
1230 unsigned long end;
1231 int ret;
1232
1233 ret = intel_ring_wait_request(ring, n);
1234 if (ret != -ENOSPC)
1235 return ret;
1236
1237 trace_i915_ring_wait_begin(ring);
1238 if (drm_core_check_feature(dev, DRIVER_GEM))
1239 /* With GEM the hangcheck timer should kick us out of the loop,
1240 * leaving it early runs the risk of corrupting GEM state (due
1241 * to running on almost untested codepaths). But on resume
1242 * timers don't work yet, so prevent a complete hang in that
1243 * case by choosing an insanely large timeout. */
1244 end = jiffies + 60 * HZ;
1245 else
1246 end = jiffies + 3 * HZ;
1247
1248 do {
1249 ring->head = I915_READ_HEAD(ring);
1250 ring->space = ring_space(ring);
1251 if (ring->space >= n) {
1252 trace_i915_ring_wait_end(ring);
1253 return 0;
1254 }
1255
1256 if (dev->primary->master) {
1257 struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1258 if (master_priv->sarea_priv)
1259 master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1260 }
1261
1262 msleep(1);
1263 if (atomic_read(&dev_priv->mm.wedged))
1264 return -EAGAIN;
1265 } while (!time_after(jiffies, end));
1266 trace_i915_ring_wait_end(ring);
1267 return -EBUSY;
1268 }
1269
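/*
 * Reserve space for @num_dwords dwords of commands, wrapping the ring
 * and/or waiting for the GPU to consume older commands as needed.  A
 * caller emits exactly that many dwords and then advances the tail; a
 * minimal sketch of the pattern used throughout this file:
 *
 *	ret = intel_ring_begin(ring, 2);
 *	if (ret)
 *		return ret;
 *	intel_ring_emit(ring, MI_FLUSH);
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);
 */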
1270 int intel_ring_begin(struct intel_ring_buffer *ring,
1271 int num_dwords)
1272 {
1273 struct drm_i915_private *dev_priv = ring->dev->dev_private;
1274 int n = 4*num_dwords;
1275 int ret;
1276
1277 if (unlikely(atomic_read(&dev_priv->mm.wedged)))
1278 return -EIO;
1279
1280 if (unlikely(ring->tail + n > ring->effective_size)) {
1281 ret = intel_wrap_ring_buffer(ring);
1282 if (unlikely(ret))
1283 return ret;
1284 }
1285
1286 if (unlikely(ring->space < n)) {
1287 ret = intel_wait_ring_buffer(ring, n);
1288 if (unlikely(ret))
1289 return ret;
1290 }
1291
1292 ring->space -= n;
1293 return 0;
1294 }
1295
1296 void intel_ring_advance(struct intel_ring_buffer *ring)
1297 {
1298 ring->tail &= ring->size - 1;
1299 ring->write_tail(ring, ring->tail);
1300 }
1301
1302 static const struct intel_ring_buffer render_ring = {
1303 .name = "render ring",
1304 .id = RCS,
1305 .mmio_base = RENDER_RING_BASE,
1306 .size = 32 * PAGE_SIZE,
1307 .init = init_render_ring,
1308 .write_tail = ring_write_tail,
1309 .flush = render_ring_flush,
1310 .add_request = render_ring_add_request,
1311 .get_seqno = ring_get_seqno,
1312 .irq_get = render_ring_get_irq,
1313 .irq_put = render_ring_put_irq,
1314 .dispatch_execbuffer = render_ring_dispatch_execbuffer,
1315 .cleanup = render_ring_cleanup,
1316 .sync_to = render_ring_sync_to,
1317 .semaphore_register = {MI_SEMAPHORE_SYNC_INVALID,
1318 MI_SEMAPHORE_SYNC_RV,
1319 MI_SEMAPHORE_SYNC_RB},
1320 .signal_mbox = {GEN6_VRSYNC, GEN6_BRSYNC},
1321 };
1322
1323 /* ring buffer for bit-stream decoder */
1324
1325 static const struct intel_ring_buffer bsd_ring = {
1326 .name = "bsd ring",
1327 .id = VCS,
1328 .mmio_base = BSD_RING_BASE,
1329 .size = 32 * PAGE_SIZE,
1330 .init = init_ring_common,
1331 .write_tail = ring_write_tail,
1332 .flush = bsd_ring_flush,
1333 .add_request = ring_add_request,
1334 .get_seqno = ring_get_seqno,
1335 .irq_get = bsd_ring_get_irq,
1336 .irq_put = bsd_ring_put_irq,
1337 .dispatch_execbuffer = ring_dispatch_execbuffer,
1338 };
1339
1340
1341 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1342 u32 value)
1343 {
1344 drm_i915_private_t *dev_priv = ring->dev->dev_private;
1345
1346 /* Every tail move must follow the sequence below */
1347 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1348 GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1349 GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
1350 I915_WRITE(GEN6_BSD_RNCID, 0x0);
1351
1352 if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1353 GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0,
1354 50))
1355 DRM_ERROR("timed out waiting for IDLE Indicator\n");
1356
1357 I915_WRITE_TAIL(ring, value);
1358 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1359 GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1360 GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
1361 }
1362
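/*
 * Gen6 BSD ring flush via MI_FLUSH_DW; a GPU-domain invalidate also
 * requests TLB and BSD-specific invalidation.
 */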
1363 static int gen6_ring_flush(struct intel_ring_buffer *ring,
1364 u32 invalidate, u32 flush)
1365 {
1366 uint32_t cmd;
1367 int ret;
1368
1369 ret = intel_ring_begin(ring, 4);
1370 if (ret)
1371 return ret;
1372
1373 cmd = MI_FLUSH_DW;
1374 if (invalidate & I915_GEM_GPU_DOMAINS)
1375 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1376 intel_ring_emit(ring, cmd);
1377 intel_ring_emit(ring, 0);
1378 intel_ring_emit(ring, 0);
1379 intel_ring_emit(ring, MI_NOOP);
1380 intel_ring_advance(ring);
1381 return 0;
1382 }
1383
1384 static int
1385 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1386 u32 offset, u32 len)
1387 {
1388 int ret;
1389
1390 ret = intel_ring_begin(ring, 2);
1391 if (ret)
1392 return ret;
1393
1394 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1395 /* bit0-7 is the length on GEN6+ */
1396 intel_ring_emit(ring, offset);
1397 intel_ring_advance(ring);
1398
1399 return 0;
1400 }
1401
1402 static bool
1403 gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
1404 {
1405 return gen6_ring_get_irq(ring,
1406 GT_USER_INTERRUPT,
1407 GEN6_RENDER_USER_INTERRUPT);
1408 }
1409
1410 static void
1411 gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
1412 {
1413 return gen6_ring_put_irq(ring,
1414 GT_USER_INTERRUPT,
1415 GEN6_RENDER_USER_INTERRUPT);
1416 }
1417
1418 static bool
1419 gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
1420 {
1421 return gen6_ring_get_irq(ring,
1422 GT_GEN6_BSD_USER_INTERRUPT,
1423 GEN6_BSD_USER_INTERRUPT);
1424 }
1425
1426 static void
1427 gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
1428 {
1429 return gen6_ring_put_irq(ring,
1430 GT_GEN6_BSD_USER_INTERRUPT,
1431 GEN6_BSD_USER_INTERRUPT);
1432 }
1433
1434 /* ring buffer for Video Codec for Gen6+ */
1435 static const struct intel_ring_buffer gen6_bsd_ring = {
1436 .name = "gen6 bsd ring",
1437 .id = VCS,
1438 .mmio_base = GEN6_BSD_RING_BASE,
1439 .size = 32 * PAGE_SIZE,
1440 .init = init_ring_common,
1441 .write_tail = gen6_bsd_ring_write_tail,
1442 .flush = gen6_ring_flush,
1443 .add_request = gen6_add_request,
1444 .get_seqno = gen6_ring_get_seqno,
1445 .irq_get = gen6_bsd_ring_get_irq,
1446 .irq_put = gen6_bsd_ring_put_irq,
1447 .dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
1448 .sync_to = gen6_bsd_ring_sync_to,
1449 .semaphore_register = {MI_SEMAPHORE_SYNC_VR,
1450 MI_SEMAPHORE_SYNC_INVALID,
1451 MI_SEMAPHORE_SYNC_VB},
1452 .signal_mbox = {GEN6_RVSYNC, GEN6_BVSYNC},
1453 };
1454
1455 /* Blitter support (SandyBridge+) */
1456
1457 static bool
1458 blt_ring_get_irq(struct intel_ring_buffer *ring)
1459 {
1460 return gen6_ring_get_irq(ring,
1461 GT_BLT_USER_INTERRUPT,
1462 GEN6_BLITTER_USER_INTERRUPT);
1463 }
1464
1465 static void
1466 blt_ring_put_irq(struct intel_ring_buffer *ring)
1467 {
1468 gen6_ring_put_irq(ring,
1469 GT_BLT_USER_INTERRUPT,
1470 GEN6_BLITTER_USER_INTERRUPT);
1471 }
1472
1473 static int blt_ring_flush(struct intel_ring_buffer *ring,
1474 u32 invalidate, u32 flush)
1475 {
1476 uint32_t cmd;
1477 int ret;
1478
1479 ret = intel_ring_begin(ring, 4);
1480 if (ret)
1481 return ret;
1482
1483 cmd = MI_FLUSH_DW;
1484 if (invalidate & I915_GEM_DOMAIN_RENDER)
1485 cmd |= MI_INVALIDATE_TLB;
1486 intel_ring_emit(ring, cmd);
1487 intel_ring_emit(ring, 0);
1488 intel_ring_emit(ring, 0);
1489 intel_ring_emit(ring, MI_NOOP);
1490 intel_ring_advance(ring);
1491 return 0;
1492 }
1493
1494 static const struct intel_ring_buffer gen6_blt_ring = {
1495 .name = "blt ring",
1496 .id = BCS,
1497 .mmio_base = BLT_RING_BASE,
1498 .size = 32 * PAGE_SIZE,
1499 .init = init_ring_common,
1500 .write_tail = ring_write_tail,
1501 .flush = blt_ring_flush,
1502 .add_request = gen6_add_request,
1503 .get_seqno = gen6_ring_get_seqno,
1504 .irq_get = blt_ring_get_irq,
1505 .irq_put = blt_ring_put_irq,
1506 .dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
1507 .sync_to = gen6_blt_ring_sync_to,
1508 .semaphore_register = {MI_SEMAPHORE_SYNC_BR,
1509 MI_SEMAPHORE_SYNC_BV,
1510 MI_SEMAPHORE_SYNC_INVALID},
1511 .signal_mbox = {GEN6_RBSYNC, GEN6_VBSYNC},
1512 };
1513
1514 int intel_init_render_ring_buffer(struct drm_device *dev)
1515 {
1516 drm_i915_private_t *dev_priv = dev->dev_private;
1517 struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1518
1519 *ring = render_ring;
1520 if (INTEL_INFO(dev)->gen >= 6) {
1521 ring->add_request = gen6_add_request;
1522 ring->flush = gen6_render_ring_flush;
1523 ring->irq_get = gen6_render_ring_get_irq;
1524 ring->irq_put = gen6_render_ring_put_irq;
1525 ring->get_seqno = gen6_ring_get_seqno;
1526 } else if (IS_GEN5(dev)) {
1527 ring->add_request = pc_render_add_request;
1528 ring->get_seqno = pc_render_get_seqno;
1529 }
1530
1531 if (!I915_NEED_GFX_HWS(dev)) {
1532 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1533 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1534 }
1535
1536 return intel_init_ring_buffer(dev, ring);
1537 }
1538
1539 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1540 {
1541 drm_i915_private_t *dev_priv = dev->dev_private;
1542 struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1543
1544 *ring = render_ring;
1545 if (INTEL_INFO(dev)->gen >= 6) {
1546 ring->add_request = gen6_add_request;
1547 ring->irq_get = gen6_render_ring_get_irq;
1548 ring->irq_put = gen6_render_ring_put_irq;
1549 } else if (IS_GEN5(dev)) {
1550 ring->add_request = pc_render_add_request;
1551 ring->get_seqno = pc_render_get_seqno;
1552 }
1553
1554 if (!I915_NEED_GFX_HWS(dev))
1555 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1556
1557 ring->dev = dev;
1558 INIT_LIST_HEAD(&ring->active_list);
1559 INIT_LIST_HEAD(&ring->request_list);
1560 INIT_LIST_HEAD(&ring->gpu_write_list);
1561
1562 ring->size = size;
1563 ring->effective_size = ring->size;
1564 if (IS_I830(ring->dev))
1565 ring->effective_size -= 128;
1566
1567 ring->map.offset = start;
1568 ring->map.size = size;
1569 ring->map.type = 0;
1570 ring->map.flags = 0;
1571 ring->map.mtrr = 0;
1572
1573 drm_core_ioremap_wc(&ring->map, dev);
1574 if (ring->map.handle == NULL) {
1575 DRM_ERROR("can not ioremap virtual address for"
1576 " ring buffer\n");
1577 return -ENOMEM;
1578 }
1579
1580 ring->virtual_start = (void __force __iomem *)ring->map.handle;
1581 return 0;
1582 }
1583
1584 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1585 {
1586 drm_i915_private_t *dev_priv = dev->dev_private;
1587 struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1588
1589 if (IS_GEN6(dev) || IS_GEN7(dev))
1590 *ring = gen6_bsd_ring;
1591 else
1592 *ring = bsd_ring;
1593
1594 return intel_init_ring_buffer(dev, ring);
1595 }
1596
1597 int intel_init_blt_ring_buffer(struct drm_device *dev)
1598 {
1599 drm_i915_private_t *dev_priv = dev->dev_private;
1600 struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1601
1602 *ring = gen6_blt_ring;
1603
1604 return intel_init_ring_buffer(dev, ring);
1605 }
1606