1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2018 Intel Corporation
4 */
5
6 #include <linux/prime_numbers.h>
7
8 #include "gem/i915_gem_internal.h"
9
10 #include "i915_selftest.h"
11 #include "intel_engine_heartbeat.h"
12 #include "intel_engine_pm.h"
13 #include "intel_reset.h"
14 #include "intel_ring.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftests/i915_random.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_live_test.h"
19 #include "selftests/igt_spinner.h"
20 #include "selftests/lib_sw_fence.h"
21 #include "shmem_utils.h"
22
23 #include "gem/selftests/igt_gem_utils.h"
24 #include "gem/selftests/mock_context.h"
25
26 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
27 #define NUM_GPR 16
28 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
29
30 #define LRI_HEADER MI_INSTR(0x22, 0)
31 #define LRI_LENGTH_MASK GENMASK(7, 0)
32
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
}
37
static bool is_active(struct i915_request *rq)
39 {
40 if (i915_request_is_active(rq))
41 return true;
42
43 if (i915_request_on_hold(rq))
44 return true;
45
46 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
47 return true;
48
49 return false;
50 }
51
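/*
 * Wait until the HW has acknowledged submission of @rq (or the request has
 * already completed). We kick the submission tasklet first and then poll,
 * flushing submission on each pass; returns -ETIME after @timeout jiffies.
 */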
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
55 {
56 /* Ignore our own attempts to suppress excess tasklets */
57 tasklet_hi_schedule(&engine->sched_engine->tasklet);
58
59 timeout += jiffies;
60 do {
61 bool done = time_after(jiffies, timeout);
62
63 if (i915_request_completed(rq)) /* that was quick! */
64 return 0;
65
/* Wait until the HW has acknowledged the submission (or err) */
67 intel_engine_flush_submission(engine);
68 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
69 return 0;
70
71 if (done)
72 return -ETIME;
73
74 cond_resched();
75 } while (1);
76 }
77
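/*
 * Submit a request on @ce that writes 1 into the status-page dword at @slot,
 * releasing any MI_SEMAPHORE_WAIT polling on that address. The request is
 * given barrier priority so it runs ahead of ordinary work.
 */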
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
79 {
80 const u32 offset =
81 i915_ggtt_offset(ce->engine->status_page.vma) +
82 offset_in_page(slot);
83 struct i915_request *rq;
84 u32 *cs;
85
86 rq = intel_context_create_request(ce);
87 if (IS_ERR(rq))
88 return PTR_ERR(rq);
89
90 cs = intel_ring_begin(rq, 4);
91 if (IS_ERR(cs)) {
92 i915_request_add(rq);
93 return PTR_ERR(cs);
94 }
95
96 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
97 *cs++ = offset;
98 *cs++ = 0;
99 *cs++ = 1;
100
101 intel_ring_advance(rq, cs);
102
103 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
104 i915_request_add(rq);
105 return 0;
106 }
107
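/*
 * Queue a kernel-context request behind the last request on @ce and wait for
 * it, ensuring @ce has been switched out and its context image written back
 * to memory before we inspect it.
 */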
static int context_flush(struct intel_context *ce, long timeout)
109 {
110 struct i915_request *rq;
111 struct dma_fence *fence;
112 int err = 0;
113
114 rq = intel_engine_create_kernel_request(ce->engine);
115 if (IS_ERR(rq))
116 return PTR_ERR(rq);
117
118 fence = i915_active_fence_get(&ce->timeline->last_request);
119 if (fence) {
120 i915_request_await_dma_fence(rq, fence);
121 dma_fence_put(fence);
122 }
123
124 rq = i915_request_get(rq);
125 i915_request_add(rq);
126 if (i915_request_wait(rq, 0, timeout) < 0)
127 err = -ETIME;
128 i915_request_put(rq);
129
130 rmb(); /* We know the request is written, make sure all state is too! */
131 return err;
132 }
133
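/*
 * Mask of register-offset bits that the HW actually compares for an LRI.
 * With MI_LRI_LRM_CS_MMIO set (gen12+), the upper offset bits are ignored
 * and the valid range depends on the engine class; see live_lrc_layout().
 */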
static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
135 {
136 if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
137 return ~0u;
138
139 if (GRAPHICS_VER(engine->i915) < 12)
140 return 0xfff;
141
142 switch (engine->class) {
143 default:
144 case RENDER_CLASS:
145 case COMPUTE_CLASS:
146 return 0x07ff;
147 case COPY_ENGINE_CLASS:
148 return 0x0fff;
149 case VIDEO_DECODE_CLASS:
150 case VIDEO_ENHANCEMENT_CLASS:
151 return 0x3fff;
152 }
153 }
154
static int live_lrc_layout(void *arg)
156 {
157 struct intel_gt *gt = arg;
158 struct intel_engine_cs *engine;
159 enum intel_engine_id id;
160 u32 *lrc;
161 int err;
162
163 /*
* Check that the register offsets we use to create the initial reg state
165 * match the layout saved by HW.
166 */
167
168 lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
169 if (!lrc)
170 return -ENOMEM;
171 GEM_BUG_ON(offset_in_page(lrc));
172
173 err = 0;
174 for_each_engine(engine, gt, id) {
175 u32 *hw;
176 int dw;
177
178 if (!engine->default_state)
179 continue;
180
181 hw = shmem_pin_map(engine->default_state);
182 if (!hw) {
183 err = -ENOMEM;
184 break;
185 }
186 hw += LRC_STATE_OFFSET / sizeof(*hw);
187
188 __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
189 engine->kernel_context, engine, true);
190
191 dw = 0;
192 do {
193 u32 lri = READ_ONCE(hw[dw]);
194 u32 lri_mask;
195
196 if (lri == 0) {
197 dw++;
198 continue;
199 }
200
201 if (lrc[dw] == 0) {
202 pr_debug("%s: skipped instruction %x at dword %d\n",
203 engine->name, lri, dw);
204 dw++;
205 continue;
206 }
207
208 if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
209 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
210 engine->name, dw, lri);
211 err = -EINVAL;
212 break;
213 }
214
215 if (lrc[dw] != lri) {
216 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
217 engine->name, dw, lri, lrc[dw]);
218 err = -EINVAL;
219 break;
220 }
221
222 /*
223 * When bit 19 of MI_LOAD_REGISTER_IMM instruction
224 * opcode is set on Gen12+ devices, HW does not
225 * care about certain register address offsets, and
* instead checks the following for valid address
227 * ranges on specific engines:
228 * RCS && CCS: BITS(0 - 10)
229 * BCS: BITS(0 - 11)
230 * VECS && VCS: BITS(0 - 13)
231 */
232 lri_mask = get_lri_mask(engine, lri);
233
234 lri &= 0x7f;
235 lri++;
236 dw++;
237
238 while (lri) {
239 u32 offset = READ_ONCE(hw[dw]);
240
241 if ((offset ^ lrc[dw]) & lri_mask) {
242 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
243 engine->name, dw, offset, lrc[dw]);
244 err = -EINVAL;
245 break;
246 }
247
248 /*
249 * Skip over the actual register value as we
250 * expect that to differ.
251 */
252 dw += 2;
253 lri -= 2;
254 }
255 } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
256
257 if (err) {
258 pr_info("%s: HW register image:\n", engine->name);
259 igt_hexdump(hw, PAGE_SIZE);
260
261 pr_info("%s: SW register image:\n", engine->name);
262 igt_hexdump(lrc, PAGE_SIZE);
263 }
264
265 shmem_unpin_map(engine->default_state, hw);
266 if (err)
267 break;
268 }
269
270 free_page((unsigned long)lrc);
271 return err;
272 }
273
static int find_offset(const u32 *lri, u32 offset)
275 {
276 int i;
277
278 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
279 if (lri[i] == offset)
280 return i;
281
282 return -1;
283 }
284
static int live_lrc_fixed(void *arg)
286 {
287 struct intel_gt *gt = arg;
288 struct intel_engine_cs *engine;
289 enum intel_engine_id id;
290 int err = 0;
291
292 /*
293 * Check the assumed register offsets match the actual locations in
294 * the context image.
295 */
296
297 for_each_engine(engine, gt, id) {
298 const struct {
299 u32 reg;
300 u32 offset;
301 const char *name;
302 } tbl[] = {
303 {
304 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
305 CTX_RING_START - 1,
306 "RING_START"
307 },
308 {
309 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
310 CTX_RING_CTL - 1,
311 "RING_CTL"
312 },
313 {
314 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
315 CTX_RING_HEAD - 1,
316 "RING_HEAD"
317 },
318 {
319 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
320 CTX_RING_TAIL - 1,
321 "RING_TAIL"
322 },
323 {
324 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
325 lrc_ring_mi_mode(engine),
326 "RING_MI_MODE"
327 },
328 {
329 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
330 CTX_BB_STATE - 1,
331 "BB_STATE"
332 },
333 {
334 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
335 lrc_ring_wa_bb_per_ctx(engine),
336 "RING_BB_PER_CTX_PTR"
337 },
338 {
339 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
340 lrc_ring_indirect_ptr(engine),
341 "RING_INDIRECT_CTX_PTR"
342 },
343 {
344 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
345 lrc_ring_indirect_offset(engine),
346 "RING_INDIRECT_CTX_OFFSET"
347 },
348 {
349 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
350 CTX_TIMESTAMP - 1,
351 "RING_CTX_TIMESTAMP"
352 },
353 {
354 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
355 lrc_ring_gpr0(engine),
356 "RING_CS_GPR0"
357 },
358 {
359 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
360 lrc_ring_cmd_buf_cctl(engine),
361 "RING_CMD_BUF_CCTL"
362 },
363 {
364 i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
365 lrc_ring_bb_offset(engine),
366 "RING_BB_OFFSET"
367 },
368 { },
369 }, *t;
370 u32 *hw;
371
372 if (!engine->default_state)
373 continue;
374
375 hw = shmem_pin_map(engine->default_state);
376 if (!hw) {
377 err = -ENOMEM;
378 break;
379 }
380 hw += LRC_STATE_OFFSET / sizeof(*hw);
381
382 for (t = tbl; t->name; t++) {
383 int dw = find_offset(hw, t->reg);
384
385 if (dw != t->offset) {
386 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
387 engine->name,
388 t->name,
389 t->reg,
390 dw,
391 t->offset);
392 err = -EINVAL;
393 }
394 }
395
396 shmem_unpin_map(engine->default_state, hw);
397 }
398
399 return err;
400 }
401
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
404 {
405 struct intel_context *ce;
406 struct i915_request *rq;
407 struct i915_gem_ww_ctx ww;
408 enum {
409 RING_START_IDX = 0,
410 RING_TAIL_IDX,
411 MAX_IDX
412 };
413 u32 expected[MAX_IDX];
414 u32 *cs;
415 int err;
416 int n;
417
418 ce = intel_context_create(engine);
419 if (IS_ERR(ce))
420 return PTR_ERR(ce);
421
422 i915_gem_ww_ctx_init(&ww, false);
423 retry:
424 err = i915_gem_object_lock(scratch->obj, &ww);
425 if (!err)
426 err = intel_context_pin_ww(ce, &ww);
427 if (err)
428 goto err_put;
429
430 rq = i915_request_create(ce);
431 if (IS_ERR(rq)) {
432 err = PTR_ERR(rq);
433 goto err_unpin;
434 }
435
436 cs = intel_ring_begin(rq, 4 * MAX_IDX);
437 if (IS_ERR(cs)) {
438 err = PTR_ERR(cs);
439 i915_request_add(rq);
440 goto err_unpin;
441 }
442
443 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
444 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
445 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
446 *cs++ = 0;
447
448 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
449
450 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
451 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
452 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
453 *cs++ = 0;
454
455 err = i915_request_await_object(rq, scratch->obj, true);
456 if (!err)
457 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
458
459 i915_request_get(rq);
460 i915_request_add(rq);
461 if (err)
462 goto err_rq;
463
464 intel_engine_flush_submission(engine);
465 expected[RING_TAIL_IDX] = ce->ring->tail;
466
467 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
468 err = -ETIME;
469 goto err_rq;
470 }
471
472 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
473 if (IS_ERR(cs)) {
474 err = PTR_ERR(cs);
475 goto err_rq;
476 }
477
478 for (n = 0; n < MAX_IDX; n++) {
479 if (cs[n] != expected[n]) {
480 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
481 engine->name, n, cs[n], expected[n]);
482 err = -EINVAL;
483 break;
484 }
485 }
486
487 i915_gem_object_unpin_map(scratch->obj);
488
489 err_rq:
490 i915_request_put(rq);
491 err_unpin:
492 intel_context_unpin(ce);
493 err_put:
494 if (err == -EDEADLK) {
495 err = i915_gem_ww_ctx_backoff(&ww);
496 if (!err)
497 goto retry;
498 }
499 i915_gem_ww_ctx_fini(&ww);
500 intel_context_put(ce);
501 return err;
502 }
503
static int live_lrc_state(void *arg)
505 {
506 struct intel_gt *gt = arg;
507 struct intel_engine_cs *engine;
508 struct i915_vma *scratch;
509 enum intel_engine_id id;
510 int err = 0;
511
512 /*
513 * Check the live register state matches what we expect for this
514 * intel_context.
515 */
516
517 scratch = create_scratch(gt);
518 if (IS_ERR(scratch))
519 return PTR_ERR(scratch);
520
521 for_each_engine(engine, gt, id) {
522 err = __live_lrc_state(engine, scratch);
523 if (err)
524 break;
525 }
526
527 if (igt_flush_test(gt->i915))
528 err = -EIO;
529
530 i915_vma_unpin_and_release(&scratch, 0);
531 return err;
532 }
533
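/*
 * Fill every CS_GPR register on @ce's engine with a known non-zero value
 * (STACK_MAGIC) using a single MI_LOAD_REGISTER_IMM, so that a subsequent
 * context can verify the GPRs are cleared on restore.
 */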
static int gpr_make_dirty(struct intel_context *ce)
535 {
536 struct i915_request *rq;
537 u32 *cs;
538 int n;
539
540 rq = intel_context_create_request(ce);
541 if (IS_ERR(rq))
542 return PTR_ERR(rq);
543
544 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
545 if (IS_ERR(cs)) {
546 i915_request_add(rq);
547 return PTR_ERR(cs);
548 }
549
550 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
551 for (n = 0; n < NUM_GPR_DW; n++) {
552 *cs++ = CS_GPR(ce->engine, n);
553 *cs++ = STACK_MAGIC;
554 }
555 *cs++ = MI_NOOP;
556
557 intel_ring_advance(rq, cs);
558
559 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
560 i915_request_add(rq);
561
562 return 0;
563 }
564
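/*
 * Build a request on @ce that first waits on a status-page semaphore at
 * @slot and then stores every CS_GPR register into @scratch with SRM, so we
 * can sample the GPR contents seen by a fresh context.
 */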
565 static struct i915_request *
__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
567 {
568 const u32 offset =
569 i915_ggtt_offset(ce->engine->status_page.vma) +
570 offset_in_page(slot);
571 struct i915_request *rq;
572 u32 *cs;
573 int err;
574 int n;
575
576 rq = intel_context_create_request(ce);
577 if (IS_ERR(rq))
578 return rq;
579
580 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
581 if (IS_ERR(cs)) {
582 i915_request_add(rq);
583 return ERR_CAST(cs);
584 }
585
586 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
587 *cs++ = MI_NOOP;
588
589 *cs++ = MI_SEMAPHORE_WAIT |
590 MI_SEMAPHORE_GLOBAL_GTT |
591 MI_SEMAPHORE_POLL |
592 MI_SEMAPHORE_SAD_NEQ_SDD;
593 *cs++ = 0;
594 *cs++ = offset;
595 *cs++ = 0;
596
597 for (n = 0; n < NUM_GPR_DW; n++) {
598 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
599 *cs++ = CS_GPR(ce->engine, n);
600 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
601 *cs++ = 0;
602 }
603
604 i915_vma_lock(scratch);
605 err = i915_request_await_object(rq, scratch->obj, true);
606 if (!err)
607 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
608 i915_vma_unlock(scratch);
609
610 i915_request_get(rq);
611 i915_request_add(rq);
612 if (err) {
613 i915_request_put(rq);
614 rq = ERR_PTR(err);
615 }
616
617 return rq;
618 }
619
static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
623 {
624 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
625 struct intel_context *ce;
626 struct i915_request *rq;
627 u32 *cs;
628 int err;
629 int n;
630
631 if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
632 return 0; /* GPR only on rcs0 for gen8 */
633
634 err = gpr_make_dirty(engine->kernel_context);
635 if (err)
636 return err;
637
638 ce = intel_context_create(engine);
639 if (IS_ERR(ce))
640 return PTR_ERR(ce);
641
642 rq = __gpr_read(ce, scratch, slot);
643 if (IS_ERR(rq)) {
644 err = PTR_ERR(rq);
645 goto err_put;
646 }
647
648 err = wait_for_submit(engine, rq, HZ / 2);
649 if (err)
650 goto err_rq;
651
652 if (preempt) {
653 err = gpr_make_dirty(engine->kernel_context);
654 if (err)
655 goto err_rq;
656
657 err = emit_semaphore_signal(engine->kernel_context, slot);
658 if (err)
659 goto err_rq;
660
661 err = wait_for_submit(engine, rq, HZ / 2);
662 if (err)
663 goto err_rq;
664 } else {
665 slot[0] = 1;
666 wmb();
667 }
668
669 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
670 err = -ETIME;
671 goto err_rq;
672 }
673
674 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
675 if (IS_ERR(cs)) {
676 err = PTR_ERR(cs);
677 goto err_rq;
678 }
679
680 for (n = 0; n < NUM_GPR_DW; n++) {
681 if (cs[n]) {
682 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
683 engine->name,
684 n / 2, n & 1 ? "udw" : "ldw",
685 cs[n]);
686 err = -EINVAL;
687 break;
688 }
689 }
690
691 i915_gem_object_unpin_map(scratch->obj);
692
693 err_rq:
694 memset32(&slot[0], -1, 4);
695 wmb();
696 i915_request_put(rq);
697 err_put:
698 intel_context_put(ce);
699 return err;
700 }
701
static int live_lrc_gpr(void *arg)
703 {
704 struct intel_gt *gt = arg;
705 struct intel_engine_cs *engine;
706 struct i915_vma *scratch;
707 enum intel_engine_id id;
708 int err = 0;
709
710 /*
711 * Check that GPR registers are cleared in new contexts as we need
712 * to avoid leaking any information from previous contexts.
713 */
714
715 scratch = create_scratch(gt);
716 if (IS_ERR(scratch))
717 return PTR_ERR(scratch);
718
719 for_each_engine(engine, gt, id) {
720 st_engine_heartbeat_disable(engine);
721
722 err = __live_lrc_gpr(engine, scratch, false);
723 if (err)
724 goto err;
725
726 err = __live_lrc_gpr(engine, scratch, true);
727 if (err)
728 goto err;
729
730 err:
731 st_engine_heartbeat_enable(engine);
732 if (igt_flush_test(gt->i915))
733 err = -EIO;
734 if (err)
735 break;
736 }
737
738 i915_vma_unpin_and_release(&scratch, 0);
739 return err;
740 }
741
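/*
 * Build a request on @ce that waits on the status-page semaphore at @slot
 * and then copies RING_CTX_TIMESTAMP into the status page at index @idx,
 * letting us sample the timestamp while the context is running.
 */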
742 static struct i915_request *
create_timestamp(struct intel_context *ce, void *slot, int idx)
744 {
745 const u32 offset =
746 i915_ggtt_offset(ce->engine->status_page.vma) +
747 offset_in_page(slot);
748 struct i915_request *rq;
749 u32 *cs;
750 int err;
751
752 rq = intel_context_create_request(ce);
753 if (IS_ERR(rq))
754 return rq;
755
756 cs = intel_ring_begin(rq, 10);
757 if (IS_ERR(cs)) {
758 err = PTR_ERR(cs);
759 goto err;
760 }
761
762 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
763 *cs++ = MI_NOOP;
764
765 *cs++ = MI_SEMAPHORE_WAIT |
766 MI_SEMAPHORE_GLOBAL_GTT |
767 MI_SEMAPHORE_POLL |
768 MI_SEMAPHORE_SAD_NEQ_SDD;
769 *cs++ = 0;
770 *cs++ = offset;
771 *cs++ = 0;
772
773 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
774 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
775 *cs++ = offset + idx * sizeof(u32);
776 *cs++ = 0;
777
778 intel_ring_advance(rq, cs);
779
780 err = 0;
781 err:
782 i915_request_get(rq);
783 i915_request_add(rq);
784 if (err) {
785 i915_request_put(rq);
786 return ERR_PTR(err);
787 }
788
789 return rq;
790 }
791
792 struct lrc_timestamp {
793 struct intel_engine_cs *engine;
794 struct intel_context *ce[2];
795 u32 poison;
796 };
797
static bool timestamp_advanced(u32 start, u32 end)
799 {
800 return (s32)(end - start) > 0;
801 }
802
static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
804 {
805 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
806 struct i915_request *rq;
807 u32 timestamp;
808 int err = 0;
809
810 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
811 rq = create_timestamp(arg->ce[0], slot, 1);
812 if (IS_ERR(rq))
813 return PTR_ERR(rq);
814
815 err = wait_for_submit(rq->engine, rq, HZ / 2);
816 if (err)
817 goto err;
818
819 if (preempt) {
820 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
821 err = emit_semaphore_signal(arg->ce[1], slot);
822 if (err)
823 goto err;
824 } else {
825 slot[0] = 1;
826 wmb();
827 }
828
829 /* And wait for switch to kernel (to save our context to memory) */
830 err = context_flush(arg->ce[0], HZ / 2);
831 if (err)
832 goto err;
833
834 if (!timestamp_advanced(arg->poison, slot[1])) {
835 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
836 arg->engine->name, preempt ? "preempt" : "simple",
837 arg->poison, slot[1]);
838 err = -EINVAL;
839 }
840
841 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
842 if (!timestamp_advanced(slot[1], timestamp)) {
843 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
844 arg->engine->name, preempt ? "preempt" : "simple",
845 slot[1], timestamp);
846 err = -EINVAL;
847 }
848
849 err:
850 memset32(slot, -1, 4);
851 i915_request_put(rq);
852 return err;
853 }
854
static int live_lrc_timestamp(void *arg)
856 {
857 struct lrc_timestamp data = {};
858 struct intel_gt *gt = arg;
859 enum intel_engine_id id;
860 const u32 poison[] = {
861 0,
862 S32_MAX,
863 (u32)S32_MAX + 1,
864 U32_MAX,
865 };
866
867 /*
* We want to verify that the timestamp is saved and restored across
869 * context switches and is monotonic.
870 *
871 * So we do this with a little bit of LRC poisoning to check various
872 * boundary conditions, and see what happens if we preempt the context
873 * with a second request (carrying more poison into the timestamp).
874 */
875
876 for_each_engine(data.engine, gt, id) {
877 int i, err = 0;
878
879 st_engine_heartbeat_disable(data.engine);
880
881 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
882 struct intel_context *tmp;
883
884 tmp = intel_context_create(data.engine);
885 if (IS_ERR(tmp)) {
886 err = PTR_ERR(tmp);
887 goto err;
888 }
889
890 err = intel_context_pin(tmp);
891 if (err) {
892 intel_context_put(tmp);
893 goto err;
894 }
895
896 data.ce[i] = tmp;
897 }
898
899 for (i = 0; i < ARRAY_SIZE(poison); i++) {
900 data.poison = poison[i];
901
902 err = __lrc_timestamp(&data, false);
903 if (err)
904 break;
905
906 err = __lrc_timestamp(&data, true);
907 if (err)
908 break;
909 }
910
911 err:
912 st_engine_heartbeat_enable(data.engine);
913 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
914 if (!data.ce[i])
915 break;
916
917 intel_context_unpin(data.ce[i]);
918 intel_context_put(data.ce[i]);
919 }
920
921 if (igt_flush_test(gt->i915))
922 err = -EIO;
923 if (err)
924 return err;
925 }
926
927 return 0;
928 }
929
930 static struct i915_vma *
create_user_vma(struct i915_address_space *vm, unsigned long size)
932 {
933 struct drm_i915_gem_object *obj;
934 struct i915_vma *vma;
935 int err;
936
937 obj = i915_gem_object_create_internal(vm->i915, size);
938 if (IS_ERR(obj))
939 return ERR_CAST(obj);
940
941 vma = i915_vma_instance(obj, vm, NULL);
942 if (IS_ERR(vma)) {
943 i915_gem_object_put(obj);
944 return vma;
945 }
946
947 err = i915_vma_pin(vma, 0, 0, PIN_USER);
948 if (err) {
949 i915_gem_object_put(obj);
950 return ERR_PTR(err);
951 }
952
953 return vma;
954 }
955
static u32 safe_poison(u32 offset, u32 poison)
957 {
958 /*
959 * Do not enable predication as it will nop all subsequent commands,
960 * not only disabling the tests (by preventing all the other SRM) but
961 * also preventing the arbitration events at the end of the request.
962 */
963 if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
964 poison &= ~REG_BIT(0);
965
966 return poison;
967 }
968
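/*
 * Create a user batch that walks the MI_LOAD_REGISTER_IMM lists in the
 * engine's default context image and emits an SRM for each register found,
 * dumping the live register values into @scratch.
 */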
969 static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
971 {
972 struct i915_vma *batch;
973 u32 dw, x, *cs, *hw;
974 u32 *defaults;
975
976 batch = create_user_vma(ce->vm, SZ_64K);
977 if (IS_ERR(batch))
978 return batch;
979
980 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
981 if (IS_ERR(cs)) {
982 i915_vma_put(batch);
983 return ERR_CAST(cs);
984 }
985
986 defaults = shmem_pin_map(ce->engine->default_state);
987 if (!defaults) {
988 i915_gem_object_unpin_map(batch->obj);
989 i915_vma_put(batch);
990 return ERR_PTR(-ENOMEM);
991 }
992
993 x = 0;
994 dw = 0;
995 hw = defaults;
996 hw += LRC_STATE_OFFSET / sizeof(*hw);
997 do {
998 u32 len = hw[dw] & LRI_LENGTH_MASK;
999
1000 /*
1001 * Keep it simple, skip parsing complex commands
1002 *
1003 * At present, there are no more MI_LOAD_REGISTER_IMM
1004 * commands after the first 3D state command. Rather
1005 * than include a table (see i915_cmd_parser.c) of all
1006 * the possible commands and their instruction lengths
1007 * (or mask for variable length instructions), assume
1008 * we have gathered the complete list of registers and
1009 * bail out.
1010 */
1011 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1012 break;
1013
1014 if (hw[dw] == 0) {
1015 dw++;
1016 continue;
1017 }
1018
1019 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1020 /* Assume all other MI commands match LRI length mask */
1021 dw += len + 2;
1022 continue;
1023 }
1024
1025 if (!len) {
1026 pr_err("%s: invalid LRI found in context image\n",
1027 ce->engine->name);
1028 igt_hexdump(defaults, PAGE_SIZE);
1029 break;
1030 }
1031
1032 dw++;
1033 len = (len + 1) / 2;
1034 while (len--) {
1035 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
1036 *cs++ = hw[dw];
1037 *cs++ = lower_32_bits(scratch->node.start + x);
1038 *cs++ = upper_32_bits(scratch->node.start + x);
1039
1040 dw += 2;
1041 x += 4;
1042 }
1043 } while (dw < PAGE_SIZE / sizeof(u32) &&
1044 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1045
1046 *cs++ = MI_BATCH_BUFFER_END;
1047
1048 shmem_unpin_map(ce->engine->default_state, defaults);
1049
1050 i915_gem_object_flush_map(batch->obj);
1051 i915_gem_object_unpin_map(batch->obj);
1052
1053 return batch;
1054 }
1055
static int move_to_active(struct i915_request *rq,
			  struct i915_vma *vma,
			  unsigned int flags)
1059 {
1060 int err;
1061
1062 i915_vma_lock(vma);
1063 err = i915_request_await_object(rq, vma->obj, flags);
1064 if (!err)
1065 err = i915_vma_move_to_active(vma, rq, flags);
1066 i915_vma_unlock(vma);
1067
1068 return err;
1069 }
1070
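/*
 * Submit a request on @ce that captures its register state into @before,
 * then blocks on the semaphore at @sema with arbitration enabled, and
 * finally captures the state again into @after. The semaphore wait is the
 * window in which __lrc_isolation() lets another context poison registers.
 */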
1071 static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
1076 {
1077 struct i915_vma *b_before, *b_after;
1078 struct i915_request *rq;
1079 u32 *cs;
1080 int err;
1081
1082 b_before = store_context(ce, before);
1083 if (IS_ERR(b_before))
1084 return ERR_CAST(b_before);
1085
1086 b_after = store_context(ce, after);
1087 if (IS_ERR(b_after)) {
1088 rq = ERR_CAST(b_after);
1089 goto err_before;
1090 }
1091
1092 rq = intel_context_create_request(ce);
1093 if (IS_ERR(rq))
1094 goto err_after;
1095
1096 err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
1097 if (err)
1098 goto err_rq;
1099
1100 err = move_to_active(rq, b_before, 0);
1101 if (err)
1102 goto err_rq;
1103
1104 err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
1105 if (err)
1106 goto err_rq;
1107
1108 err = move_to_active(rq, b_after, 0);
1109 if (err)
1110 goto err_rq;
1111
1112 cs = intel_ring_begin(rq, 14);
1113 if (IS_ERR(cs)) {
1114 err = PTR_ERR(cs);
1115 goto err_rq;
1116 }
1117
1118 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1119 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1120 *cs++ = lower_32_bits(b_before->node.start);
1121 *cs++ = upper_32_bits(b_before->node.start);
1122
1123 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1124 *cs++ = MI_SEMAPHORE_WAIT |
1125 MI_SEMAPHORE_GLOBAL_GTT |
1126 MI_SEMAPHORE_POLL |
1127 MI_SEMAPHORE_SAD_NEQ_SDD;
1128 *cs++ = 0;
1129 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1130 offset_in_page(sema);
1131 *cs++ = 0;
1132 *cs++ = MI_NOOP;
1133
1134 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1135 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1136 *cs++ = lower_32_bits(b_after->node.start);
1137 *cs++ = upper_32_bits(b_after->node.start);
1138
1139 intel_ring_advance(rq, cs);
1140
1141 WRITE_ONCE(*sema, 0);
1142 i915_request_get(rq);
1143 i915_request_add(rq);
1144 err_after:
1145 i915_vma_put(b_after);
1146 err_before:
1147 i915_vma_put(b_before);
1148 return rq;
1149
1150 err_rq:
1151 i915_request_add(rq);
1152 rq = ERR_PTR(err);
1153 goto err_after;
1154 }
1155
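/*
 * Create a user batch that replays the MI_LOAD_REGISTER_IMM lists from the
 * default context image, but with every register value replaced by @poison
 * (filtered through safe_poison() and the LRI offset mask).
 */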
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
1157 {
1158 struct i915_vma *batch;
1159 u32 dw, *cs, *hw;
1160 u32 *defaults;
1161
1162 batch = create_user_vma(ce->vm, SZ_64K);
1163 if (IS_ERR(batch))
1164 return batch;
1165
1166 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1167 if (IS_ERR(cs)) {
1168 i915_vma_put(batch);
1169 return ERR_CAST(cs);
1170 }
1171
1172 defaults = shmem_pin_map(ce->engine->default_state);
1173 if (!defaults) {
1174 i915_gem_object_unpin_map(batch->obj);
1175 i915_vma_put(batch);
1176 return ERR_PTR(-ENOMEM);
1177 }
1178
1179 dw = 0;
1180 hw = defaults;
1181 hw += LRC_STATE_OFFSET / sizeof(*hw);
1182 do {
1183 u32 len = hw[dw] & LRI_LENGTH_MASK;
1184
1185 /* For simplicity, break parsing at the first complex command */
1186 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1187 break;
1188
1189 if (hw[dw] == 0) {
1190 dw++;
1191 continue;
1192 }
1193
1194 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1195 dw += len + 2;
1196 continue;
1197 }
1198
1199 if (!len) {
1200 pr_err("%s: invalid LRI found in context image\n",
1201 ce->engine->name);
1202 igt_hexdump(defaults, PAGE_SIZE);
1203 break;
1204 }
1205
1206 dw++;
1207 len = (len + 1) / 2;
1208 *cs++ = MI_LOAD_REGISTER_IMM(len);
1209 while (len--) {
1210 *cs++ = hw[dw];
1211 *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
1212 MI_LRI_LRM_CS_MMIO),
1213 poison);
1214 dw += 2;
1215 }
1216 } while (dw < PAGE_SIZE / sizeof(u32) &&
1217 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1218
1219 *cs++ = MI_BATCH_BUFFER_END;
1220
1221 shmem_unpin_map(ce->engine->default_state, defaults);
1222
1223 i915_gem_object_flush_map(batch->obj);
1224 i915_gem_object_unpin_map(batch->obj);
1225
1226 return batch;
1227 }
1228
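/*
 * Run the poisoning batch on @ce with arbitration disabled, then write 1 to
 * the status-page semaphore at @sema to release the recording context. The
 * request runs at barrier priority so it is scheduled ahead of the waiter.
 */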
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
1230 {
1231 struct i915_request *rq;
1232 struct i915_vma *batch;
1233 u32 *cs;
1234 int err;
1235
1236 batch = load_context(ce, poison);
1237 if (IS_ERR(batch))
1238 return PTR_ERR(batch);
1239
1240 rq = intel_context_create_request(ce);
1241 if (IS_ERR(rq)) {
1242 err = PTR_ERR(rq);
1243 goto err_batch;
1244 }
1245
1246 err = move_to_active(rq, batch, 0);
1247 if (err)
1248 goto err_rq;
1249
1250 cs = intel_ring_begin(rq, 8);
1251 if (IS_ERR(cs)) {
1252 err = PTR_ERR(cs);
1253 goto err_rq;
1254 }
1255
1256 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1257 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1258 *cs++ = lower_32_bits(batch->node.start);
1259 *cs++ = upper_32_bits(batch->node.start);
1260
1261 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1262 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1263 offset_in_page(sema);
1264 *cs++ = 0;
1265 *cs++ = 1;
1266
1267 intel_ring_advance(rq, cs);
1268
1269 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1270 err_rq:
1271 i915_request_add(rq);
1272 err_batch:
1273 i915_vma_put(batch);
1274 return err;
1275 }
1276
static bool is_moving(u32 a, u32 b)
1278 {
1279 return a != b;
1280 }
1281
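/*
 * Compare the register dumps taken before/after the poisoning attempt. A
 * register that was stable across the reference captures but differs in the
 * result captures (excluding RING_HEAD/RING_TAIL) indicates that state has
 * leaked in from the foreign context.
 */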
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
1287 {
1288 u32 x, dw, *hw, *lrc;
1289 u32 *A[2], *B[2];
1290 u32 *defaults;
1291 int err = 0;
1292
1293 A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
1294 if (IS_ERR(A[0]))
1295 return PTR_ERR(A[0]);
1296
1297 A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
1298 if (IS_ERR(A[1])) {
1299 err = PTR_ERR(A[1]);
1300 goto err_A0;
1301 }
1302
1303 B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
1304 if (IS_ERR(B[0])) {
1305 err = PTR_ERR(B[0]);
1306 goto err_A1;
1307 }
1308
1309 B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
1310 if (IS_ERR(B[1])) {
1311 err = PTR_ERR(B[1]);
1312 goto err_B0;
1313 }
1314
1315 lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
1316 i915_coherent_map_type(engine->i915,
1317 ce->state->obj,
1318 false));
1319 if (IS_ERR(lrc)) {
1320 err = PTR_ERR(lrc);
1321 goto err_B1;
1322 }
1323 lrc += LRC_STATE_OFFSET / sizeof(*hw);
1324
1325 defaults = shmem_pin_map(ce->engine->default_state);
1326 if (!defaults) {
1327 err = -ENOMEM;
1328 goto err_lrc;
1329 }
1330
1331 x = 0;
1332 dw = 0;
1333 hw = defaults;
1334 hw += LRC_STATE_OFFSET / sizeof(*hw);
1335 do {
1336 u32 len = hw[dw] & LRI_LENGTH_MASK;
1337
1338 /* For simplicity, break parsing at the first complex command */
1339 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1340 break;
1341
1342 if (hw[dw] == 0) {
1343 dw++;
1344 continue;
1345 }
1346
1347 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1348 dw += len + 2;
1349 continue;
1350 }
1351
1352 if (!len) {
1353 pr_err("%s: invalid LRI found in context image\n",
1354 engine->name);
1355 igt_hexdump(defaults, PAGE_SIZE);
1356 break;
1357 }
1358
1359 dw++;
1360 len = (len + 1) / 2;
1361 while (len--) {
1362 if (!is_moving(A[0][x], A[1][x]) &&
1363 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
1364 switch (hw[dw] & 4095) {
1365 case 0x30: /* RING_HEAD */
1366 case 0x34: /* RING_TAIL */
1367 break;
1368
1369 default:
1370 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
1371 engine->name, dw,
1372 hw[dw], hw[dw + 1],
1373 A[0][x], B[0][x], B[1][x],
1374 poison, lrc[dw + 1]);
1375 err = -EINVAL;
1376 }
1377 }
1378 dw += 2;
1379 x++;
1380 }
1381 } while (dw < PAGE_SIZE / sizeof(u32) &&
1382 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1383
1384 shmem_unpin_map(ce->engine->default_state, defaults);
1385 err_lrc:
1386 i915_gem_object_unpin_map(ce->state->obj);
1387 err_B1:
1388 i915_gem_object_unpin_map(result[1]->obj);
1389 err_B0:
1390 i915_gem_object_unpin_map(result[0]->obj);
1391 err_A1:
1392 i915_gem_object_unpin_map(ref[1]->obj);
1393 err_A0:
1394 i915_gem_object_unpin_map(ref[0]->obj);
1395 return err;
1396 }
1397
1398 static struct i915_vma *
create_result_vma(struct i915_address_space *vm, unsigned long sz)
1400 {
1401 struct i915_vma *vma;
1402 void *ptr;
1403
1404 vma = create_user_vma(vm, sz);
1405 if (IS_ERR(vma))
1406 return vma;
1407
1408 /* Set the results to a known value distinct from the poison */
1409 ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
1410 if (IS_ERR(ptr)) {
1411 i915_vma_put(vma);
1412 return ERR_CAST(ptr);
1413 }
1414
1415 memset(ptr, POISON_INUSE, vma->size);
1416 i915_gem_object_flush_map(vma->obj);
1417 i915_gem_object_unpin_map(vma->obj);
1418
1419 return vma;
1420 }
1421
static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
1423 {
1424 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
1425 struct i915_vma *ref[2], *result[2];
1426 struct intel_context *A, *B;
1427 struct i915_request *rq;
1428 int err;
1429
1430 A = intel_context_create(engine);
1431 if (IS_ERR(A))
1432 return PTR_ERR(A);
1433
1434 B = intel_context_create(engine);
1435 if (IS_ERR(B)) {
1436 err = PTR_ERR(B);
1437 goto err_A;
1438 }
1439
1440 ref[0] = create_result_vma(A->vm, SZ_64K);
1441 if (IS_ERR(ref[0])) {
1442 err = PTR_ERR(ref[0]);
1443 goto err_B;
1444 }
1445
1446 ref[1] = create_result_vma(A->vm, SZ_64K);
1447 if (IS_ERR(ref[1])) {
1448 err = PTR_ERR(ref[1]);
1449 goto err_ref0;
1450 }
1451
1452 rq = record_registers(A, ref[0], ref[1], sema);
1453 if (IS_ERR(rq)) {
1454 err = PTR_ERR(rq);
1455 goto err_ref1;
1456 }
1457
1458 WRITE_ONCE(*sema, 1);
1459 wmb();
1460
1461 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1462 i915_request_put(rq);
1463 err = -ETIME;
1464 goto err_ref1;
1465 }
1466 i915_request_put(rq);
1467
1468 result[0] = create_result_vma(A->vm, SZ_64K);
1469 if (IS_ERR(result[0])) {
1470 err = PTR_ERR(result[0]);
1471 goto err_ref1;
1472 }
1473
1474 result[1] = create_result_vma(A->vm, SZ_64K);
1475 if (IS_ERR(result[1])) {
1476 err = PTR_ERR(result[1]);
1477 goto err_result0;
1478 }
1479
1480 rq = record_registers(A, result[0], result[1], sema);
1481 if (IS_ERR(rq)) {
1482 err = PTR_ERR(rq);
1483 goto err_result1;
1484 }
1485
1486 err = poison_registers(B, poison, sema);
1487 if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
1488 pr_err("%s(%s): wait for results timed out\n",
1489 __func__, engine->name);
1490 err = -ETIME;
1491 }
1492
1493 /* Always cancel the semaphore wait, just in case the GPU gets stuck */
1494 WRITE_ONCE(*sema, -1);
1495 i915_request_put(rq);
1496 if (err)
1497 goto err_result1;
1498
1499 err = compare_isolation(engine, ref, result, A, poison);
1500
1501 err_result1:
1502 i915_vma_put(result[1]);
1503 err_result0:
1504 i915_vma_put(result[0]);
1505 err_ref1:
1506 i915_vma_put(ref[1]);
1507 err_ref0:
1508 i915_vma_put(ref[0]);
1509 err_B:
1510 intel_context_put(B);
1511 err_A:
1512 intel_context_put(A);
1513 return err;
1514 }
1515
static bool skip_isolation(const struct intel_engine_cs *engine)
1517 {
1518 if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1519 return true;
1520
1521 if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1522 return true;
1523
1524 return false;
1525 }
1526
static int live_lrc_isolation(void *arg)
1528 {
1529 struct intel_gt *gt = arg;
1530 struct intel_engine_cs *engine;
1531 enum intel_engine_id id;
1532 const u32 poison[] = {
1533 STACK_MAGIC,
1534 0x3a3a3a3a,
1535 0x5c5c5c5c,
1536 0xffffffff,
1537 0xffff0000,
1538 };
1539 int err = 0;
1540
1541 /*
* Our goal is to verify that per-context state cannot be
1543 * tampered with by another non-privileged client.
1544 *
1545 * We take the list of context registers from the LRI in the default
1546 * context image and attempt to modify that list from a remote context.
1547 */
1548
1549 for_each_engine(engine, gt, id) {
1550 int i;
1551
1552 /* Just don't even ask */
1553 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1554 skip_isolation(engine))
1555 continue;
1556
1557 intel_engine_pm_get(engine);
1558 for (i = 0; i < ARRAY_SIZE(poison); i++) {
1559 int result;
1560
1561 result = __lrc_isolation(engine, poison[i]);
1562 if (result && !err)
1563 err = result;
1564
1565 result = __lrc_isolation(engine, ~poison[i]);
1566 if (result && !err)
1567 err = result;
1568 }
1569 intel_engine_pm_put(engine);
1570 if (igt_flush_test(gt->i915)) {
1571 err = -EIO;
1572 break;
1573 }
1574 }
1575
1576 return err;
1577 }
1578
static int indirect_ctx_submit_req(struct intel_context *ce)
1580 {
1581 struct i915_request *rq;
1582 int err = 0;
1583
1584 rq = intel_context_create_request(ce);
1585 if (IS_ERR(rq))
1586 return PTR_ERR(rq);
1587
1588 i915_request_get(rq);
1589 i915_request_add(rq);
1590
1591 if (i915_request_wait(rq, 0, HZ / 5) < 0)
1592 err = -ETIME;
1593
1594 i915_request_put(rq);
1595
1596 return err;
1597 }
1598
1599 #define CTX_BB_CANARY_OFFSET (3 * 1024)
1600 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
1601
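/*
 * Emitted from the per-context indirect ctx batch: store the current
 * RING_START register into a canary slot inside this context's wa_bb page,
 * so we can later check that the batch ran for the right context.
 */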
1602 static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1604 {
1605 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1606 MI_SRM_LRM_GLOBAL_GTT |
1607 MI_LRI_LRM_CS_MMIO;
1608 *cs++ = i915_mmio_reg_offset(RING_START(0));
1609 *cs++ = i915_ggtt_offset(ce->state) +
1610 context_wa_bb_offset(ce) +
1611 CTX_BB_CANARY_OFFSET;
1612 *cs++ = 0;
1613
1614 return cs;
1615 }
1616
1617 static void
indirect_ctx_bb_setup(struct intel_context *ce)
1619 {
1620 u32 *cs = context_indirect_bb(ce);
1621
1622 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1623
1624 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1625 }
1626
static bool check_ring_start(struct intel_context *ce)
1628 {
1629 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1630 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
1631
1632 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1633 return true;
1634
1635 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1636 ctx_bb[CTX_BB_CANARY_INDEX],
1637 ce->lrc_reg_state[CTX_RING_START]);
1638
1639 return false;
1640 }
1641
static int indirect_ctx_bb_check(struct intel_context *ce)
1643 {
1644 int err;
1645
1646 err = indirect_ctx_submit_req(ce);
1647 if (err)
1648 return err;
1649
1650 if (!check_ring_start(ce))
1651 return -EINVAL;
1652
1653 return 0;
1654 }
1655
static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
1657 {
1658 struct intel_context *a, *b;
1659 int err;
1660
1661 a = intel_context_create(engine);
1662 if (IS_ERR(a))
1663 return PTR_ERR(a);
1664 err = intel_context_pin(a);
1665 if (err)
1666 goto put_a;
1667
1668 b = intel_context_create(engine);
1669 if (IS_ERR(b)) {
1670 err = PTR_ERR(b);
1671 goto unpin_a;
1672 }
1673 err = intel_context_pin(b);
1674 if (err)
1675 goto put_b;
1676
1677 /* We use the already reserved extra page in context state */
1678 if (!a->wa_bb_page) {
1679 GEM_BUG_ON(b->wa_bb_page);
1680 GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1681 goto unpin_b;
1682 }
1683
1684 /*
* In order to test that our per-context bb is truly per context, and
* executes at the intended spot in the context restore process, make
* the batch store the ring start value to memory. As ring start is
* restored prior to starting the indirect ctx bb, and as it will be
* different for each context, it fits this purpose.
1690 */
1691 indirect_ctx_bb_setup(a);
1692 indirect_ctx_bb_setup(b);
1693
1694 err = indirect_ctx_bb_check(a);
1695 if (err)
1696 goto unpin_b;
1697
1698 err = indirect_ctx_bb_check(b);
1699
1700 unpin_b:
1701 intel_context_unpin(b);
1702 put_b:
1703 intel_context_put(b);
1704 unpin_a:
1705 intel_context_unpin(a);
1706 put_a:
1707 intel_context_put(a);
1708
1709 return err;
1710 }
1711
static int live_lrc_indirect_ctx_bb(void *arg)
1713 {
1714 struct intel_gt *gt = arg;
1715 struct intel_engine_cs *engine;
1716 enum intel_engine_id id;
1717 int err = 0;
1718
1719 for_each_engine(engine, gt, id) {
1720 intel_engine_pm_get(engine);
1721 err = __live_lrc_indirect_ctx_bb(engine);
1722 intel_engine_pm_put(engine);
1723
1724 if (igt_flush_test(gt->i915))
1725 err = -EIO;
1726
1727 if (err)
1728 break;
1729 }
1730
1731 return err;
1732 }
1733
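/*
 * Perform an engine reset by hand: take the per-engine reset bit, disable
 * the submission tasklet and reset only if the hanging request has not
 * already been flagged with an error.
 */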
static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
1736 {
1737 const unsigned int bit = I915_RESET_ENGINE + engine->id;
1738 unsigned long *lock = &engine->gt->reset.flags;
1739
1740 local_bh_disable();
1741 if (!test_and_set_bit(bit, lock)) {
1742 tasklet_disable(&engine->sched_engine->tasklet);
1743
1744 if (!rq->fence.error)
1745 __intel_engine_reset_bh(engine, NULL);
1746
1747 tasklet_enable(&engine->sched_engine->tasklet);
1748 clear_and_wake_up_bit(bit, lock);
1749 }
1750 local_bh_enable();
1751 }
1752
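/*
 * Corrupt @ce by overwriting its pinned register state with random bytes,
 * then submit a request on it; __lrc_garbage() resets the engine and checks
 * that we recover.
 */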
static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
1755 {
1756 struct i915_request *rq;
1757 int err;
1758
1759 err = intel_context_pin(ce);
1760 if (err)
1761 return ERR_PTR(err);
1762
1763 prandom_bytes_state(prng,
1764 ce->lrc_reg_state,
1765 ce->engine->context_size -
1766 LRC_STATE_OFFSET);
1767
1768 rq = intel_context_create_request(ce);
1769 if (IS_ERR(rq)) {
1770 err = PTR_ERR(rq);
1771 goto err_unpin;
1772 }
1773
1774 i915_request_get(rq);
1775 i915_request_add(rq);
1776 return rq;
1777
1778 err_unpin:
1779 intel_context_unpin(ce);
1780 return ERR_PTR(err);
1781 }
1782
static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1784 {
1785 struct intel_context *ce;
1786 struct i915_request *hang;
1787 int err = 0;
1788
1789 ce = intel_context_create(engine);
1790 if (IS_ERR(ce))
1791 return PTR_ERR(ce);
1792
1793 hang = garbage(ce, prng);
1794 if (IS_ERR(hang)) {
1795 err = PTR_ERR(hang);
1796 goto err_ce;
1797 }
1798
1799 if (wait_for_submit(engine, hang, HZ / 2)) {
1800 i915_request_put(hang);
1801 err = -ETIME;
1802 goto err_ce;
1803 }
1804
1805 intel_context_set_banned(ce);
1806 garbage_reset(engine, hang);
1807
1808 intel_engine_flush_submission(engine);
1809 if (!hang->fence.error) {
1810 i915_request_put(hang);
1811 pr_err("%s: corrupted context was not reset\n",
1812 engine->name);
1813 err = -EINVAL;
1814 goto err_ce;
1815 }
1816
1817 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1818 pr_err("%s: corrupted context did not recover\n",
1819 engine->name);
1820 i915_request_put(hang);
1821 err = -EIO;
1822 goto err_ce;
1823 }
1824 i915_request_put(hang);
1825
1826 err_ce:
1827 intel_context_put(ce);
1828 return err;
1829 }
1830
static int live_lrc_garbage(void *arg)
1832 {
1833 struct intel_gt *gt = arg;
1834 struct intel_engine_cs *engine;
1835 enum intel_engine_id id;
1836
1837 /*
1838 * Verify that we can recover if one context state is completely
1839 * corrupted.
1840 */
1841
1842 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1843 return 0;
1844
1845 for_each_engine(engine, gt, id) {
1846 I915_RND_STATE(prng);
1847 int err = 0, i;
1848
1849 if (!intel_has_reset_engine(engine->gt))
1850 continue;
1851
1852 intel_engine_pm_get(engine);
1853 for (i = 0; i < 3; i++) {
1854 err = __lrc_garbage(engine, &prng);
1855 if (err)
1856 break;
1857 }
1858 intel_engine_pm_put(engine);
1859
1860 if (igt_flush_test(gt->i915))
1861 err = -EIO;
1862 if (err)
1863 return err;
1864 }
1865
1866 return 0;
1867 }
1868
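/*
 * Hammer the engine with batches of requests on a single context, then
 * check that the accumulated PPHWSP runtime never ran backwards (i.e. no
 * underflow was recorded in the context stats).
 */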
static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1870 {
1871 struct intel_context *ce;
1872 struct i915_request *rq;
1873 IGT_TIMEOUT(end_time);
1874 int err;
1875
1876 ce = intel_context_create(engine);
1877 if (IS_ERR(ce))
1878 return PTR_ERR(ce);
1879
1880 ce->stats.runtime.num_underflow = 0;
1881 ce->stats.runtime.max_underflow = 0;
1882
1883 do {
1884 unsigned int loop = 1024;
1885
1886 while (loop) {
1887 rq = intel_context_create_request(ce);
1888 if (IS_ERR(rq)) {
1889 err = PTR_ERR(rq);
1890 goto err_rq;
1891 }
1892
1893 if (--loop == 0)
1894 i915_request_get(rq);
1895
1896 i915_request_add(rq);
1897 }
1898
1899 if (__igt_timeout(end_time, NULL))
1900 break;
1901
1902 i915_request_put(rq);
1903 } while (1);
1904
1905 err = i915_request_wait(rq, 0, HZ / 5);
1906 if (err < 0) {
1907 pr_err("%s: request not completed!\n", engine->name);
1908 goto err_wait;
1909 }
1910
1911 igt_flush_test(engine->i915);
1912
1913 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1914 engine->name,
1915 intel_context_get_total_runtime_ns(ce),
1916 intel_context_get_avg_runtime_ns(ce));
1917
1918 err = 0;
1919 if (ce->stats.runtime.num_underflow) {
1920 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1921 engine->name,
1922 ce->stats.runtime.num_underflow,
1923 ce->stats.runtime.max_underflow);
1924 GEM_TRACE_DUMP();
1925 err = -EOVERFLOW;
1926 }
1927
1928 err_wait:
1929 i915_request_put(rq);
1930 err_rq:
1931 intel_context_put(ce);
1932 return err;
1933 }
1934
static int live_pphwsp_runtime(void *arg)
1936 {
1937 struct intel_gt *gt = arg;
1938 struct intel_engine_cs *engine;
1939 enum intel_engine_id id;
1940 int err = 0;
1941
1942 /*
* Check that the cumulative context runtime, as stored in the pphwsp[16],
1944 * is monotonic.
1945 */
1946
1947 for_each_engine(engine, gt, id) {
1948 err = __live_pphwsp_runtime(engine);
1949 if (err)
1950 break;
1951 }
1952
1953 if (igt_flush_test(gt->i915))
1954 err = -EIO;
1955
1956 return err;
1957 }
1958
int intel_lrc_live_selftests(struct drm_i915_private *i915)
1960 {
1961 static const struct i915_subtest tests[] = {
1962 SUBTEST(live_lrc_layout),
1963 SUBTEST(live_lrc_fixed),
1964 SUBTEST(live_lrc_state),
1965 SUBTEST(live_lrc_gpr),
1966 SUBTEST(live_lrc_isolation),
1967 SUBTEST(live_lrc_timestamp),
1968 SUBTEST(live_lrc_garbage),
1969 SUBTEST(live_pphwsp_runtime),
1970 SUBTEST(live_lrc_indirect_ctx_bb),
1971 };
1972
1973 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1974 return 0;
1975
1976 return intel_gt_live_subtests(tests, to_gt(i915));
1977 }
1978