1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include "gem/i915_gem_internal.h"
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 #include "gt/selftest_engine_heartbeat.h"
13 
14 #include "i915_selftest.h"
15 #include "selftests/i915_random.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_live_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/lib_sw_fence.h"
20 
21 #include "gem/selftests/igt_gem_utils.h"
22 #include "gem/selftests/mock_context.h"
23 
/*
 * Register offset of dword @n in an engine's GPR bank: the bank is addressed
 * at mmio_base + 0x600, with consecutive dwords 4 bytes apart.
 */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
/* Number of GPRs assumed by the tests in this file. */
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
27 
is_active(struct i915_request * rq)28 static bool is_active(struct i915_request *rq)
29 {
30 	if (i915_request_is_active(rq))
31 		return true;
32 
33 	if (i915_request_on_hold(rq))
34 		return true;
35 
36 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
37 		return true;
38 
39 	return false;
40 }
41 
wait_for_submit(struct intel_engine_cs * engine,struct i915_request * rq,unsigned long timeout)42 static int wait_for_submit(struct intel_engine_cs *engine,
43 			   struct i915_request *rq,
44 			   unsigned long timeout)
45 {
46 	/* Ignore our own attempts to suppress excess tasklets */
47 	tasklet_hi_schedule(&engine->sched_engine->tasklet);
48 
49 	timeout += jiffies;
50 	do {
51 		bool done = time_after(jiffies, timeout);
52 
53 		if (i915_request_completed(rq)) /* that was quick! */
54 			return 0;
55 
56 		/* Wait until the HW has acknowleged the submission (or err) */
57 		intel_engine_flush_submission(engine);
58 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
59 			return 0;
60 
61 		if (done)
62 			return -ETIME;
63 
64 		cond_resched();
65 	} while (1);
66 }
67 
wait_for_reset(struct intel_engine_cs * engine,struct i915_request * rq,unsigned long timeout)68 static int wait_for_reset(struct intel_engine_cs *engine,
69 			  struct i915_request *rq,
70 			  unsigned long timeout)
71 {
72 	timeout += jiffies;
73 
74 	do {
75 		cond_resched();
76 		intel_engine_flush_submission(engine);
77 
78 		if (READ_ONCE(engine->execlists.pending[0]))
79 			continue;
80 
81 		if (i915_request_completed(rq))
82 			break;
83 
84 		if (READ_ONCE(rq->fence.error))
85 			break;
86 	} while (time_before(jiffies, timeout));
87 
88 	flush_scheduled_work();
89 
90 	if (rq->fence.error != -EIO) {
91 		pr_err("%s: hanging request %llx:%lld not reset\n",
92 		       engine->name,
93 		       rq->fence.context,
94 		       rq->fence.seqno);
95 		return -EINVAL;
96 	}
97 
98 	/* Give the request a jiffie to complete after flushing the worker */
99 	if (i915_request_wait(rq, 0,
100 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
101 		pr_err("%s: hanging request %llx:%lld did not complete\n",
102 		       engine->name,
103 		       rq->fence.context,
104 		       rq->fence.seqno);
105 		return -ETIME;
106 	}
107 
108 	return 0;
109 }
110 
live_sanitycheck(void * arg)111 static int live_sanitycheck(void *arg)
112 {
113 	struct intel_gt *gt = arg;
114 	struct intel_engine_cs *engine;
115 	enum intel_engine_id id;
116 	struct igt_spinner spin;
117 	int err = 0;
118 
119 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
120 		return 0;
121 
122 	if (igt_spinner_init(&spin, gt))
123 		return -ENOMEM;
124 
125 	for_each_engine(engine, gt, id) {
126 		struct intel_context *ce;
127 		struct i915_request *rq;
128 
129 		ce = intel_context_create(engine);
130 		if (IS_ERR(ce)) {
131 			err = PTR_ERR(ce);
132 			break;
133 		}
134 
135 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
136 		if (IS_ERR(rq)) {
137 			err = PTR_ERR(rq);
138 			goto out_ctx;
139 		}
140 
141 		i915_request_add(rq);
142 		if (!igt_wait_for_spinner(&spin, rq)) {
143 			GEM_TRACE("spinner failed to start\n");
144 			GEM_TRACE_DUMP();
145 			intel_gt_set_wedged(gt);
146 			err = -EIO;
147 			goto out_ctx;
148 		}
149 
150 		igt_spinner_end(&spin);
151 		if (igt_flush_test(gt->i915)) {
152 			err = -EIO;
153 			goto out_ctx;
154 		}
155 
156 out_ctx:
157 		intel_context_put(ce);
158 		if (err)
159 			break;
160 	}
161 
162 	igt_spinner_fini(&spin);
163 	return err;
164 }
165 
live_unlite_restore(struct intel_gt * gt,int prio)166 static int live_unlite_restore(struct intel_gt *gt, int prio)
167 {
168 	struct intel_engine_cs *engine;
169 	enum intel_engine_id id;
170 	struct igt_spinner spin;
171 	int err = -ENOMEM;
172 
173 	/*
174 	 * Check that we can correctly context switch between 2 instances
175 	 * on the same engine from the same parent context.
176 	 */
177 
178 	if (igt_spinner_init(&spin, gt))
179 		return err;
180 
181 	err = 0;
182 	for_each_engine(engine, gt, id) {
183 		struct intel_context *ce[2] = {};
184 		struct i915_request *rq[2];
185 		struct igt_live_test t;
186 		int n;
187 
188 		if (prio && !intel_engine_has_preemption(engine))
189 			continue;
190 
191 		if (!intel_engine_can_store_dword(engine))
192 			continue;
193 
194 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
195 			err = -EIO;
196 			break;
197 		}
198 		st_engine_heartbeat_disable(engine);
199 
200 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
201 			struct intel_context *tmp;
202 
203 			tmp = intel_context_create(engine);
204 			if (IS_ERR(tmp)) {
205 				err = PTR_ERR(tmp);
206 				goto err_ce;
207 			}
208 
209 			err = intel_context_pin(tmp);
210 			if (err) {
211 				intel_context_put(tmp);
212 				goto err_ce;
213 			}
214 
215 			/*
216 			 * Setup the pair of contexts such that if we
217 			 * lite-restore using the RING_TAIL from ce[1] it
218 			 * will execute garbage from ce[0]->ring.
219 			 */
220 			memset(tmp->ring->vaddr,
221 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
222 			       tmp->ring->vma->size);
223 
224 			ce[n] = tmp;
225 		}
226 		GEM_BUG_ON(!ce[1]->ring->size);
227 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
228 		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
229 
230 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
231 		if (IS_ERR(rq[0])) {
232 			err = PTR_ERR(rq[0]);
233 			goto err_ce;
234 		}
235 
236 		i915_request_get(rq[0]);
237 		i915_request_add(rq[0]);
238 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
239 
240 		if (!igt_wait_for_spinner(&spin, rq[0])) {
241 			i915_request_put(rq[0]);
242 			goto err_ce;
243 		}
244 
245 		rq[1] = i915_request_create(ce[1]);
246 		if (IS_ERR(rq[1])) {
247 			err = PTR_ERR(rq[1]);
248 			i915_request_put(rq[0]);
249 			goto err_ce;
250 		}
251 
252 		if (!prio) {
253 			/*
254 			 * Ensure we do the switch to ce[1] on completion.
255 			 *
256 			 * rq[0] is already submitted, so this should reduce
257 			 * to a no-op (a wait on a request on the same engine
258 			 * uses the submit fence, not the completion fence),
259 			 * but it will install a dependency on rq[1] for rq[0]
260 			 * that will prevent the pair being reordered by
261 			 * timeslicing.
262 			 */
263 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
264 		}
265 
266 		i915_request_get(rq[1]);
267 		i915_request_add(rq[1]);
268 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
269 		i915_request_put(rq[0]);
270 
271 		if (prio) {
272 			struct i915_sched_attr attr = {
273 				.priority = prio,
274 			};
275 
276 			/* Alternatively preempt the spinner with ce[1] */
277 			engine->sched_engine->schedule(rq[1], &attr);
278 		}
279 
280 		/* And switch back to ce[0] for good measure */
281 		rq[0] = i915_request_create(ce[0]);
282 		if (IS_ERR(rq[0])) {
283 			err = PTR_ERR(rq[0]);
284 			i915_request_put(rq[1]);
285 			goto err_ce;
286 		}
287 
288 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
289 		i915_request_get(rq[0]);
290 		i915_request_add(rq[0]);
291 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
292 		i915_request_put(rq[1]);
293 		i915_request_put(rq[0]);
294 
295 err_ce:
296 		intel_engine_flush_submission(engine);
297 		igt_spinner_end(&spin);
298 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
299 			if (IS_ERR_OR_NULL(ce[n]))
300 				break;
301 
302 			intel_context_unpin(ce[n]);
303 			intel_context_put(ce[n]);
304 		}
305 
306 		st_engine_heartbeat_enable(engine);
307 		if (igt_live_test_end(&t))
308 			err = -EIO;
309 		if (err)
310 			break;
311 	}
312 
313 	igt_spinner_fini(&spin);
314 	return err;
315 }
316 
/* Selftest entry point: run live_unlite_restore() with prio 0. */
static int live_unlite_switch(void *arg)
{
	struct intel_gt *gt = arg;

	return live_unlite_restore(gt, 0);
}
321 
live_unlite_preempt(void * arg)322 static int live_unlite_preempt(void *arg)
323 {
324 	return live_unlite_restore(arg, I915_PRIORITY_MAX);
325 }
326 
/*
 * Selftest: fill one context's ring behind a spinner until it wraps, then
 * submit a barrier-priority request on a second context and check that it
 * is submitted.
 *
 * Engines without preemption or without store-dword support are skipped.
 * Returns 0 on success, -ENOMEM if the spinner cannot be initialised,
 * -ETIME if the spinner does not start or the preempting request is not
 * submitted, -EIO if the live test reports a failure, or the error from
 * context/request creation.
 */
static int live_unlite_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct igt_spinner spin;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Setup a preemption event that will cause almost the entire ring
	 * to be unwound, potentially fooling our intel_ring_direction()
	 * into emitting a forward lite-restore instead of the rollback.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq;
		struct igt_live_test t;
		int n;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		/* Create and pin both contexts, poisoning each ring */
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			memset32(tmp->ring->vaddr,
				 0xdeadbeef, /* trigger a hang if executed */
				 tmp->ring->vma->size / sizeof(u32));

			/* Only fully set up contexts are recorded for cleanup */
			ce[n] = tmp;
		}

		/* Create max prio spinner, followed by N low prio nops */
		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ce;
		}

		i915_request_get(rq);
		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_ce;
		}

		/* Fill the ring, until we will cause a wrap */
		n = 0;
		while (intel_ring_direction(ce[0]->ring,
					    rq->wa_tail,
					    ce[0]->ring->tail) <= 0) {
			struct i915_request *tmp;

			tmp = intel_context_create_request(ce[0]);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				i915_request_put(rq);
				goto err_ce;
			}

			i915_request_add(tmp);
			intel_engine_flush_submission(engine);
			n++;
		}
		intel_engine_flush_submission(engine);
		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
			 engine->name, n,
			 ce[0]->ring->size,
			 ce[0]->ring->tail,
			 ce[0]->ring->emit,
			 rq->tail);
		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
						rq->tail,
						ce[0]->ring->tail) <= 0);
		/* Done with the spinner request; rq is reused below */
		i915_request_put(rq);

		/* Create a second ring to preempt the first ring after rq[0] */
		rq = intel_context_create_request(ce[1]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ce;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		err = wait_for_submit(engine, rq, HZ / 2);
		i915_request_put(rq);
		if (err) {
			pr_err("%s: preemption request was not submitted\n",
			       engine->name);
			err = -ETIME;
		}

		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
			 engine->name,
			 ce[0]->ring->tail, ce[0]->ring->emit,
			 ce[1]->ring->tail, ce[1]->ring->emit);

err_ce:
		/* Common exit for this engine: stop the spinner, drop contexts */
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}
		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
476 
/*
 * Selftest: move a context's ring emit/tail away from its head while the
 * context is pinned, unpin it, and then submit a nop request on it.
 *
 * Returns 0 on success, -EIO if the live test reports a failure, or the
 * error from context creation, pinning, activation or request creation.
 */
static int live_pin_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * We have to be careful not to trust intel_ring too much, for example
	 * ring->head is updated upon retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		/* Both references are dropped before rq is checked for error */
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		/* The request must start at the displaced, non-zero offset */
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}
554 
engine_lock_reset_tasklet(struct intel_engine_cs * engine)555 static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
556 {
557 	tasklet_disable(&engine->sched_engine->tasklet);
558 	local_bh_disable();
559 
560 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
561 			     &engine->gt->reset.flags)) {
562 		local_bh_enable();
563 		tasklet_enable(&engine->sched_engine->tasklet);
564 
565 		intel_gt_set_wedged(engine->gt);
566 		return -EBUSY;
567 	}
568 
569 	return 0;
570 }
571 
engine_unlock_reset_tasklet(struct intel_engine_cs * engine)572 static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
573 {
574 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
575 			      &engine->gt->reset.flags);
576 
577 	local_bh_enable();
578 	tasklet_enable(&engine->sched_engine->tasklet);
579 }
580 
/*
 * Selftest: put an executing spinner request on hold, reset the engine,
 * and check that the held request is not resubmitted until it is released.
 *
 * Returns 0 on success or when engine reset is not supported, -ENOMEM if
 * the spinner cannot be initialised, -ETIME if the spinner does not start
 * or the released request does not complete, -EIO if the held request
 * completes while on hold, or the error from context/request creation or
 * engine_lock_reset_tasklet().
 */
static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendents are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		err = engine_lock_reset_tasklet(engine);
		if (err)
			goto out;

		/* The tasklet is disabled, so invoke its callback directly */
		engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Take our own reference before putting the request on hold */
		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		__intel_engine_reset_bh(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		engine_unlock_reset_tasklet(engine);

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		/* Common exit for this engine */
		st_engine_heartbeat_enable(engine);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
674 
/* Map an error code to a short label for log messages: non-zero is "bad". */
static const char *error_repr(int err)
{
	if (err)
		return "bad";

	return "good";
}
679 
live_error_interrupt(void * arg)680 static int live_error_interrupt(void *arg)
681 {
682 	static const struct error_phase {
683 		enum { GOOD = 0, BAD = -EIO } error[2];
684 	} phases[] = {
685 		{ { BAD,  GOOD } },
686 		{ { BAD,  BAD  } },
687 		{ { BAD,  GOOD } },
688 		{ { GOOD, GOOD } }, /* sentinel */
689 	};
690 	struct intel_gt *gt = arg;
691 	struct intel_engine_cs *engine;
692 	enum intel_engine_id id;
693 
694 	/*
695 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
696 	 * of invalid commands in user batches that will cause a GPU hang.
697 	 * This is a faster mechanism than using hangcheck/heartbeats, but
698 	 * only detects problems the HW knows about -- it will not warn when
699 	 * we kill the HW!
700 	 *
701 	 * To verify our detection and reset, we throw some invalid commands
702 	 * at the HW and wait for the interrupt.
703 	 */
704 
705 	if (!intel_has_reset_engine(gt))
706 		return 0;
707 
708 	for_each_engine(engine, gt, id) {
709 		const struct error_phase *p;
710 		int err = 0;
711 
712 		st_engine_heartbeat_disable(engine);
713 
714 		for (p = phases; p->error[0] != GOOD; p++) {
715 			struct i915_request *client[ARRAY_SIZE(phases->error)];
716 			u32 *cs;
717 			int i;
718 
719 			memset(client, 0, sizeof(*client));
720 			for (i = 0; i < ARRAY_SIZE(client); i++) {
721 				struct intel_context *ce;
722 				struct i915_request *rq;
723 
724 				ce = intel_context_create(engine);
725 				if (IS_ERR(ce)) {
726 					err = PTR_ERR(ce);
727 					goto out;
728 				}
729 
730 				rq = intel_context_create_request(ce);
731 				intel_context_put(ce);
732 				if (IS_ERR(rq)) {
733 					err = PTR_ERR(rq);
734 					goto out;
735 				}
736 
737 				if (rq->engine->emit_init_breadcrumb) {
738 					err = rq->engine->emit_init_breadcrumb(rq);
739 					if (err) {
740 						i915_request_add(rq);
741 						goto out;
742 					}
743 				}
744 
745 				cs = intel_ring_begin(rq, 2);
746 				if (IS_ERR(cs)) {
747 					i915_request_add(rq);
748 					err = PTR_ERR(cs);
749 					goto out;
750 				}
751 
752 				if (p->error[i]) {
753 					*cs++ = 0xdeadbeef;
754 					*cs++ = 0xdeadbeef;
755 				} else {
756 					*cs++ = MI_NOOP;
757 					*cs++ = MI_NOOP;
758 				}
759 
760 				client[i] = i915_request_get(rq);
761 				i915_request_add(rq);
762 			}
763 
764 			err = wait_for_submit(engine, client[0], HZ / 2);
765 			if (err) {
766 				pr_err("%s: first request did not start within time!\n",
767 				       engine->name);
768 				err = -ETIME;
769 				goto out;
770 			}
771 
772 			for (i = 0; i < ARRAY_SIZE(client); i++) {
773 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
774 					pr_debug("%s: %s request incomplete!\n",
775 						 engine->name,
776 						 error_repr(p->error[i]));
777 
778 				if (!i915_request_started(client[i])) {
779 					pr_err("%s: %s request not started!\n",
780 					       engine->name,
781 					       error_repr(p->error[i]));
782 					err = -ETIME;
783 					goto out;
784 				}
785 
786 				/* Kick the tasklet to process the error */
787 				intel_engine_flush_submission(engine);
788 				if (client[i]->fence.error != p->error[i]) {
789 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
790 					       engine->name,
791 					       error_repr(p->error[i]),
792 					       i915_request_completed(client[i]) ? "completed" : "running",
793 					       client[i]->fence.error);
794 					err = -EINVAL;
795 					goto out;
796 				}
797 			}
798 
799 out:
800 			for (i = 0; i < ARRAY_SIZE(client); i++)
801 				if (client[i])
802 					i915_request_put(client[i]);
803 			if (err) {
804 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
805 				       engine->name, p - phases,
806 				       p->error[0], p->error[1]);
807 				break;
808 			}
809 		}
810 
811 		st_engine_heartbeat_enable(engine);
812 		if (err) {
813 			intel_gt_set_wedged(gt);
814 			return err;
815 		}
816 	}
817 
818 	return 0;
819 }
820 
821 static int
emit_semaphore_chain(struct i915_request * rq,struct i915_vma * vma,int idx)822 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
823 {
824 	u32 *cs;
825 
826 	cs = intel_ring_begin(rq, 10);
827 	if (IS_ERR(cs))
828 		return PTR_ERR(cs);
829 
830 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
831 
832 	*cs++ = MI_SEMAPHORE_WAIT |
833 		MI_SEMAPHORE_GLOBAL_GTT |
834 		MI_SEMAPHORE_POLL |
835 		MI_SEMAPHORE_SAD_NEQ_SDD;
836 	*cs++ = 0;
837 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
838 	*cs++ = 0;
839 
840 	if (idx > 0) {
841 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
842 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
843 		*cs++ = 0;
844 		*cs++ = 1;
845 	} else {
846 		*cs++ = MI_NOOP;
847 		*cs++ = MI_NOOP;
848 		*cs++ = MI_NOOP;
849 		*cs++ = MI_NOOP;
850 	}
851 
852 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
853 
854 	intel_ring_advance(rq, cs);
855 	return 0;
856 }
857 
858 static struct i915_request *
semaphore_queue(struct intel_engine_cs * engine,struct i915_vma * vma,int idx)859 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
860 {
861 	struct intel_context *ce;
862 	struct i915_request *rq;
863 	int err;
864 
865 	ce = intel_context_create(engine);
866 	if (IS_ERR(ce))
867 		return ERR_CAST(ce);
868 
869 	rq = intel_context_create_request(ce);
870 	if (IS_ERR(rq))
871 		goto out_ce;
872 
873 	err = 0;
874 	if (rq->engine->emit_init_breadcrumb)
875 		err = rq->engine->emit_init_breadcrumb(rq);
876 	if (err == 0)
877 		err = emit_semaphore_chain(rq, vma, idx);
878 	if (err == 0)
879 		i915_request_get(rq);
880 	i915_request_add(rq);
881 	if (err)
882 		rq = ERR_PTR(err);
883 
884 out_ce:
885 	intel_context_put(ce);
886 	return rq;
887 }
888 
889 static int
release_queue(struct intel_engine_cs * engine,struct i915_vma * vma,int idx,int prio)890 release_queue(struct intel_engine_cs *engine,
891 	      struct i915_vma *vma,
892 	      int idx, int prio)
893 {
894 	struct i915_sched_attr attr = {
895 		.priority = prio,
896 	};
897 	struct i915_request *rq;
898 	u32 *cs;
899 
900 	rq = intel_engine_create_kernel_request(engine);
901 	if (IS_ERR(rq))
902 		return PTR_ERR(rq);
903 
904 	cs = intel_ring_begin(rq, 4);
905 	if (IS_ERR(cs)) {
906 		i915_request_add(rq);
907 		return PTR_ERR(cs);
908 	}
909 
910 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
911 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
912 	*cs++ = 0;
913 	*cs++ = 1;
914 
915 	intel_ring_advance(rq, cs);
916 
917 	i915_request_get(rq);
918 	i915_request_add(rq);
919 
920 	local_bh_disable();
921 	engine->sched_engine->schedule(rq, &attr);
922 	local_bh_enable(); /* kick tasklet */
923 
924 	i915_request_put(rq);
925 
926 	return 0;
927 }
928 
929 static int
slice_semaphore_queue(struct intel_engine_cs * outer,struct i915_vma * vma,int count)930 slice_semaphore_queue(struct intel_engine_cs *outer,
931 		      struct i915_vma *vma,
932 		      int count)
933 {
934 	struct intel_engine_cs *engine;
935 	struct i915_request *head;
936 	enum intel_engine_id id;
937 	int err, i, n = 0;
938 
939 	head = semaphore_queue(outer, vma, n++);
940 	if (IS_ERR(head))
941 		return PTR_ERR(head);
942 
943 	for_each_engine(engine, outer->gt, id) {
944 		if (!intel_engine_has_preemption(engine))
945 			continue;
946 
947 		for (i = 0; i < count; i++) {
948 			struct i915_request *rq;
949 
950 			rq = semaphore_queue(engine, vma, n++);
951 			if (IS_ERR(rq)) {
952 				err = PTR_ERR(rq);
953 				goto out;
954 			}
955 
956 			i915_request_put(rq);
957 		}
958 	}
959 
960 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
961 	if (err)
962 		goto out;
963 
964 	if (i915_request_wait(head, 0,
965 			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
966 		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
967 		       outer->name, count, n);
968 		GEM_TRACE_DUMP();
969 		intel_gt_set_wedged(outer->gt);
970 		err = -EIO;
971 	}
972 
973 out:
974 	i915_request_put(head);
975 	return err;
976 }
977 
/*
 * Selftest: for every engine with preemption, run slice_semaphore_queue()
 * with a chain of 5 links per engine over a zeroed, GGTT-pinned page.
 *
 * Returns 0 on success or when CONFIG_DRM_I915_TIMESLICE_DURATION is 0,
 * -EIO if flushing the GT fails, or the error from object/vma setup or
 * from slice_semaphore_queue().
 */
static int live_timeslice_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * If a request takes too long, we would like to give other users
	 * a fair go on the GPU. In particular, users may create batches
	 * that wait upon external input, where that input may even be
	 * supplied by another GPU job. To avoid blocking forever, we
	 * need to preempt the current task and replace it with another
	 * ready task.
	 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return 0;

	/* One page of semaphore slots, shared by every engine's chain */
	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_has_preemption(engine))
			continue;

		/* Start each engine's chain from cleared slots */
		memset(vaddr, 0, PAGE_SIZE);

		st_engine_heartbeat_disable(engine);
		err = slice_semaphore_queue(engine, vma, 5);
		st_engine_heartbeat_enable(engine);
		if (err)
			goto err_pin;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto err_pin;
		}
	}

	/* The success path falls through the same unwind ladder */
err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}
1049 
1050 static struct i915_request *
create_rewinder(struct intel_context * ce,struct i915_request * wait,void * slot,int idx)1051 create_rewinder(struct intel_context *ce,
1052 		struct i915_request *wait,
1053 		void *slot, int idx)
1054 {
1055 	const u32 offset =
1056 		i915_ggtt_offset(ce->engine->status_page.vma) +
1057 		offset_in_page(slot);
1058 	struct i915_request *rq;
1059 	u32 *cs;
1060 	int err;
1061 
1062 	rq = intel_context_create_request(ce);
1063 	if (IS_ERR(rq))
1064 		return rq;
1065 
1066 	if (wait) {
1067 		err = i915_request_await_dma_fence(rq, &wait->fence);
1068 		if (err)
1069 			goto err;
1070 	}
1071 
1072 	cs = intel_ring_begin(rq, 14);
1073 	if (IS_ERR(cs)) {
1074 		err = PTR_ERR(cs);
1075 		goto err;
1076 	}
1077 
1078 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1079 	*cs++ = MI_NOOP;
1080 
1081 	*cs++ = MI_SEMAPHORE_WAIT |
1082 		MI_SEMAPHORE_GLOBAL_GTT |
1083 		MI_SEMAPHORE_POLL |
1084 		MI_SEMAPHORE_SAD_GTE_SDD;
1085 	*cs++ = idx;
1086 	*cs++ = offset;
1087 	*cs++ = 0;
1088 
1089 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1090 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1091 	*cs++ = offset + idx * sizeof(u32);
1092 	*cs++ = 0;
1093 
1094 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1095 	*cs++ = offset;
1096 	*cs++ = 0;
1097 	*cs++ = idx + 1;
1098 
1099 	intel_ring_advance(rq, cs);
1100 
1101 	err = 0;
1102 err:
1103 	i915_request_get(rq);
1104 	i915_request_add(rq);
1105 	if (err) {
1106 		i915_request_put(rq);
1107 		return ERR_PTR(err);
1108 	}
1109 
1110 	return rq;
1111 }
1112 
live_timeslice_rewind(void * arg)1113 static int live_timeslice_rewind(void *arg)
1114 {
1115 	struct intel_gt *gt = arg;
1116 	struct intel_engine_cs *engine;
1117 	enum intel_engine_id id;
1118 
1119 	/*
1120 	 * The usual presumption on timeslice expiration is that we replace
1121 	 * the active context with another. However, given a chain of
1122 	 * dependencies we may end up with replacing the context with itself,
1123 	 * but only a few of those requests, forcing us to rewind the
1124 	 * RING_TAIL of the original request.
1125 	 */
1126 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1127 		return 0;
1128 
1129 	for_each_engine(engine, gt, id) {
1130 		enum { A1, A2, B1 };
1131 		enum { X = 1, Z, Y };
1132 		struct i915_request *rq[3] = {};
1133 		struct intel_context *ce;
1134 		unsigned long timeslice;
1135 		int i, err = 0;
1136 		u32 *slot;
1137 
1138 		if (!intel_engine_has_timeslices(engine))
1139 			continue;
1140 
1141 		/*
1142 		 * A:rq1 -- semaphore wait, timestamp X
1143 		 * A:rq2 -- write timestamp Y
1144 		 *
1145 		 * B:rq1 [await A:rq1] -- write timestamp Z
1146 		 *
1147 		 * Force timeslice, release semaphore.
1148 		 *
1149 		 * Expect execution/evaluation order XZY
1150 		 */
1151 
1152 		st_engine_heartbeat_disable(engine);
1153 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1154 
1155 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1156 
1157 		ce = intel_context_create(engine);
1158 		if (IS_ERR(ce)) {
1159 			err = PTR_ERR(ce);
1160 			goto err;
1161 		}
1162 
1163 		rq[A1] = create_rewinder(ce, NULL, slot, X);
1164 		if (IS_ERR(rq[A1])) {
1165 			intel_context_put(ce);
1166 			goto err;
1167 		}
1168 
1169 		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1170 		intel_context_put(ce);
1171 		if (IS_ERR(rq[A2]))
1172 			goto err;
1173 
1174 		err = wait_for_submit(engine, rq[A2], HZ / 2);
1175 		if (err) {
1176 			pr_err("%s: failed to submit first context\n",
1177 			       engine->name);
1178 			goto err;
1179 		}
1180 
1181 		ce = intel_context_create(engine);
1182 		if (IS_ERR(ce)) {
1183 			err = PTR_ERR(ce);
1184 			goto err;
1185 		}
1186 
1187 		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1188 		intel_context_put(ce);
1189 		if (IS_ERR(rq[2]))
1190 			goto err;
1191 
1192 		err = wait_for_submit(engine, rq[B1], HZ / 2);
1193 		if (err) {
1194 			pr_err("%s: failed to submit second context\n",
1195 			       engine->name);
1196 			goto err;
1197 		}
1198 
1199 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1200 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1201 		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1202 			/* Wait for the timeslice to kick in */
1203 			del_timer(&engine->execlists.timer);
1204 			tasklet_hi_schedule(&engine->sched_engine->tasklet);
1205 			intel_engine_flush_submission(engine);
1206 		}
1207 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1208 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1209 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1210 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1211 
1212 		/* Release the hounds! */
1213 		slot[0] = 1;
1214 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1215 
1216 		for (i = 1; i <= 3; i++) {
1217 			unsigned long timeout = jiffies + HZ / 2;
1218 
1219 			while (!READ_ONCE(slot[i]) &&
1220 			       time_before(jiffies, timeout))
1221 				;
1222 
1223 			if (!time_before(jiffies, timeout)) {
1224 				pr_err("%s: rq[%d] timed out\n",
1225 				       engine->name, i - 1);
1226 				err = -ETIME;
1227 				goto err;
1228 			}
1229 
1230 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1231 		}
1232 
1233 		/* XZY: XZ < XY */
1234 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1235 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1236 			       engine->name,
1237 			       slot[Z] - slot[X],
1238 			       slot[Y] - slot[X]);
1239 			err = -EINVAL;
1240 		}
1241 
1242 err:
1243 		memset32(&slot[0], -1, 4);
1244 		wmb();
1245 
1246 		engine->props.timeslice_duration_ms = timeslice;
1247 		st_engine_heartbeat_enable(engine);
1248 		for (i = 0; i < 3; i++)
1249 			i915_request_put(rq[i]);
1250 		if (igt_flush_test(gt->i915))
1251 			err = -EIO;
1252 		if (err)
1253 			return err;
1254 	}
1255 
1256 	return 0;
1257 }
1258 
/*
 * Create a kernel request on @engine, take an extra reference on it and add
 * it. Nothing is emitted into the request by this helper itself.
 *
 * Returns the request, which the caller is expected to release with
 * i915_request_put(), or the ERR_PTR handed back by
 * intel_engine_create_kernel_request().
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *nop = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(nop)) {
		/* Take our reference before the request is added */
		i915_request_get(nop);
		i915_request_add(nop);
	}

	return nop;
}
1272 
slice_timeout(struct intel_engine_cs * engine)1273 static long slice_timeout(struct intel_engine_cs *engine)
1274 {
1275 	long timeout;
1276 
1277 	/* Enough time for a timeslice to kick in, and kick out */
1278 	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1279 
1280 	/* Enough time for the nop request to complete */
1281 	timeout += HZ / 5;
1282 
1283 	return timeout + 1;
1284 }
1285 
live_timeslice_queue(void * arg)1286 static int live_timeslice_queue(void *arg)
1287 {
1288 	struct intel_gt *gt = arg;
1289 	struct drm_i915_gem_object *obj;
1290 	struct intel_engine_cs *engine;
1291 	enum intel_engine_id id;
1292 	struct i915_vma *vma;
1293 	void *vaddr;
1294 	int err = 0;
1295 
1296 	/*
1297 	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1298 	 * timeslicing between them disabled, we *do* enable timeslicing
1299 	 * if the queue demands it. (Normally, we do not submit if
1300 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1301 	 * eject ELSP[0] in favour of the queue.)
1302 	 */
1303 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1304 		return 0;
1305 
1306 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1307 	if (IS_ERR(obj))
1308 		return PTR_ERR(obj);
1309 
1310 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1311 	if (IS_ERR(vma)) {
1312 		err = PTR_ERR(vma);
1313 		goto err_obj;
1314 	}
1315 
1316 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1317 	if (IS_ERR(vaddr)) {
1318 		err = PTR_ERR(vaddr);
1319 		goto err_obj;
1320 	}
1321 
1322 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1323 	if (err)
1324 		goto err_map;
1325 
1326 	err = i915_vma_sync(vma);
1327 	if (err)
1328 		goto err_pin;
1329 
1330 	for_each_engine(engine, gt, id) {
1331 		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1332 		struct i915_request *rq, *nop;
1333 
1334 		if (!intel_engine_has_preemption(engine))
1335 			continue;
1336 
1337 		st_engine_heartbeat_disable(engine);
1338 		memset(vaddr, 0, PAGE_SIZE);
1339 
1340 		/* ELSP[0]: semaphore wait */
1341 		rq = semaphore_queue(engine, vma, 0);
1342 		if (IS_ERR(rq)) {
1343 			err = PTR_ERR(rq);
1344 			goto err_heartbeat;
1345 		}
1346 		engine->sched_engine->schedule(rq, &attr);
1347 		err = wait_for_submit(engine, rq, HZ / 2);
1348 		if (err) {
1349 			pr_err("%s: Timed out trying to submit semaphores\n",
1350 			       engine->name);
1351 			goto err_rq;
1352 		}
1353 
1354 		/* ELSP[1]: nop request */
1355 		nop = nop_request(engine);
1356 		if (IS_ERR(nop)) {
1357 			err = PTR_ERR(nop);
1358 			goto err_rq;
1359 		}
1360 		err = wait_for_submit(engine, nop, HZ / 2);
1361 		i915_request_put(nop);
1362 		if (err) {
1363 			pr_err("%s: Timed out trying to submit nop\n",
1364 			       engine->name);
1365 			goto err_rq;
1366 		}
1367 
1368 		GEM_BUG_ON(i915_request_completed(rq));
1369 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1370 
1371 		/* Queue: semaphore signal, matching priority as semaphore */
1372 		err = release_queue(engine, vma, 1, effective_prio(rq));
1373 		if (err)
1374 			goto err_rq;
1375 
1376 		/* Wait until we ack the release_queue and start timeslicing */
1377 		do {
1378 			cond_resched();
1379 			intel_engine_flush_submission(engine);
1380 		} while (READ_ONCE(engine->execlists.pending[0]));
1381 
1382 		/* Timeslice every jiffy, so within 2 we should signal */
1383 		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1384 			struct drm_printer p =
1385 				drm_info_printer(gt->i915->drm.dev);
1386 
1387 			pr_err("%s: Failed to timeslice into queue\n",
1388 			       engine->name);
1389 			intel_engine_dump(engine, &p,
1390 					  "%s\n", engine->name);
1391 
1392 			memset(vaddr, 0xff, PAGE_SIZE);
1393 			err = -EIO;
1394 		}
1395 err_rq:
1396 		i915_request_put(rq);
1397 err_heartbeat:
1398 		st_engine_heartbeat_enable(engine);
1399 		if (err)
1400 			break;
1401 	}
1402 
1403 err_pin:
1404 	i915_vma_unpin(vma);
1405 err_map:
1406 	i915_gem_object_unpin_map(obj);
1407 err_obj:
1408 	i915_gem_object_put(obj);
1409 	return err;
1410 }
1411 
live_timeslice_nopreempt(void * arg)1412 static int live_timeslice_nopreempt(void *arg)
1413 {
1414 	struct intel_gt *gt = arg;
1415 	struct intel_engine_cs *engine;
1416 	enum intel_engine_id id;
1417 	struct igt_spinner spin;
1418 	int err = 0;
1419 
1420 	/*
1421 	 * We should not timeslice into a request that is marked with
1422 	 * I915_REQUEST_NOPREEMPT.
1423 	 */
1424 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1425 		return 0;
1426 
1427 	if (igt_spinner_init(&spin, gt))
1428 		return -ENOMEM;
1429 
1430 	for_each_engine(engine, gt, id) {
1431 		struct intel_context *ce;
1432 		struct i915_request *rq;
1433 		unsigned long timeslice;
1434 
1435 		if (!intel_engine_has_preemption(engine))
1436 			continue;
1437 
1438 		ce = intel_context_create(engine);
1439 		if (IS_ERR(ce)) {
1440 			err = PTR_ERR(ce);
1441 			break;
1442 		}
1443 
1444 		st_engine_heartbeat_disable(engine);
1445 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1446 
1447 		/* Create an unpreemptible spinner */
1448 
1449 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1450 		intel_context_put(ce);
1451 		if (IS_ERR(rq)) {
1452 			err = PTR_ERR(rq);
1453 			goto out_heartbeat;
1454 		}
1455 
1456 		i915_request_get(rq);
1457 		i915_request_add(rq);
1458 
1459 		if (!igt_wait_for_spinner(&spin, rq)) {
1460 			i915_request_put(rq);
1461 			err = -ETIME;
1462 			goto out_spin;
1463 		}
1464 
1465 		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1466 		i915_request_put(rq);
1467 
1468 		/* Followed by a maximum priority barrier (heartbeat) */
1469 
1470 		ce = intel_context_create(engine);
1471 		if (IS_ERR(ce)) {
1472 			err = PTR_ERR(ce);
1473 			goto out_spin;
1474 		}
1475 
1476 		rq = intel_context_create_request(ce);
1477 		intel_context_put(ce);
1478 		if (IS_ERR(rq)) {
1479 			err = PTR_ERR(rq);
1480 			goto out_spin;
1481 		}
1482 
1483 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1484 		i915_request_get(rq);
1485 		i915_request_add(rq);
1486 
1487 		/*
1488 		 * Wait until the barrier is in ELSP, and we know timeslicing
1489 		 * will have been activated.
1490 		 */
1491 		if (wait_for_submit(engine, rq, HZ / 2)) {
1492 			i915_request_put(rq);
1493 			err = -ETIME;
1494 			goto out_spin;
1495 		}
1496 
1497 		/*
1498 		 * Since the ELSP[0] request is unpreemptible, it should not
1499 		 * allow the maximum priority barrier through. Wait long
1500 		 * enough to see if it is timesliced in by mistake.
1501 		 */
1502 		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1503 			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1504 			       engine->name);
1505 			err = -EINVAL;
1506 		}
1507 		i915_request_put(rq);
1508 
1509 out_spin:
1510 		igt_spinner_end(&spin);
1511 out_heartbeat:
1512 		xchg(&engine->props.timeslice_duration_ms, timeslice);
1513 		st_engine_heartbeat_enable(engine);
1514 		if (err)
1515 			break;
1516 
1517 		if (igt_flush_test(gt->i915)) {
1518 			err = -EIO;
1519 			break;
1520 		}
1521 	}
1522 
1523 	igt_spinner_fini(&spin);
1524 	return err;
1525 }
1526 
live_busywait_preempt(void * arg)1527 static int live_busywait_preempt(void *arg)
1528 {
1529 	struct intel_gt *gt = arg;
1530 	struct i915_gem_context *ctx_hi, *ctx_lo;
1531 	struct intel_engine_cs *engine;
1532 	struct drm_i915_gem_object *obj;
1533 	struct i915_vma *vma;
1534 	enum intel_engine_id id;
1535 	int err = -ENOMEM;
1536 	u32 *map;
1537 
1538 	/*
1539 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1540 	 * preempt the busywaits used to synchronise between rings.
1541 	 */
1542 
1543 	ctx_hi = kernel_context(gt->i915, NULL);
1544 	if (!ctx_hi)
1545 		return -ENOMEM;
1546 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1547 
1548 	ctx_lo = kernel_context(gt->i915, NULL);
1549 	if (!ctx_lo)
1550 		goto err_ctx_hi;
1551 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1552 
1553 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1554 	if (IS_ERR(obj)) {
1555 		err = PTR_ERR(obj);
1556 		goto err_ctx_lo;
1557 	}
1558 
1559 	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1560 	if (IS_ERR(map)) {
1561 		err = PTR_ERR(map);
1562 		goto err_obj;
1563 	}
1564 
1565 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1566 	if (IS_ERR(vma)) {
1567 		err = PTR_ERR(vma);
1568 		goto err_map;
1569 	}
1570 
1571 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1572 	if (err)
1573 		goto err_map;
1574 
1575 	err = i915_vma_sync(vma);
1576 	if (err)
1577 		goto err_vma;
1578 
1579 	for_each_engine(engine, gt, id) {
1580 		struct i915_request *lo, *hi;
1581 		struct igt_live_test t;
1582 		u32 *cs;
1583 
1584 		if (!intel_engine_has_preemption(engine))
1585 			continue;
1586 
1587 		if (!intel_engine_can_store_dword(engine))
1588 			continue;
1589 
1590 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1591 			err = -EIO;
1592 			goto err_vma;
1593 		}
1594 
1595 		/*
1596 		 * We create two requests. The low priority request
1597 		 * busywaits on a semaphore (inside the ringbuffer where
1598 		 * is should be preemptible) and the high priority requests
1599 		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1600 		 * allowing the first request to complete. If preemption
1601 		 * fails, we hang instead.
1602 		 */
1603 
1604 		lo = igt_request_alloc(ctx_lo, engine);
1605 		if (IS_ERR(lo)) {
1606 			err = PTR_ERR(lo);
1607 			goto err_vma;
1608 		}
1609 
1610 		cs = intel_ring_begin(lo, 8);
1611 		if (IS_ERR(cs)) {
1612 			err = PTR_ERR(cs);
1613 			i915_request_add(lo);
1614 			goto err_vma;
1615 		}
1616 
1617 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1618 		*cs++ = i915_ggtt_offset(vma);
1619 		*cs++ = 0;
1620 		*cs++ = 1;
1621 
1622 		/* XXX Do we need a flush + invalidate here? */
1623 
1624 		*cs++ = MI_SEMAPHORE_WAIT |
1625 			MI_SEMAPHORE_GLOBAL_GTT |
1626 			MI_SEMAPHORE_POLL |
1627 			MI_SEMAPHORE_SAD_EQ_SDD;
1628 		*cs++ = 0;
1629 		*cs++ = i915_ggtt_offset(vma);
1630 		*cs++ = 0;
1631 
1632 		intel_ring_advance(lo, cs);
1633 
1634 		i915_request_get(lo);
1635 		i915_request_add(lo);
1636 
1637 		if (wait_for(READ_ONCE(*map), 10)) {
1638 			i915_request_put(lo);
1639 			err = -ETIMEDOUT;
1640 			goto err_vma;
1641 		}
1642 
1643 		/* Low priority request should be busywaiting now */
1644 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1645 			i915_request_put(lo);
1646 			pr_err("%s: Busywaiting request did not!\n",
1647 			       engine->name);
1648 			err = -EIO;
1649 			goto err_vma;
1650 		}
1651 
1652 		hi = igt_request_alloc(ctx_hi, engine);
1653 		if (IS_ERR(hi)) {
1654 			err = PTR_ERR(hi);
1655 			i915_request_put(lo);
1656 			goto err_vma;
1657 		}
1658 
1659 		cs = intel_ring_begin(hi, 4);
1660 		if (IS_ERR(cs)) {
1661 			err = PTR_ERR(cs);
1662 			i915_request_add(hi);
1663 			i915_request_put(lo);
1664 			goto err_vma;
1665 		}
1666 
1667 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1668 		*cs++ = i915_ggtt_offset(vma);
1669 		*cs++ = 0;
1670 		*cs++ = 0;
1671 
1672 		intel_ring_advance(hi, cs);
1673 		i915_request_add(hi);
1674 
1675 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1676 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1677 
1678 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1679 			       engine->name);
1680 
1681 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1682 			GEM_TRACE_DUMP();
1683 
1684 			i915_request_put(lo);
1685 			intel_gt_set_wedged(gt);
1686 			err = -EIO;
1687 			goto err_vma;
1688 		}
1689 		GEM_BUG_ON(READ_ONCE(*map));
1690 		i915_request_put(lo);
1691 
1692 		if (igt_live_test_end(&t)) {
1693 			err = -EIO;
1694 			goto err_vma;
1695 		}
1696 	}
1697 
1698 	err = 0;
1699 err_vma:
1700 	i915_vma_unpin(vma);
1701 err_map:
1702 	i915_gem_object_unpin_map(obj);
1703 err_obj:
1704 	i915_gem_object_put(obj);
1705 err_ctx_lo:
1706 	kernel_context_close(ctx_lo);
1707 err_ctx_hi:
1708 	kernel_context_close(ctx_hi);
1709 	return err;
1710 }
1711 
1712 static struct i915_request *
spinner_create_request(struct igt_spinner * spin,struct i915_gem_context * ctx,struct intel_engine_cs * engine,u32 arb)1713 spinner_create_request(struct igt_spinner *spin,
1714 		       struct i915_gem_context *ctx,
1715 		       struct intel_engine_cs *engine,
1716 		       u32 arb)
1717 {
1718 	struct intel_context *ce;
1719 	struct i915_request *rq;
1720 
1721 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1722 	if (IS_ERR(ce))
1723 		return ERR_CAST(ce);
1724 
1725 	rq = igt_spinner_create_request(spin, ce, arb);
1726 	intel_context_put(ce);
1727 	return rq;
1728 }
1729 
live_preempt(void * arg)1730 static int live_preempt(void *arg)
1731 {
1732 	struct intel_gt *gt = arg;
1733 	struct i915_gem_context *ctx_hi, *ctx_lo;
1734 	struct igt_spinner spin_hi, spin_lo;
1735 	struct intel_engine_cs *engine;
1736 	enum intel_engine_id id;
1737 	int err = -ENOMEM;
1738 
1739 	ctx_hi = kernel_context(gt->i915, NULL);
1740 	if (!ctx_hi)
1741 		return -ENOMEM;
1742 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1743 
1744 	ctx_lo = kernel_context(gt->i915, NULL);
1745 	if (!ctx_lo)
1746 		goto err_ctx_hi;
1747 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1748 
1749 	if (igt_spinner_init(&spin_hi, gt))
1750 		goto err_ctx_lo;
1751 
1752 	if (igt_spinner_init(&spin_lo, gt))
1753 		goto err_spin_hi;
1754 
1755 	for_each_engine(engine, gt, id) {
1756 		struct igt_live_test t;
1757 		struct i915_request *rq;
1758 
1759 		if (!intel_engine_has_preemption(engine))
1760 			continue;
1761 
1762 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1763 			err = -EIO;
1764 			goto err_spin_lo;
1765 		}
1766 
1767 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1768 					    MI_ARB_CHECK);
1769 		if (IS_ERR(rq)) {
1770 			err = PTR_ERR(rq);
1771 			goto err_spin_lo;
1772 		}
1773 
1774 		i915_request_add(rq);
1775 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1776 			GEM_TRACE("lo spinner failed to start\n");
1777 			GEM_TRACE_DUMP();
1778 			intel_gt_set_wedged(gt);
1779 			err = -EIO;
1780 			goto err_spin_lo;
1781 		}
1782 
1783 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1784 					    MI_ARB_CHECK);
1785 		if (IS_ERR(rq)) {
1786 			igt_spinner_end(&spin_lo);
1787 			err = PTR_ERR(rq);
1788 			goto err_spin_lo;
1789 		}
1790 
1791 		i915_request_add(rq);
1792 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1793 			GEM_TRACE("hi spinner failed to start\n");
1794 			GEM_TRACE_DUMP();
1795 			intel_gt_set_wedged(gt);
1796 			err = -EIO;
1797 			goto err_spin_lo;
1798 		}
1799 
1800 		igt_spinner_end(&spin_hi);
1801 		igt_spinner_end(&spin_lo);
1802 
1803 		if (igt_live_test_end(&t)) {
1804 			err = -EIO;
1805 			goto err_spin_lo;
1806 		}
1807 	}
1808 
1809 	err = 0;
1810 err_spin_lo:
1811 	igt_spinner_fini(&spin_lo);
1812 err_spin_hi:
1813 	igt_spinner_fini(&spin_hi);
1814 err_ctx_lo:
1815 	kernel_context_close(ctx_lo);
1816 err_ctx_hi:
1817 	kernel_context_close(ctx_hi);
1818 	return err;
1819 }
1820 
live_late_preempt(void * arg)1821 static int live_late_preempt(void *arg)
1822 {
1823 	struct intel_gt *gt = arg;
1824 	struct i915_gem_context *ctx_hi, *ctx_lo;
1825 	struct igt_spinner spin_hi, spin_lo;
1826 	struct intel_engine_cs *engine;
1827 	struct i915_sched_attr attr = {};
1828 	enum intel_engine_id id;
1829 	int err = -ENOMEM;
1830 
1831 	ctx_hi = kernel_context(gt->i915, NULL);
1832 	if (!ctx_hi)
1833 		return -ENOMEM;
1834 
1835 	ctx_lo = kernel_context(gt->i915, NULL);
1836 	if (!ctx_lo)
1837 		goto err_ctx_hi;
1838 
1839 	if (igt_spinner_init(&spin_hi, gt))
1840 		goto err_ctx_lo;
1841 
1842 	if (igt_spinner_init(&spin_lo, gt))
1843 		goto err_spin_hi;
1844 
1845 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1846 	ctx_lo->sched.priority = 1;
1847 
1848 	for_each_engine(engine, gt, id) {
1849 		struct igt_live_test t;
1850 		struct i915_request *rq;
1851 
1852 		if (!intel_engine_has_preemption(engine))
1853 			continue;
1854 
1855 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1856 			err = -EIO;
1857 			goto err_spin_lo;
1858 		}
1859 
1860 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1861 					    MI_ARB_CHECK);
1862 		if (IS_ERR(rq)) {
1863 			err = PTR_ERR(rq);
1864 			goto err_spin_lo;
1865 		}
1866 
1867 		i915_request_add(rq);
1868 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1869 			pr_err("First context failed to start\n");
1870 			goto err_wedged;
1871 		}
1872 
1873 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1874 					    MI_NOOP);
1875 		if (IS_ERR(rq)) {
1876 			igt_spinner_end(&spin_lo);
1877 			err = PTR_ERR(rq);
1878 			goto err_spin_lo;
1879 		}
1880 
1881 		i915_request_add(rq);
1882 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1883 			pr_err("Second context overtook first?\n");
1884 			goto err_wedged;
1885 		}
1886 
1887 		attr.priority = I915_PRIORITY_MAX;
1888 		engine->sched_engine->schedule(rq, &attr);
1889 
1890 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1891 			pr_err("High priority context failed to preempt the low priority context\n");
1892 			GEM_TRACE_DUMP();
1893 			goto err_wedged;
1894 		}
1895 
1896 		igt_spinner_end(&spin_hi);
1897 		igt_spinner_end(&spin_lo);
1898 
1899 		if (igt_live_test_end(&t)) {
1900 			err = -EIO;
1901 			goto err_spin_lo;
1902 		}
1903 	}
1904 
1905 	err = 0;
1906 err_spin_lo:
1907 	igt_spinner_fini(&spin_lo);
1908 err_spin_hi:
1909 	igt_spinner_fini(&spin_hi);
1910 err_ctx_lo:
1911 	kernel_context_close(ctx_lo);
1912 err_ctx_hi:
1913 	kernel_context_close(ctx_hi);
1914 	return err;
1915 
1916 err_wedged:
1917 	igt_spinner_end(&spin_hi);
1918 	igt_spinner_end(&spin_lo);
1919 	intel_gt_set_wedged(gt);
1920 	err = -EIO;
1921 	goto err_spin_lo;
1922 }
1923 
1924 struct preempt_client {
1925 	struct igt_spinner spin;
1926 	struct i915_gem_context *ctx;
1927 };
1928 
preempt_client_init(struct intel_gt * gt,struct preempt_client * c)1929 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1930 {
1931 	c->ctx = kernel_context(gt->i915, NULL);
1932 	if (!c->ctx)
1933 		return -ENOMEM;
1934 
1935 	if (igt_spinner_init(&c->spin, gt))
1936 		goto err_ctx;
1937 
1938 	return 0;
1939 
1940 err_ctx:
1941 	kernel_context_close(c->ctx);
1942 	return -ENOMEM;
1943 }
1944 
preempt_client_fini(struct preempt_client * c)1945 static void preempt_client_fini(struct preempt_client *c)
1946 {
1947 	igt_spinner_fini(&c->spin);
1948 	kernel_context_close(c->ctx);
1949 }
1950 
live_nopreempt(void * arg)1951 static int live_nopreempt(void *arg)
1952 {
1953 	struct intel_gt *gt = arg;
1954 	struct intel_engine_cs *engine;
1955 	struct preempt_client a, b;
1956 	enum intel_engine_id id;
1957 	int err = -ENOMEM;
1958 
1959 	/*
1960 	 * Verify that we can disable preemption for an individual request
1961 	 * that may be being observed and not want to be interrupted.
1962 	 */
1963 
1964 	if (preempt_client_init(gt, &a))
1965 		return -ENOMEM;
1966 	if (preempt_client_init(gt, &b))
1967 		goto err_client_a;
1968 	b.ctx->sched.priority = I915_PRIORITY_MAX;
1969 
1970 	for_each_engine(engine, gt, id) {
1971 		struct i915_request *rq_a, *rq_b;
1972 
1973 		if (!intel_engine_has_preemption(engine))
1974 			continue;
1975 
1976 		engine->execlists.preempt_hang.count = 0;
1977 
1978 		rq_a = spinner_create_request(&a.spin,
1979 					      a.ctx, engine,
1980 					      MI_ARB_CHECK);
1981 		if (IS_ERR(rq_a)) {
1982 			err = PTR_ERR(rq_a);
1983 			goto err_client_b;
1984 		}
1985 
1986 		/* Low priority client, but unpreemptable! */
1987 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1988 
1989 		i915_request_add(rq_a);
1990 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1991 			pr_err("First client failed to start\n");
1992 			goto err_wedged;
1993 		}
1994 
1995 		rq_b = spinner_create_request(&b.spin,
1996 					      b.ctx, engine,
1997 					      MI_ARB_CHECK);
1998 		if (IS_ERR(rq_b)) {
1999 			err = PTR_ERR(rq_b);
2000 			goto err_client_b;
2001 		}
2002 
2003 		i915_request_add(rq_b);
2004 
2005 		/* B is much more important than A! (But A is unpreemptable.) */
2006 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2007 
2008 		/* Wait long enough for preemption and timeslicing */
2009 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
2010 			pr_err("Second client started too early!\n");
2011 			goto err_wedged;
2012 		}
2013 
2014 		igt_spinner_end(&a.spin);
2015 
2016 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2017 			pr_err("Second client failed to start\n");
2018 			goto err_wedged;
2019 		}
2020 
2021 		igt_spinner_end(&b.spin);
2022 
2023 		if (engine->execlists.preempt_hang.count) {
2024 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
2025 			       engine->execlists.preempt_hang.count);
2026 			err = -EINVAL;
2027 			goto err_wedged;
2028 		}
2029 
2030 		if (igt_flush_test(gt->i915))
2031 			goto err_wedged;
2032 	}
2033 
2034 	err = 0;
2035 err_client_b:
2036 	preempt_client_fini(&b);
2037 err_client_a:
2038 	preempt_client_fini(&a);
2039 	return err;
2040 
2041 err_wedged:
2042 	igt_spinner_end(&b.spin);
2043 	igt_spinner_end(&a.spin);
2044 	intel_gt_set_wedged(gt);
2045 	err = -EIO;
2046 	goto err_client_b;
2047 }
2048 
2049 struct live_preempt_cancel {
2050 	struct intel_engine_cs *engine;
2051 	struct preempt_client a, b;
2052 };
2053 
__cancel_active0(struct live_preempt_cancel * arg)2054 static int __cancel_active0(struct live_preempt_cancel *arg)
2055 {
2056 	struct i915_request *rq;
2057 	struct igt_live_test t;
2058 	int err;
2059 
2060 	/* Preempt cancel of ELSP0 */
2061 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2062 	if (igt_live_test_begin(&t, arg->engine->i915,
2063 				__func__, arg->engine->name))
2064 		return -EIO;
2065 
2066 	rq = spinner_create_request(&arg->a.spin,
2067 				    arg->a.ctx, arg->engine,
2068 				    MI_ARB_CHECK);
2069 	if (IS_ERR(rq))
2070 		return PTR_ERR(rq);
2071 
2072 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2073 	i915_request_get(rq);
2074 	i915_request_add(rq);
2075 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2076 		err = -EIO;
2077 		goto out;
2078 	}
2079 
2080 	intel_context_ban(rq->context, rq);
2081 	err = intel_engine_pulse(arg->engine);
2082 	if (err)
2083 		goto out;
2084 
2085 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2086 	if (err) {
2087 		pr_err("Cancelled inflight0 request did not reset\n");
2088 		goto out;
2089 	}
2090 
2091 out:
2092 	i915_request_put(rq);
2093 	if (igt_live_test_end(&t))
2094 		err = -EIO;
2095 	return err;
2096 }
2097 
__cancel_active1(struct live_preempt_cancel * arg)2098 static int __cancel_active1(struct live_preempt_cancel *arg)
2099 {
2100 	struct i915_request *rq[2] = {};
2101 	struct igt_live_test t;
2102 	int err;
2103 
2104 	/* Preempt cancel of ELSP1 */
2105 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2106 	if (igt_live_test_begin(&t, arg->engine->i915,
2107 				__func__, arg->engine->name))
2108 		return -EIO;
2109 
2110 	rq[0] = spinner_create_request(&arg->a.spin,
2111 				       arg->a.ctx, arg->engine,
2112 				       MI_NOOP); /* no preemption */
2113 	if (IS_ERR(rq[0]))
2114 		return PTR_ERR(rq[0]);
2115 
2116 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2117 	i915_request_get(rq[0]);
2118 	i915_request_add(rq[0]);
2119 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2120 		err = -EIO;
2121 		goto out;
2122 	}
2123 
2124 	rq[1] = spinner_create_request(&arg->b.spin,
2125 				       arg->b.ctx, arg->engine,
2126 				       MI_ARB_CHECK);
2127 	if (IS_ERR(rq[1])) {
2128 		err = PTR_ERR(rq[1]);
2129 		goto out;
2130 	}
2131 
2132 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2133 	i915_request_get(rq[1]);
2134 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2135 	i915_request_add(rq[1]);
2136 	if (err)
2137 		goto out;
2138 
2139 	intel_context_ban(rq[1]->context, rq[1]);
2140 	err = intel_engine_pulse(arg->engine);
2141 	if (err)
2142 		goto out;
2143 
2144 	igt_spinner_end(&arg->a.spin);
2145 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2146 	if (err)
2147 		goto out;
2148 
2149 	if (rq[0]->fence.error != 0) {
2150 		pr_err("Normal inflight0 request did not complete\n");
2151 		err = -EINVAL;
2152 		goto out;
2153 	}
2154 
2155 	if (rq[1]->fence.error != -EIO) {
2156 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2157 		err = -EINVAL;
2158 		goto out;
2159 	}
2160 
2161 out:
2162 	i915_request_put(rq[1]);
2163 	i915_request_put(rq[0]);
2164 	if (igt_live_test_end(&t))
2165 		err = -EIO;
2166 	return err;
2167 }
2168 
__cancel_queued(struct live_preempt_cancel * arg)2169 static int __cancel_queued(struct live_preempt_cancel *arg)
2170 {
2171 	struct i915_request *rq[3] = {};
2172 	struct igt_live_test t;
2173 	int err;
2174 
2175 	/* Full ELSP and one in the wings */
2176 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2177 	if (igt_live_test_begin(&t, arg->engine->i915,
2178 				__func__, arg->engine->name))
2179 		return -EIO;
2180 
2181 	rq[0] = spinner_create_request(&arg->a.spin,
2182 				       arg->a.ctx, arg->engine,
2183 				       MI_ARB_CHECK);
2184 	if (IS_ERR(rq[0]))
2185 		return PTR_ERR(rq[0]);
2186 
2187 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2188 	i915_request_get(rq[0]);
2189 	i915_request_add(rq[0]);
2190 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2191 		err = -EIO;
2192 		goto out;
2193 	}
2194 
2195 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2196 	if (IS_ERR(rq[1])) {
2197 		err = PTR_ERR(rq[1]);
2198 		goto out;
2199 	}
2200 
2201 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2202 	i915_request_get(rq[1]);
2203 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2204 	i915_request_add(rq[1]);
2205 	if (err)
2206 		goto out;
2207 
2208 	rq[2] = spinner_create_request(&arg->b.spin,
2209 				       arg->a.ctx, arg->engine,
2210 				       MI_ARB_CHECK);
2211 	if (IS_ERR(rq[2])) {
2212 		err = PTR_ERR(rq[2]);
2213 		goto out;
2214 	}
2215 
2216 	i915_request_get(rq[2]);
2217 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2218 	i915_request_add(rq[2]);
2219 	if (err)
2220 		goto out;
2221 
2222 	intel_context_ban(rq[2]->context, rq[2]);
2223 	err = intel_engine_pulse(arg->engine);
2224 	if (err)
2225 		goto out;
2226 
2227 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2228 	if (err)
2229 		goto out;
2230 
2231 	if (rq[0]->fence.error != -EIO) {
2232 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2233 		err = -EINVAL;
2234 		goto out;
2235 	}
2236 
2237 	/*
2238 	 * The behavior between having semaphores and not is different. With
2239 	 * semaphores the subsequent request is on the hardware and not cancelled
2240 	 * while without the request is held in the driver and cancelled.
2241 	 */
2242 	if (intel_engine_has_semaphores(rq[1]->engine) &&
2243 	    rq[1]->fence.error != 0) {
2244 		pr_err("Normal inflight1 request did not complete\n");
2245 		err = -EINVAL;
2246 		goto out;
2247 	}
2248 
2249 	if (rq[2]->fence.error != -EIO) {
2250 		pr_err("Cancelled queued request did not report -EIO\n");
2251 		err = -EINVAL;
2252 		goto out;
2253 	}
2254 
2255 out:
2256 	i915_request_put(rq[2]);
2257 	i915_request_put(rq[1]);
2258 	i915_request_put(rq[0]);
2259 	if (igt_live_test_end(&t))
2260 		err = -EIO;
2261 	return err;
2262 }
2263 
__cancel_hostile(struct live_preempt_cancel * arg)2264 static int __cancel_hostile(struct live_preempt_cancel *arg)
2265 {
2266 	struct i915_request *rq;
2267 	int err;
2268 
2269 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2270 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2271 		return 0;
2272 
2273 	if (!intel_has_reset_engine(arg->engine->gt))
2274 		return 0;
2275 
2276 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2277 	rq = spinner_create_request(&arg->a.spin,
2278 				    arg->a.ctx, arg->engine,
2279 				    MI_NOOP); /* preemption disabled */
2280 	if (IS_ERR(rq))
2281 		return PTR_ERR(rq);
2282 
2283 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2284 	i915_request_get(rq);
2285 	i915_request_add(rq);
2286 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2287 		err = -EIO;
2288 		goto out;
2289 	}
2290 
2291 	intel_context_ban(rq->context, rq);
2292 	err = intel_engine_pulse(arg->engine); /* force reset */
2293 	if (err)
2294 		goto out;
2295 
2296 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2297 	if (err) {
2298 		pr_err("Cancelled inflight0 request did not reset\n");
2299 		goto out;
2300 	}
2301 
2302 out:
2303 	i915_request_put(rq);
2304 	if (igt_flush_test(arg->engine->i915))
2305 		err = -EIO;
2306 	return err;
2307 }
2308 
force_reset_timeout(struct intel_engine_cs * engine)2309 static void force_reset_timeout(struct intel_engine_cs *engine)
2310 {
2311 	engine->reset_timeout.probability = 999;
2312 	atomic_set(&engine->reset_timeout.times, -1);
2313 }
2314 
cancel_reset_timeout(struct intel_engine_cs * engine)2315 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2316 {
2317 	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2318 }
2319 
/*
 * Ban the context of a running spinner (submitted without an arbitration
 * point) and pulse the engine while engine->reset_timeout is armed, then
 * shorten the heartbeat interval and require wait_for_reset() to succeed
 * within HZ / 2.
 *
 * Returns 0 on success or when skipped (no preempt timeout configured, or
 * the GT cannot reset a single engine), otherwise a negative error code.
 */
static int __cancel_fail(struct live_preempt_cancel *arg)
{
	struct intel_engine_cs *engine = arg->engine;
	struct i915_request *rq;
	int err;

	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
		return 0;

	if (!intel_has_reset_engine(engine->gt))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Start from an un-banned context; hold our own reference on rq */
	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);

	err = intel_engine_pulse(engine);
	if (err)
		goto out;

	/* From here until cancel_reset_timeout() the reset_timeout is armed */
	force_reset_timeout(engine);

	/* force preempt reset [failure] */
	/* Keep kicking the submission path until something is pending */
	while (!engine->execlists.pending[0])
		intel_engine_flush_submission(engine);
	/*
	 * NOTE(review): presumably stopping the preempt timer and flushing
	 * again makes the submission path treat the preempt timeout as
	 * expired - confirm against the execlists tasklet.
	 */
	del_timer_sync(&engine->execlists.preempt);
	intel_engine_flush_submission(engine);

	cancel_reset_timeout(engine);

	/* after failure, require heartbeats to reset device */
	intel_engine_set_heartbeat(engine, 1);
	err = wait_for_reset(engine, rq, HZ / 2);
	/* Restore the default heartbeat interval whatever the outcome */
	intel_engine_set_heartbeat(engine,
				   engine->defaults.heartbeat_interval_ms);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	/* A failed flush overrides whatever result we had so far */
	if (igt_flush_test(engine->i915))
		err = -EIO;
	return err;
}
2379 
live_preempt_cancel(void * arg)2380 static int live_preempt_cancel(void *arg)
2381 {
2382 	struct intel_gt *gt = arg;
2383 	struct live_preempt_cancel data;
2384 	enum intel_engine_id id;
2385 	int err = -ENOMEM;
2386 
2387 	/*
2388 	 * To cancel an inflight context, we need to first remove it from the
2389 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2390 	 */
2391 
2392 	if (preempt_client_init(gt, &data.a))
2393 		return -ENOMEM;
2394 	if (preempt_client_init(gt, &data.b))
2395 		goto err_client_a;
2396 
2397 	for_each_engine(data.engine, gt, id) {
2398 		if (!intel_engine_has_preemption(data.engine))
2399 			continue;
2400 
2401 		err = __cancel_active0(&data);
2402 		if (err)
2403 			goto err_wedged;
2404 
2405 		err = __cancel_active1(&data);
2406 		if (err)
2407 			goto err_wedged;
2408 
2409 		err = __cancel_queued(&data);
2410 		if (err)
2411 			goto err_wedged;
2412 
2413 		err = __cancel_hostile(&data);
2414 		if (err)
2415 			goto err_wedged;
2416 
2417 		err = __cancel_fail(&data);
2418 		if (err)
2419 			goto err_wedged;
2420 	}
2421 
2422 	err = 0;
2423 err_client_b:
2424 	preempt_client_fini(&data.b);
2425 err_client_a:
2426 	preempt_client_fini(&data.a);
2427 	return err;
2428 
2429 err_wedged:
2430 	GEM_TRACE_DUMP();
2431 	igt_spinner_end(&data.b.spin);
2432 	igt_spinner_end(&data.a.spin);
2433 	intel_gt_set_wedged(gt);
2434 	goto err_client_b;
2435 }
2436 
/*
 * Selftest entry point (arg is the struct intel_gt).
 *
 * On each preemption-capable engine, two clients take turns: while one
 * spinner runs, the other client queues its spinner, the running request is
 * bumped to I915_PRIORITY_MAX and then ended. After 8 such hand-overs the
 * engine's preempt_hang.count must still be zero.
 *
 * Returns 0 on success or when skipped, -ENOMEM if a client cannot be set
 * up, the spinner_create_request() error, -EINVAL if a preemption was
 * recorded, or -EIO after wedging the GT.
 */
static int live_suppress_self_preempt(void *arg)
{
	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that if a preemption request does not cause a change in
	 * the current execution order, the preempt-to-idle injection is
	 * skipped and that we do not accidentally apply it after the CS
	 * completion event.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0; /* presume black box */

	if (intel_vgpu_active(gt->i915))
		return 0; /* GVT forces single port & request submission */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_flush_test(gt->i915))
			goto err_wedged;

		/* Heartbeats stay off for the duration of this engine's run */
		st_engine_heartbeat_disable(engine);
		/* Reset the counter that is checked once the loop is done */
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_NOOP);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			st_engine_heartbeat_enable(engine);
			goto err_client_b;
		}

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			st_engine_heartbeat_enable(engine);
			goto err_wedged;
		}

		/* Keep postponing the timer to avoid premature slicing */
		mod_timer(&engine->execlists.timer, jiffies + HZ);
		for (depth = 0; depth < 8; depth++) {
			rq_b = spinner_create_request(&b.spin,
						      b.ctx, engine,
						      MI_NOOP);
			if (IS_ERR(rq_b)) {
				err = PTR_ERR(rq_b);
				st_engine_heartbeat_enable(engine);
				goto err_client_b;
			}
			i915_request_add(rq_b);

			/*
			 * Raise the priority of the request that is already
			 * running, then let it finish so that rq_b can start.
			 */
			GEM_BUG_ON(i915_request_completed(rq_a));
			engine->sched_engine->schedule(rq_a, &attr);
			igt_spinner_end(&a.spin);

			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
				pr_err("Second client failed to start\n");
				st_engine_heartbeat_enable(engine);
				goto err_wedged;
			}

			/* The client now running plays "a" in the next round */
			swap(a, b);
			rq_a = rq_b;
		}
		igt_spinner_end(&a.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
			       engine->name,
			       engine->execlists.preempt_hang.count,
			       depth);
			st_engine_heartbeat_enable(engine);
			err = -EINVAL;
			goto err_client_b;
		}

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	/* Stop both spinners, wedge the GT and report -EIO */
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
2550 
/*
 * Selftest entry point (arg is the struct intel_gt).
 *
 * For each preemption-capable engine, and for each prime chain length up to
 * an estimate of how many requests fit in the ring, queue a "hi" spinner, a
 * "lo" spinner plus a chain of "lo" requests, then one more "hi" request
 * bumped to I915_PRIORITY_MAX. That last request must complete within
 * HZ / 5 of the "hi" spinner being ended, and the "lo" chain must flush
 * within HZ / 5 afterwards.
 *
 * Returns 0 on success, -ENOMEM if a client cannot be set up, or -EIO after
 * wedging the GT on any failure inside the engine loop.
 */
static int live_chain_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client hi, lo;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Build a chain AB...BA between two contexts (A, B) and request
	 * preemption of the last request. It should then complete before
	 * the previously submitted spinner in B.
	 */

	if (preempt_client_init(gt, &hi))
		return -ENOMEM;

	if (preempt_client_init(gt, &lo))
		goto err_client_hi;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
		struct igt_live_test t;
		struct i915_request *rq;
		int ring_size, count, i;

		if (!intel_engine_has_preemption(engine))
			continue;

		/* Probe request: used only to measure ring usage per spinner */
		rq = spinner_create_request(&lo.spin,
					    lo.ctx, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq))
			goto err_wedged;

		i915_request_get(rq);
		i915_request_add(rq);

		/*
		 * Ring space taken by one spinner request (wa_tail - head,
		 * corrected for wrap-around), converted into the number of
		 * such requests that fit in the whole ring.
		 */
		ring_size = rq->wa_tail - rq->head;
		if (ring_size < 0)
			ring_size += rq->ring->size;
		ring_size = rq->ring->size / ring_size;
		pr_debug("%s(%s): Using maximum of %d requests\n",
			 __func__, engine->name, ring_size);

		igt_spinner_end(&lo.spin);
		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
			pr_err("Timed out waiting to flush %s\n", engine->name);
			i915_request_put(rq);
			goto err_wedged;
		}
		i915_request_put(rq);

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_wedged;
		}

		for_each_prime_number_from(count, 1, ring_size) {
			/* "hi" spinner goes first and must be seen running */
			rq = spinner_create_request(&hi.spin,
						    hi.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);
			if (!igt_wait_for_spinner(&hi.spin, rq))
				goto err_wedged;

			/* "lo" spinner followed by a chain of count requests */
			rq = spinner_create_request(&lo.spin,
						    lo.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);

			for (i = 0; i < count; i++) {
				rq = igt_request_alloc(lo.ctx, engine);
				if (IS_ERR(rq))
					goto err_wedged;
				i915_request_add(rq);
			}

			/* Final "hi" request, raised to maximum priority */
			rq = igt_request_alloc(hi.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);
			engine->sched_engine->schedule(rq, &attr);

			/* "lo" is still spinning while we wait for rq */
			igt_spinner_end(&hi.spin);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to preempt over chain of %d\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);
				i915_request_put(rq);
				goto err_wedged;
			}
			igt_spinner_end(&lo.spin);
			i915_request_put(rq);

			/* One more "lo" request to wait for the chain to drain */
			rq = igt_request_alloc(lo.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to flush low priority chain of %d requests\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				i915_request_put(rq);
				goto err_wedged;
			}
			i915_request_put(rq);
		}

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_wedged;
		}
	}

	err = 0;
err_client_lo:
	preempt_client_fini(&lo);
err_client_hi:
	preempt_client_fini(&hi);
	return err;

err_wedged:
	/* Stop both spinners, wedge the GT and report -EIO */
	igt_spinner_end(&hi.spin);
	igt_spinner_end(&lo.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_lo;
}
2698 
/*
 * Create and submit one member of the preemption gang on @engine.
 *
 * A new context and a 4096 byte batch are created. The batch waits on a
 * semaphore located at its own start and, if *@prev is set, afterwards
 * writes 0 to the start of (*@prev)'s batch. On success the new request is
 * linked in front of *@prev through mock.link and stored in *@prev; it
 * carries a request reference and a reference on its batch vma, both of
 * which the caller must drop.
 *
 * Returns 0 on success or a negative error code.
 */
static int create_gang(struct intel_engine_cs *engine,
		       struct i915_request **prev)
{
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	obj = i915_gem_object_create_internal(engine->i915, 4096);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ce;
	}

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_obj;

	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_obj;
	}

	/* Semaphore target: spin until zero */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* The semaphore address is the start of this very batch */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = lower_32_bits(vma->node.start);
	*cs++ = upper_32_bits(vma->node.start);

	if (*prev) {
		u64 offset = (*prev)->batch->node.start;

		/* Terminate the spinner in the next lower priority batch. */
		*cs++ = MI_STORE_DWORD_IMM_GEN4;
		*cs++ = lower_32_bits(offset);
		*cs++ = upper_32_bits(offset);
		*cs++ = 0;
	}

	*cs++ = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_obj;
	}

	/* References handed to the caller on success, dropped in err_rq */
	rq->batch = i915_vma_get(vma);
	i915_request_get(rq);

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						vma->node.start,
						PAGE_SIZE, 0);
	i915_vma_unlock(vma);
	/* The request is added even when err is set */
	i915_request_add(rq);
	if (err)
		goto err_rq;

	i915_gem_object_put(obj);
	intel_context_put(ce);

	/*
	 * Chain to the previous gang member through mock.link.
	 * NOTE(review): when *prev is NULL this stores the address of the
	 * member inside a NULL request; presumably the caller's
	 * list_next_entry() maps that back to NULL - confirm.
	 */
	rq->mock.link.next = &(*prev)->mock.link;
	*prev = rq;
	return 0;

err_rq:
	i915_vma_put(rq->batch);
	i915_request_put(rq);
err_obj:
	i915_gem_object_put(obj);
err_ce:
	intel_context_put(ce);
	return err;
}
2797 
/*
 * Run one pass of the ring-preemption test on @engine.
 *
 * Two contexts with rings of @ring_sz bytes are created and pinned. A
 * spinner is started on the first, followed by empty requests until the
 * ring tail is more than @queue_sz past the spinner's wa_tail. A request on
 * the second context must then be submitted within HZ / 2.
 *
 * Returns 0 on success, -EIO if the live test bracket fails, -ETIME if the
 * spinner does not start or the second request is not submitted in time, or
 * another negative error code from setup.
 */
static int __live_preempt_ring(struct intel_engine_cs *engine,
			       struct igt_spinner *spin,
			       int queue_sz, int ring_sz)
{
	struct intel_context *ce[2] = {};
	struct i915_request *rq;
	struct igt_live_test t;
	int err = 0;
	int n;

	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
		return -EIO;

	for (n = 0; n < ARRAY_SIZE(ce); n++) {
		struct intel_context *tmp;

		tmp = intel_context_create(engine);
		if (IS_ERR(tmp)) {
			err = PTR_ERR(tmp);
			goto err_ce;
		}

		tmp->ring_size = ring_sz;

		err = intel_context_pin(tmp);
		if (err) {
			intel_context_put(tmp);
			goto err_ce;
		}

		memset32(tmp->ring->vaddr,
			 0xdeadbeef, /* trigger a hang if executed */
			 tmp->ring->vma->size / sizeof(u32));

		/* Only pinned contexts are stored; err_ce relies on that */
		ce[n] = tmp;
	}

	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ce;
	}

	i915_request_get(rq);
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		intel_gt_set_wedged(engine->gt);
		i915_request_put(rq);
		err = -ETIME;
		goto err_ce;
	}

	/* Fill the ring, until we will cause a wrap */
	n = 0;
	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
		struct i915_request *tmp;

		tmp = intel_context_create_request(ce[0]);
		if (IS_ERR(tmp)) {
			err = PTR_ERR(tmp);
			i915_request_put(rq);
			goto err_ce;
		}

		i915_request_add(tmp);
		intel_engine_flush_submission(engine);
		n++;
	}
	intel_engine_flush_submission(engine);
	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
		 engine->name, queue_sz, n,
		 ce[0]->ring->size,
		 ce[0]->ring->tail,
		 ce[0]->ring->emit,
		 rq->tail);
	i915_request_put(rq);

	/* Create a second request to preempt the first ring */
	rq = intel_context_create_request(ce[1]);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ce;
	}

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_get(rq);
	i915_request_add(rq);

	err = wait_for_submit(engine, rq, HZ / 2);
	i915_request_put(rq);
	if (err) {
		pr_err("%s: preemption request was not submitted\n",
		       engine->name);
		/* Any wait_for_submit() failure is reported as -ETIME */
		err = -ETIME;
	}

	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
		 engine->name,
		 ce[0]->ring->tail, ce[0]->ring->emit,
		 ce[1]->ring->tail, ce[1]->ring->emit);

err_ce:
	/* Shared exit path: also taken on success */
	intel_engine_flush_submission(engine);
	igt_spinner_end(spin);
	for (n = 0; n < ARRAY_SIZE(ce); n++) {
		if (IS_ERR_OR_NULL(ce[n]))
			break;

		intel_context_unpin(ce[n]);
		intel_context_put(ce[n]);
	}
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
2915 
live_preempt_ring(void * arg)2916 static int live_preempt_ring(void *arg)
2917 {
2918 	struct intel_gt *gt = arg;
2919 	struct intel_engine_cs *engine;
2920 	struct igt_spinner spin;
2921 	enum intel_engine_id id;
2922 	int err = 0;
2923 
2924 	/*
2925 	 * Check that we rollback large chunks of a ring in order to do a
2926 	 * preemption event. Similar to live_unlite_ring, but looking at
2927 	 * ring size rather than the impact of intel_ring_direction().
2928 	 */
2929 
2930 	if (igt_spinner_init(&spin, gt))
2931 		return -ENOMEM;
2932 
2933 	for_each_engine(engine, gt, id) {
2934 		int n;
2935 
2936 		if (!intel_engine_has_preemption(engine))
2937 			continue;
2938 
2939 		if (!intel_engine_can_store_dword(engine))
2940 			continue;
2941 
2942 		st_engine_heartbeat_disable(engine);
2943 
2944 		for (n = 0; n <= 3; n++) {
2945 			err = __live_preempt_ring(engine, &spin,
2946 						  n * SZ_4K / 4, SZ_4K);
2947 			if (err)
2948 				break;
2949 		}
2950 
2951 		st_engine_heartbeat_enable(engine);
2952 		if (err)
2953 			break;
2954 	}
2955 
2956 	igt_spinner_fini(&spin);
2957 	return err;
2958 }
2959 
live_preempt_gang(void * arg)2960 static int live_preempt_gang(void *arg)
2961 {
2962 	struct intel_gt *gt = arg;
2963 	struct intel_engine_cs *engine;
2964 	enum intel_engine_id id;
2965 
2966 	/*
2967 	 * Build as long a chain of preempters as we can, with each
2968 	 * request higher priority than the last. Once we are ready, we release
2969 	 * the last batch which then precolates down the chain, each releasing
2970 	 * the next oldest in turn. The intent is to simply push as hard as we
2971 	 * can with the number of preemptions, trying to exceed narrow HW
2972 	 * limits. At a minimum, we insist that we can sort all the user
2973 	 * high priority levels into execution order.
2974 	 */
2975 
2976 	for_each_engine(engine, gt, id) {
2977 		struct i915_request *rq = NULL;
2978 		struct igt_live_test t;
2979 		IGT_TIMEOUT(end_time);
2980 		int prio = 0;
2981 		int err = 0;
2982 		u32 *cs;
2983 
2984 		if (!intel_engine_has_preemption(engine))
2985 			continue;
2986 
2987 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2988 			return -EIO;
2989 
2990 		do {
2991 			struct i915_sched_attr attr = { .priority = prio++ };
2992 
2993 			err = create_gang(engine, &rq);
2994 			if (err)
2995 				break;
2996 
2997 			/* Submit each spinner at increasing priority */
2998 			engine->sched_engine->schedule(rq, &attr);
2999 		} while (prio <= I915_PRIORITY_MAX &&
3000 			 !__igt_timeout(end_time, NULL));
3001 		pr_debug("%s: Preempt chain of %d requests\n",
3002 			 engine->name, prio);
3003 
3004 		/*
3005 		 * Such that the last spinner is the highest priority and
3006 		 * should execute first. When that spinner completes,
3007 		 * it will terminate the next lowest spinner until there
3008 		 * are no more spinners and the gang is complete.
3009 		 */
3010 		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3011 		if (!IS_ERR(cs)) {
3012 			*cs = 0;
3013 			i915_gem_object_unpin_map(rq->batch->obj);
3014 		} else {
3015 			err = PTR_ERR(cs);
3016 			intel_gt_set_wedged(gt);
3017 		}
3018 
3019 		while (rq) { /* wait for each rq from highest to lowest prio */
3020 			struct i915_request *n = list_next_entry(rq, mock.link);
3021 
3022 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3023 				struct drm_printer p =
3024 					drm_info_printer(engine->i915->drm.dev);
3025 
3026 				pr_err("Failed to flush chain of %d requests, at %d\n",
3027 				       prio, rq_prio(rq));
3028 				intel_engine_dump(engine, &p,
3029 						  "%s\n", engine->name);
3030 
3031 				err = -ETIME;
3032 			}
3033 
3034 			i915_vma_put(rq->batch);
3035 			i915_request_put(rq);
3036 			rq = n;
3037 		}
3038 
3039 		if (igt_live_test_end(&t))
3040 			err = -EIO;
3041 		if (err)
3042 			return err;
3043 	}
3044 
3045 	return 0;
3046 }
3047 
3048 static struct i915_vma *
create_gpr_user(struct intel_engine_cs * engine,struct i915_vma * result,unsigned int offset)3049 create_gpr_user(struct intel_engine_cs *engine,
3050 		struct i915_vma *result,
3051 		unsigned int offset)
3052 {
3053 	struct drm_i915_gem_object *obj;
3054 	struct i915_vma *vma;
3055 	u32 *cs;
3056 	int err;
3057 	int i;
3058 
3059 	obj = i915_gem_object_create_internal(engine->i915, 4096);
3060 	if (IS_ERR(obj))
3061 		return ERR_CAST(obj);
3062 
3063 	vma = i915_vma_instance(obj, result->vm, NULL);
3064 	if (IS_ERR(vma)) {
3065 		i915_gem_object_put(obj);
3066 		return vma;
3067 	}
3068 
3069 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3070 	if (err) {
3071 		i915_vma_put(vma);
3072 		return ERR_PTR(err);
3073 	}
3074 
3075 	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3076 	if (IS_ERR(cs)) {
3077 		i915_vma_put(vma);
3078 		return ERR_CAST(cs);
3079 	}
3080 
3081 	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
3082 	*cs++ = MI_LOAD_REGISTER_IMM(1);
3083 	*cs++ = CS_GPR(engine, 0);
3084 	*cs++ = 1;
3085 
3086 	for (i = 1; i < NUM_GPR; i++) {
3087 		u64 addr;
3088 
3089 		/*
3090 		 * Perform: GPR[i]++
3091 		 *
3092 		 * As we read and write into the context saved GPR[i], if
3093 		 * we restart this batch buffer from an earlier point, we
3094 		 * will repeat the increment and store a value > 1.
3095 		 */
3096 		*cs++ = MI_MATH(4);
3097 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3098 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3099 		*cs++ = MI_MATH_ADD;
3100 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3101 
3102 		addr = result->node.start + offset + i * sizeof(*cs);
3103 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
3104 		*cs++ = CS_GPR(engine, 2 * i);
3105 		*cs++ = lower_32_bits(addr);
3106 		*cs++ = upper_32_bits(addr);
3107 
3108 		*cs++ = MI_SEMAPHORE_WAIT |
3109 			MI_SEMAPHORE_POLL |
3110 			MI_SEMAPHORE_SAD_GTE_SDD;
3111 		*cs++ = i;
3112 		*cs++ = lower_32_bits(result->node.start);
3113 		*cs++ = upper_32_bits(result->node.start);
3114 	}
3115 
3116 	*cs++ = MI_BATCH_BUFFER_END;
3117 	i915_gem_object_flush_map(obj);
3118 	i915_gem_object_unpin_map(obj);
3119 
3120 	return vma;
3121 }
3122 
/*
 * Allocate an internal object of @sz bytes and pin a vma for it into the
 * GT's GGTT.
 *
 * Returns the pinned vma, or an ERR_PTR if allocation, vma lookup or
 * pinning fails (nothing is left allocated in that case).
 */
static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_internal(gt->i915, sz);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	ret = i915_ggtt_pin(vma, NULL, 0, 0);
	if (!ret)
		return vma;

	/* Pinning failed: release the vma and pass the error on */
	i915_vma_put(vma);
	return ERR_PTR(ret);
}
3147 
3148 static struct i915_request *
create_gpr_client(struct intel_engine_cs * engine,struct i915_vma * global,unsigned int offset)3149 create_gpr_client(struct intel_engine_cs *engine,
3150 		  struct i915_vma *global,
3151 		  unsigned int offset)
3152 {
3153 	struct i915_vma *batch, *vma;
3154 	struct intel_context *ce;
3155 	struct i915_request *rq;
3156 	int err;
3157 
3158 	ce = intel_context_create(engine);
3159 	if (IS_ERR(ce))
3160 		return ERR_CAST(ce);
3161 
3162 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3163 	if (IS_ERR(vma)) {
3164 		err = PTR_ERR(vma);
3165 		goto out_ce;
3166 	}
3167 
3168 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3169 	if (err)
3170 		goto out_ce;
3171 
3172 	batch = create_gpr_user(engine, vma, offset);
3173 	if (IS_ERR(batch)) {
3174 		err = PTR_ERR(batch);
3175 		goto out_vma;
3176 	}
3177 
3178 	rq = intel_context_create_request(ce);
3179 	if (IS_ERR(rq)) {
3180 		err = PTR_ERR(rq);
3181 		goto out_batch;
3182 	}
3183 
3184 	i915_vma_lock(vma);
3185 	err = i915_request_await_object(rq, vma->obj, false);
3186 	if (!err)
3187 		err = i915_vma_move_to_active(vma, rq, 0);
3188 	i915_vma_unlock(vma);
3189 
3190 	i915_vma_lock(batch);
3191 	if (!err)
3192 		err = i915_request_await_object(rq, batch->obj, false);
3193 	if (!err)
3194 		err = i915_vma_move_to_active(batch, rq, 0);
3195 	if (!err)
3196 		err = rq->engine->emit_bb_start(rq,
3197 						batch->node.start,
3198 						PAGE_SIZE, 0);
3199 	i915_vma_unlock(batch);
3200 	i915_vma_unpin(batch);
3201 
3202 	if (!err)
3203 		i915_request_get(rq);
3204 	i915_request_add(rq);
3205 
3206 out_batch:
3207 	i915_vma_put(batch);
3208 out_vma:
3209 	i915_vma_unpin(vma);
3210 out_ce:
3211 	intel_context_put(ce);
3212 	return err ? ERR_PTR(err) : rq;
3213 }
3214 
preempt_user(struct intel_engine_cs * engine,struct i915_vma * global,int id)3215 static int preempt_user(struct intel_engine_cs *engine,
3216 			struct i915_vma *global,
3217 			int id)
3218 {
3219 	struct i915_sched_attr attr = {
3220 		.priority = I915_PRIORITY_MAX
3221 	};
3222 	struct i915_request *rq;
3223 	int err = 0;
3224 	u32 *cs;
3225 
3226 	rq = intel_engine_create_kernel_request(engine);
3227 	if (IS_ERR(rq))
3228 		return PTR_ERR(rq);
3229 
3230 	cs = intel_ring_begin(rq, 4);
3231 	if (IS_ERR(cs)) {
3232 		i915_request_add(rq);
3233 		return PTR_ERR(cs);
3234 	}
3235 
3236 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3237 	*cs++ = i915_ggtt_offset(global);
3238 	*cs++ = 0;
3239 	*cs++ = id;
3240 
3241 	intel_ring_advance(rq, cs);
3242 
3243 	i915_request_get(rq);
3244 	i915_request_add(rq);
3245 
3246 	engine->sched_engine->schedule(rq, &attr);
3247 
3248 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3249 		err = -ETIME;
3250 	i915_request_put(rq);
3251 
3252 	return err;
3253 }
3254 
/*
 * Selftest entry point (arg is the struct intel_gt).
 *
 * A 4096 byte GGTT-pinned result page is shared by all clients. On each
 * suitable engine, three create_gpr_client() requests are started, then
 * preempt_user() is called for i = 1..NUM_GPR, each call writing i into
 * result[0]. Finally every client is waited upon and each of its result
 * slots (gpr 1..NUM_GPR-1) must read exactly 1.
 *
 * Returns 0 on success, -EIO if the live test bracket fails or result[0]
 * does not end up as NUM_GPR, -ETIME if a client does not complete,
 * -EINVAL on an unexpected result value, or a setup error.
 */
static int live_preempt_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *global;
	enum intel_engine_id id;
	u32 *result;
	int err = 0;

	/*
	 * In our other tests, we look at preemption in carefully
	 * controlled conditions in the ringbuffer. Since most of the
	 * time is spent in user batches, most of our preemptions naturally
	 * occur there. We want to verify that when we preempt inside a batch
	 * we continue on from the current instruction and do not roll back
	 * to the start, or another earlier arbitration point.
	 *
	 * To verify this, we create a batch which is a mixture of
	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
	 * a few preempting contexts thrown into the mix, we look for any
	 * repeated instructions (which show up as incorrect values).
	 */

	global = create_global(gt, 4096);
	if (IS_ERR(global))
		return PTR_ERR(global);

	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
	if (IS_ERR(result)) {
		i915_vma_unpin_and_release(&global, 0);
		return PTR_ERR(result);
	}

	for_each_engine(engine, gt, id) {
		struct i915_request *client[3] = {};
		struct igt_live_test t;
		int i;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
			continue; /* we need per-context GPR */

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		/* Start every engine from a clean result page */
		memset(result, 0, 4096);

		for (i = 0; i < ARRAY_SIZE(client); i++) {
			struct i915_request *rq;

			/* Client i owns NUM_GPR dwords starting at this offset */
			rq = create_gpr_client(engine, global,
					       NUM_GPR * i * sizeof(u32));
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto end_test;
			}

			client[i] = rq;
		}

		/* Continuously preempt the set of 3 running contexts */
		for (i = 1; i <= NUM_GPR; i++) {
			err = preempt_user(engine, global, i);
			if (err)
				goto end_test;
		}

		/* The last preempt_user() call must have left NUM_GPR here */
		if (READ_ONCE(result[0]) != NUM_GPR) {
			pr_err("%s: Failed to release semaphore\n",
			       engine->name);
			err = -EIO;
			goto end_test;
		}

		for (i = 0; i < ARRAY_SIZE(client); i++) {
			int gpr;

			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
				err = -ETIME;
				goto end_test;
			}

			/* Any value other than 1 is reported as an error */
			for (gpr = 1; gpr < NUM_GPR; gpr++) {
				if (result[NUM_GPR * i + gpr] != 1) {
					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
					       engine->name,
					       i, gpr, result[NUM_GPR * i + gpr]);
					err = -EINVAL;
					goto end_test;
				}
			}
		}

end_test:
		/* Clients are filled in order, so stop at the first empty slot */
		for (i = 0; i < ARRAY_SIZE(client); i++) {
			if (!client[i])
				break;

			i915_request_put(client[i]);
		}

		/* Flush the semaphores on error */
		smp_store_mb(result[0], -1);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
	return err;
}
3371 
/*
 * Selftest entry point (arg is the struct intel_gt).
 *
 * On each preemption-capable engine, start a low priority spinner without
 * an arbitration point, then submit a high priority request while the
 * engine's preempt timeout is temporarily set to 1ms. The high priority
 * request must complete within HZ / 10.
 *
 * Returns 0 on success or when skipped (no preempt timeout configured, or
 * the GT cannot reset a single engine), -ENOMEM on setup failure, -EIO if
 * the spinner does not start, -ETIME if the high priority request does not
 * complete, or the error from creating a request.
 */
static int live_preempt_timeout(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Check that we force preemption to occur by cancelling the previous
	 * context if it refuses to yield the GPU.
	 */
	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	ctx_hi = kernel_context(gt->i915, NULL);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915, NULL);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_ctx_lo;

	for_each_engine(engine, gt, id) {
		unsigned long saved_timeout;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_NOOP); /* preemption disabled */
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_spin_lo;
		}

		/* rq now refers to the high priority request */
		rq = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		/* Flush the previous CS ack before changing timeouts */
		while (READ_ONCE(engine->execlists.pending[0]))
			cpu_relax();

		saved_timeout = engine->props.preempt_timeout_ms;
		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */

		i915_request_get(rq);
		i915_request_add(rq);

		/* The short timeout only needs to cover this submission */
		intel_engine_flush_submission(engine);
		engine->props.preempt_timeout_ms = saved_timeout;

		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_spin_lo;
		}

		igt_spinner_end(&spin_lo);
		i915_request_put(rq);
	}

	err = 0;
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}
3465 
/*
 * Draw a value from @rnd: i915_prandom_u32_max_state() is asked for a number
 * bounded by (@max - @min), which is then shifted up by @min.
 */
static int random_range(struct rnd_state *rnd, int min, int max)
{
	const int span = max - min;

	return min + i915_prandom_u32_max_state(span, rnd);
}
3470 
random_priority(struct rnd_state * rnd)3471 static int random_priority(struct rnd_state *rnd)
3472 {
3473 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3474 }
3475 
/*
 * State shared by the preemption "smoke" tests: the GT under test, a pool
 * of contexts to submit from, and the engine/batch/PRNG used per submitter.
 */
struct preempt_smoke {
	struct intel_gt *gt; /* GT whose engines are exercised */
	struct i915_gem_context **contexts; /* pool indexed by smoke_context() */
	struct intel_engine_cs *engine; /* engine that smoke_submit() targets */
	struct drm_i915_gem_object *batch; /* optional batch; NULL means no batch is emitted */
	unsigned int ncontext; /* number of entries in contexts[] */
	struct rnd_state prng; /* PRNG state for context/priority selection */
	unsigned long count; /* requests submitted, set by smoke_crescendo_thread() */
};
3485 
smoke_context(struct preempt_smoke * smoke)3486 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3487 {
3488 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3489 							  &smoke->prng)];
3490 }
3491 
smoke_submit(struct preempt_smoke * smoke,struct i915_gem_context * ctx,int prio,struct drm_i915_gem_object * batch)3492 static int smoke_submit(struct preempt_smoke *smoke,
3493 			struct i915_gem_context *ctx, int prio,
3494 			struct drm_i915_gem_object *batch)
3495 {
3496 	struct i915_request *rq;
3497 	struct i915_vma *vma = NULL;
3498 	int err = 0;
3499 
3500 	if (batch) {
3501 		struct i915_address_space *vm;
3502 
3503 		vm = i915_gem_context_get_eb_vm(ctx);
3504 		vma = i915_vma_instance(batch, vm, NULL);
3505 		i915_vm_put(vm);
3506 		if (IS_ERR(vma))
3507 			return PTR_ERR(vma);
3508 
3509 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3510 		if (err)
3511 			return err;
3512 	}
3513 
3514 	ctx->sched.priority = prio;
3515 
3516 	rq = igt_request_alloc(ctx, smoke->engine);
3517 	if (IS_ERR(rq)) {
3518 		err = PTR_ERR(rq);
3519 		goto unpin;
3520 	}
3521 
3522 	if (vma) {
3523 		i915_vma_lock(vma);
3524 		err = i915_request_await_object(rq, vma->obj, false);
3525 		if (!err)
3526 			err = i915_vma_move_to_active(vma, rq, 0);
3527 		if (!err)
3528 			err = rq->engine->emit_bb_start(rq,
3529 							vma->node.start,
3530 							PAGE_SIZE, 0);
3531 		i915_vma_unlock(vma);
3532 	}
3533 
3534 	i915_request_add(rq);
3535 
3536 unpin:
3537 	if (vma)
3538 		i915_vma_unpin(vma);
3539 
3540 	return err;
3541 }
3542 
smoke_crescendo_thread(void * arg)3543 static int smoke_crescendo_thread(void *arg)
3544 {
3545 	struct preempt_smoke *smoke = arg;
3546 	IGT_TIMEOUT(end_time);
3547 	unsigned long count;
3548 
3549 	count = 0;
3550 	do {
3551 		struct i915_gem_context *ctx = smoke_context(smoke);
3552 		int err;
3553 
3554 		err = smoke_submit(smoke,
3555 				   ctx, count % I915_PRIORITY_MAX,
3556 				   smoke->batch);
3557 		if (err)
3558 			return err;
3559 
3560 		count++;
3561 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3562 
3563 	smoke->count = count;
3564 	return 0;
3565 }
3566 
smoke_crescendo(struct preempt_smoke * smoke,unsigned int flags)3567 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3568 #define BATCH BIT(0)
3569 {
3570 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
3571 	struct preempt_smoke *arg;
3572 	struct intel_engine_cs *engine;
3573 	enum intel_engine_id id;
3574 	unsigned long count;
3575 	int err = 0;
3576 
3577 	arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL);
3578 	if (!arg)
3579 		return -ENOMEM;
3580 
3581 	for_each_engine(engine, smoke->gt, id) {
3582 		arg[id] = *smoke;
3583 		arg[id].engine = engine;
3584 		if (!(flags & BATCH))
3585 			arg[id].batch = NULL;
3586 		arg[id].count = 0;
3587 
3588 		tsk[id] = kthread_run(smoke_crescendo_thread, arg,
3589 				      "igt/smoke:%d", id);
3590 		if (IS_ERR(tsk[id])) {
3591 			err = PTR_ERR(tsk[id]);
3592 			break;
3593 		}
3594 		get_task_struct(tsk[id]);
3595 	}
3596 
3597 	yield(); /* start all threads before we kthread_stop() */
3598 
3599 	count = 0;
3600 	for_each_engine(engine, smoke->gt, id) {
3601 		int status;
3602 
3603 		if (IS_ERR_OR_NULL(tsk[id]))
3604 			continue;
3605 
3606 		status = kthread_stop(tsk[id]);
3607 		if (status && !err)
3608 			err = status;
3609 
3610 		count += arg[id].count;
3611 
3612 		put_task_struct(tsk[id]);
3613 	}
3614 
3615 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3616 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3617 
3618 	kfree(arg);
3619 	return 0;
3620 }
3621 
smoke_random(struct preempt_smoke * smoke,unsigned int flags)3622 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3623 {
3624 	enum intel_engine_id id;
3625 	IGT_TIMEOUT(end_time);
3626 	unsigned long count;
3627 
3628 	count = 0;
3629 	do {
3630 		for_each_engine(smoke->engine, smoke->gt, id) {
3631 			struct i915_gem_context *ctx = smoke_context(smoke);
3632 			int err;
3633 
3634 			err = smoke_submit(smoke,
3635 					   ctx, random_priority(&smoke->prng),
3636 					   flags & BATCH ? smoke->batch : NULL);
3637 			if (err)
3638 				return err;
3639 
3640 			count++;
3641 		}
3642 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3643 
3644 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3645 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3646 	return 0;
3647 }
3648 
live_preempt_smoke(void * arg)3649 static int live_preempt_smoke(void *arg)
3650 {
3651 	struct preempt_smoke smoke = {
3652 		.gt = arg,
3653 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3654 		.ncontext = 256,
3655 	};
3656 	const unsigned int phase[] = { 0, BATCH };
3657 	struct igt_live_test t;
3658 	int err = -ENOMEM;
3659 	u32 *cs;
3660 	int n;
3661 
3662 	smoke.contexts = kmalloc_array(smoke.ncontext,
3663 				       sizeof(*smoke.contexts),
3664 				       GFP_KERNEL);
3665 	if (!smoke.contexts)
3666 		return -ENOMEM;
3667 
3668 	smoke.batch =
3669 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3670 	if (IS_ERR(smoke.batch)) {
3671 		err = PTR_ERR(smoke.batch);
3672 		goto err_free;
3673 	}
3674 
3675 	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3676 	if (IS_ERR(cs)) {
3677 		err = PTR_ERR(cs);
3678 		goto err_batch;
3679 	}
3680 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3681 		cs[n] = MI_ARB_CHECK;
3682 	cs[n] = MI_BATCH_BUFFER_END;
3683 	i915_gem_object_flush_map(smoke.batch);
3684 	i915_gem_object_unpin_map(smoke.batch);
3685 
3686 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3687 		err = -EIO;
3688 		goto err_batch;
3689 	}
3690 
3691 	for (n = 0; n < smoke.ncontext; n++) {
3692 		smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL);
3693 		if (!smoke.contexts[n])
3694 			goto err_ctx;
3695 	}
3696 
3697 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3698 		err = smoke_crescendo(&smoke, phase[n]);
3699 		if (err)
3700 			goto err_ctx;
3701 
3702 		err = smoke_random(&smoke, phase[n]);
3703 		if (err)
3704 			goto err_ctx;
3705 	}
3706 
3707 err_ctx:
3708 	if (igt_live_test_end(&t))
3709 		err = -EIO;
3710 
3711 	for (n = 0; n < smoke.ncontext; n++) {
3712 		if (!smoke.contexts[n])
3713 			break;
3714 		kernel_context_close(smoke.contexts[n]);
3715 	}
3716 
3717 err_batch:
3718 	i915_gem_object_put(smoke.batch);
3719 err_free:
3720 	kfree(smoke.contexts);
3721 
3722 	return err;
3723 }
3724 
/*
 * Measure request submission latency through virtual engines.
 *
 * Creates and pins @nctx virtual contexts over @siblings, then for each
 * prime number up to 8192 submits that many requests per context (this
 * function adds no commands of its own to them) and waits for the last
 * request of every context.
 *
 * With CHAIN set in @flags all requests for one context are queued before
 * moving on to the next context; otherwise the contexts are interleaved,
 * one request each per round.
 *
 * @nctx must be between 1 and 16 (the size of the local arrays).
 *
 * Returns 0 on success, a negative error code from context/request creation
 * or the live-test bracketing, or -EIO if igt_flush_test() fails.
 */
static int nop_virtual_engine(struct intel_gt *gt,
			      struct intel_engine_cs **siblings,
			      unsigned int nsibling,
			      unsigned int nctx,
			      unsigned int flags)
#define CHAIN BIT(0)
{
	IGT_TIMEOUT(end_time);
	struct i915_request *request[16] = {};
	struct intel_context *ve[16];
	unsigned long n, prime, nc;
	struct igt_live_test t;
	ktime_t times[2] = {};
	int err;

	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

	for (n = 0; n < nctx; n++) {
		ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
		if (IS_ERR(ve[n])) {
			err = PTR_ERR(ve[n]);
			/* Only the first n contexts need cleaning up */
			nctx = n;
			goto out;
		}

		err = intel_context_pin(ve[n]);
		if (err) {
			intel_context_put(ve[n]);
			nctx = n;
			goto out;
		}
	}

	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
	if (err)
		goto out;

	for_each_prime_number_from(prime, 1, 8192) {
		/* times[1] holds the start stamp until converted to a delta below */
		times[1] = ktime_get_raw();

		if (flags & CHAIN) {
			for (nc = 0; nc < nctx; nc++) {
				for (n = 0; n < prime; n++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					/* Track only the most recent request per context */
					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		} else {
			for (n = 0; n < prime; n++) {
				for (nc = 0; nc < nctx; nc++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		}

		for (nc = 0; nc < nctx; nc++) {
			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
				pr_err("%s(%s): wait for %llx:%lld timed out\n",
				       __func__, ve[0]->engine->name,
				       request[nc]->fence.context,
				       request[nc]->fence.seqno);

				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
					  __func__, ve[0]->engine->name,
					  request[nc]->fence.context,
					  request[nc]->fence.seqno);
				GEM_TRACE_DUMP();
				/*
				 * err is not set on this path.
				 * NOTE(review): presumably the wedged GT is
				 * reported by igt_live_test_end() or
				 * igt_flush_test() below - confirm.
				 */
				intel_gt_set_wedged(gt);
				break;
			}
		}

		times[1] = ktime_sub(ktime_get_raw(), times[1]);
		if (prime == 1)
			times[0] = times[1];

		for (nc = 0; nc < nctx; nc++) {
			i915_request_put(request[nc]);
			request[nc] = NULL;
		}

		if (__igt_timeout(end_time, NULL))
			break;
	}

	err = igt_live_test_end(&t);
	if (err)
		goto out;

	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
		prime, div64_u64(ktime_to_ns(times[1]), prime));

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	/* request[] entries may be NULL here; they are passed to put as-is */
	for (nc = 0; nc < nctx; nc++) {
		i915_request_put(request[nc]);
		intel_context_unpin(ve[nc]);
		intel_context_put(ve[nc]);
	}
	return err;
}
3850 
3851 static unsigned int
__select_siblings(struct intel_gt * gt,unsigned int class,struct intel_engine_cs ** siblings,bool (* filter)(const struct intel_engine_cs *))3852 __select_siblings(struct intel_gt *gt,
3853 		  unsigned int class,
3854 		  struct intel_engine_cs **siblings,
3855 		  bool (*filter)(const struct intel_engine_cs *))
3856 {
3857 	unsigned int n = 0;
3858 	unsigned int inst;
3859 
3860 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3861 		if (!gt->engine_class[class][inst])
3862 			continue;
3863 
3864 		if (filter && !filter(gt->engine_class[class][inst]))
3865 			continue;
3866 
3867 		siblings[n++] = gt->engine_class[class][inst];
3868 	}
3869 
3870 	return n;
3871 }
3872 
3873 static unsigned int
select_siblings(struct intel_gt * gt,unsigned int class,struct intel_engine_cs ** siblings)3874 select_siblings(struct intel_gt *gt,
3875 		unsigned int class,
3876 		struct intel_engine_cs **siblings)
3877 {
3878 	return __select_siblings(gt, class, siblings, NULL);
3879 }
3880 
live_virtual_engine(void * arg)3881 static int live_virtual_engine(void *arg)
3882 {
3883 	struct intel_gt *gt = arg;
3884 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3885 	struct intel_engine_cs *engine;
3886 	enum intel_engine_id id;
3887 	unsigned int class;
3888 	int err;
3889 
3890 	if (intel_uc_uses_guc_submission(&gt->uc))
3891 		return 0;
3892 
3893 	for_each_engine(engine, gt, id) {
3894 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3895 		if (err) {
3896 			pr_err("Failed to wrap engine %s: err=%d\n",
3897 			       engine->name, err);
3898 			return err;
3899 		}
3900 	}
3901 
3902 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3903 		int nsibling, n;
3904 
3905 		nsibling = select_siblings(gt, class, siblings);
3906 		if (nsibling < 2)
3907 			continue;
3908 
3909 		for (n = 1; n <= nsibling + 1; n++) {
3910 			err = nop_virtual_engine(gt, siblings, nsibling,
3911 						 n, 0);
3912 			if (err)
3913 				return err;
3914 		}
3915 
3916 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3917 		if (err)
3918 			return err;
3919 	}
3920 
3921 	return 0;
3922 }
3923 
mask_virtual_engine(struct intel_gt * gt,struct intel_engine_cs ** siblings,unsigned int nsibling)3924 static int mask_virtual_engine(struct intel_gt *gt,
3925 			       struct intel_engine_cs **siblings,
3926 			       unsigned int nsibling)
3927 {
3928 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3929 	struct intel_context *ve;
3930 	struct igt_live_test t;
3931 	unsigned int n;
3932 	int err;
3933 
3934 	/*
3935 	 * Check that by setting the execution mask on a request, we can
3936 	 * restrict it to our desired engine within the virtual engine.
3937 	 */
3938 
3939 	ve = intel_engine_create_virtual(siblings, nsibling, 0);
3940 	if (IS_ERR(ve)) {
3941 		err = PTR_ERR(ve);
3942 		goto out_close;
3943 	}
3944 
3945 	err = intel_context_pin(ve);
3946 	if (err)
3947 		goto out_put;
3948 
3949 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3950 	if (err)
3951 		goto out_unpin;
3952 
3953 	for (n = 0; n < nsibling; n++) {
3954 		request[n] = i915_request_create(ve);
3955 		if (IS_ERR(request[n])) {
3956 			err = PTR_ERR(request[n]);
3957 			nsibling = n;
3958 			goto out;
3959 		}
3960 
3961 		/* Reverse order as it's more likely to be unnatural */
3962 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3963 
3964 		i915_request_get(request[n]);
3965 		i915_request_add(request[n]);
3966 	}
3967 
3968 	for (n = 0; n < nsibling; n++) {
3969 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3970 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3971 			       __func__, ve->engine->name,
3972 			       request[n]->fence.context,
3973 			       request[n]->fence.seqno);
3974 
3975 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3976 				  __func__, ve->engine->name,
3977 				  request[n]->fence.context,
3978 				  request[n]->fence.seqno);
3979 			GEM_TRACE_DUMP();
3980 			intel_gt_set_wedged(gt);
3981 			err = -EIO;
3982 			goto out;
3983 		}
3984 
3985 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3986 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3987 			       request[n]->engine->name,
3988 			       siblings[nsibling - n - 1]->name);
3989 			err = -EINVAL;
3990 			goto out;
3991 		}
3992 	}
3993 
3994 	err = igt_live_test_end(&t);
3995 out:
3996 	if (igt_flush_test(gt->i915))
3997 		err = -EIO;
3998 
3999 	for (n = 0; n < nsibling; n++)
4000 		i915_request_put(request[n]);
4001 
4002 out_unpin:
4003 	intel_context_unpin(ve);
4004 out_put:
4005 	intel_context_put(ve);
4006 out_close:
4007 	return err;
4008 }
4009 
live_virtual_mask(void * arg)4010 static int live_virtual_mask(void *arg)
4011 {
4012 	struct intel_gt *gt = arg;
4013 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4014 	unsigned int class;
4015 	int err;
4016 
4017 	if (intel_uc_uses_guc_submission(&gt->uc))
4018 		return 0;
4019 
4020 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4021 		unsigned int nsibling;
4022 
4023 		nsibling = select_siblings(gt, class, siblings);
4024 		if (nsibling < 2)
4025 			continue;
4026 
4027 		err = mask_virtual_engine(gt, siblings, nsibling);
4028 		if (err)
4029 			return err;
4030 	}
4031 
4032 	return 0;
4033 }
4034 
slicein_virtual_engine(struct intel_gt * gt,struct intel_engine_cs ** siblings,unsigned int nsibling)4035 static int slicein_virtual_engine(struct intel_gt *gt,
4036 				  struct intel_engine_cs **siblings,
4037 				  unsigned int nsibling)
4038 {
4039 	const long timeout = slice_timeout(siblings[0]);
4040 	struct intel_context *ce;
4041 	struct i915_request *rq;
4042 	struct igt_spinner spin;
4043 	unsigned int n;
4044 	int err = 0;
4045 
4046 	/*
4047 	 * Virtual requests must take part in timeslicing on the target engines.
4048 	 */
4049 
4050 	if (igt_spinner_init(&spin, gt))
4051 		return -ENOMEM;
4052 
4053 	for (n = 0; n < nsibling; n++) {
4054 		ce = intel_context_create(siblings[n]);
4055 		if (IS_ERR(ce)) {
4056 			err = PTR_ERR(ce);
4057 			goto out;
4058 		}
4059 
4060 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4061 		intel_context_put(ce);
4062 		if (IS_ERR(rq)) {
4063 			err = PTR_ERR(rq);
4064 			goto out;
4065 		}
4066 
4067 		i915_request_add(rq);
4068 	}
4069 
4070 	ce = intel_engine_create_virtual(siblings, nsibling, 0);
4071 	if (IS_ERR(ce)) {
4072 		err = PTR_ERR(ce);
4073 		goto out;
4074 	}
4075 
4076 	rq = intel_context_create_request(ce);
4077 	intel_context_put(ce);
4078 	if (IS_ERR(rq)) {
4079 		err = PTR_ERR(rq);
4080 		goto out;
4081 	}
4082 
4083 	i915_request_get(rq);
4084 	i915_request_add(rq);
4085 	if (i915_request_wait(rq, 0, timeout) < 0) {
4086 		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4087 			      __func__, rq->engine->name);
4088 		GEM_TRACE_DUMP();
4089 		intel_gt_set_wedged(gt);
4090 		err = -EIO;
4091 	}
4092 	i915_request_put(rq);
4093 
4094 out:
4095 	igt_spinner_end(&spin);
4096 	if (igt_flush_test(gt->i915))
4097 		err = -EIO;
4098 	igt_spinner_fini(&spin);
4099 	return err;
4100 }
4101 
sliceout_virtual_engine(struct intel_gt * gt,struct intel_engine_cs ** siblings,unsigned int nsibling)4102 static int sliceout_virtual_engine(struct intel_gt *gt,
4103 				   struct intel_engine_cs **siblings,
4104 				   unsigned int nsibling)
4105 {
4106 	const long timeout = slice_timeout(siblings[0]);
4107 	struct intel_context *ce;
4108 	struct i915_request *rq;
4109 	struct igt_spinner spin;
4110 	unsigned int n;
4111 	int err = 0;
4112 
4113 	/*
4114 	 * Virtual requests must allow others a fair timeslice.
4115 	 */
4116 
4117 	if (igt_spinner_init(&spin, gt))
4118 		return -ENOMEM;
4119 
4120 	/* XXX We do not handle oversubscription and fairness with normal rq */
4121 	for (n = 0; n < nsibling; n++) {
4122 		ce = intel_engine_create_virtual(siblings, nsibling, 0);
4123 		if (IS_ERR(ce)) {
4124 			err = PTR_ERR(ce);
4125 			goto out;
4126 		}
4127 
4128 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4129 		intel_context_put(ce);
4130 		if (IS_ERR(rq)) {
4131 			err = PTR_ERR(rq);
4132 			goto out;
4133 		}
4134 
4135 		i915_request_add(rq);
4136 	}
4137 
4138 	for (n = 0; !err && n < nsibling; n++) {
4139 		ce = intel_context_create(siblings[n]);
4140 		if (IS_ERR(ce)) {
4141 			err = PTR_ERR(ce);
4142 			goto out;
4143 		}
4144 
4145 		rq = intel_context_create_request(ce);
4146 		intel_context_put(ce);
4147 		if (IS_ERR(rq)) {
4148 			err = PTR_ERR(rq);
4149 			goto out;
4150 		}
4151 
4152 		i915_request_get(rq);
4153 		i915_request_add(rq);
4154 		if (i915_request_wait(rq, 0, timeout) < 0) {
4155 			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4156 				      __func__, siblings[n]->name);
4157 			GEM_TRACE_DUMP();
4158 			intel_gt_set_wedged(gt);
4159 			err = -EIO;
4160 		}
4161 		i915_request_put(rq);
4162 	}
4163 
4164 out:
4165 	igt_spinner_end(&spin);
4166 	if (igt_flush_test(gt->i915))
4167 		err = -EIO;
4168 	igt_spinner_fini(&spin);
4169 	return err;
4170 }
4171 
live_virtual_slice(void * arg)4172 static int live_virtual_slice(void *arg)
4173 {
4174 	struct intel_gt *gt = arg;
4175 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4176 	unsigned int class;
4177 	int err;
4178 
4179 	if (intel_uc_uses_guc_submission(&gt->uc))
4180 		return 0;
4181 
4182 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4183 		unsigned int nsibling;
4184 
4185 		nsibling = __select_siblings(gt, class, siblings,
4186 					     intel_engine_has_timeslices);
4187 		if (nsibling < 2)
4188 			continue;
4189 
4190 		err = slicein_virtual_engine(gt, siblings, nsibling);
4191 		if (err)
4192 			return err;
4193 
4194 		err = sliceout_virtual_engine(gt, siblings, nsibling);
4195 		if (err)
4196 			return err;
4197 	}
4198 
4199 	return 0;
4200 }
4201 
/*
 * Check that CS_GPR values written on one sibling are visible when the same
 * virtual context next runs on another sibling.
 *
 * NUM_GPR_DW requests are submitted on one virtual context. Request n is
 * restricted to siblings[n % nsibling]; it stores GPR dword n into dword n
 * of a scratch page and then loads the value n + 1 into GPR dword
 * (n + 1) % NUM_GPR_DW, ready for the next request to read back. Once the
 * last request completes, scratch dword n is expected to hold n.
 *
 * Returns 0 on success, -ETIME if the last request does not complete,
 * -EINVAL on a value mismatch, -EIO if igt_live_test_end() fails, or an
 * error from setup.
 */
static int preserved_virtual_engine(struct intel_gt *gt,
				    struct intel_engine_cs **siblings,
				    unsigned int nsibling)
{
	struct i915_request *last = NULL;
	struct intel_context *ve;
	struct i915_vma *scratch;
	struct igt_live_test t;
	unsigned int n;
	int err = 0;
	u32 *cs;

	scratch =
		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
						    PAGE_SIZE);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	err = i915_vma_sync(scratch);
	if (err)
		goto out_scratch;

	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_scratch;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < NUM_GPR_DW; n++) {
		/* Rotate through the siblings, one request per GPR dword */
		struct intel_engine_cs *engine = siblings[n % nsibling];
		struct i915_request *rq;

		rq = i915_request_create(ve);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_end;
		}

		/* Keep a reference to the newest request only */
		i915_request_put(last);
		last = i915_request_get(rq);

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto out_end;
		}

		/* Save GPR dword n into scratch[n] ... */
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;

		/* ... then seed the next GPR dword for the following request */
		*cs++ = MI_LOAD_REGISTER_IMM(1);
		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
		*cs++ = n + 1;

		/* Pad to the 8 dwords reserved above */
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* Restrict this request to run on a particular engine */
		rq->execution_mask = engine->mask;
		i915_request_add(rq);
	}

	if (i915_request_wait(last, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_end;
	}

	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_end;
	}

	/* Dword 0 is read before anything is loaded, so it must read as 0 */
	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n] != n) {
			pr_err("Incorrect value[%d] found for GPR[%d]\n",
			       cs[n], n);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

out_end:
	if (igt_live_test_end(&t))
		err = -EIO;
	i915_request_put(last);
out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_scratch:
	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}
4309 
live_virtual_preserved(void * arg)4310 static int live_virtual_preserved(void *arg)
4311 {
4312 	struct intel_gt *gt = arg;
4313 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4314 	unsigned int class;
4315 
4316 	/*
4317 	 * Check that the context image retains non-privileged (user) registers
4318 	 * from one engine to the next. For this we check that the CS_GPR
4319 	 * are preserved.
4320 	 */
4321 
4322 	if (intel_uc_uses_guc_submission(&gt->uc))
4323 		return 0;
4324 
4325 	/* As we use CS_GPR we cannot run before they existed on all engines. */
4326 	if (GRAPHICS_VER(gt->i915) < 9)
4327 		return 0;
4328 
4329 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4330 		int nsibling, err;
4331 
4332 		nsibling = select_siblings(gt, class, siblings);
4333 		if (nsibling < 2)
4334 			continue;
4335 
4336 		err = preserved_virtual_engine(gt, siblings, nsibling);
4337 		if (err)
4338 			return err;
4339 	}
4340 
4341 	return 0;
4342 }
4343 
/*
 * Check that a request from a virtual engine that is put on hold across an
 * engine reset is not resubmitted until it is released.
 *
 * A spinner is started on a virtual context with heartbeats disabled on all
 * siblings. On the physical engine it landed on, the test takes over the
 * reset and tasklet, unwinds the incomplete requests, places the spinner on
 * hold and resets the engine. The held request must then not complete
 * within HZ / 5, and must complete within HZ / 5 after being unheld.
 *
 * Returns 0 on success, -ENOMEM if the spinner cannot be set up, -ETIME or
 * -EIO on the failures described above, or an error from setup.
 */
static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_disable(siblings[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

	/* By now the request must sit on a physical engine, not the virtual one */
	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	err = engine_lock_reset_tasklet(engine);
	if (err)
		goto out_heartbeat;

	/* Run the submission tasklet by hand, as we now own it */
	engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->sched_engine->lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->sched_engine->lock);
	GEM_BUG_ON(rq->engine != engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	__intel_engine_reset_bh(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	engine_unlock_reset_tasklet(engine);

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -ETIME;
	}

out_rq:
	i915_request_put(rq);
out_heartbeat:
	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_enable(siblings[n]);

	intel_context_put(ve);
out_spin:
	igt_spinner_fini(&spin);
	return err;
}
4444 
live_virtual_reset(void * arg)4445 static int live_virtual_reset(void *arg)
4446 {
4447 	struct intel_gt *gt = arg;
4448 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4449 	unsigned int class;
4450 
4451 	/*
4452 	 * Check that we handle a reset event within a virtual engine.
4453 	 * Only the physical engine is reset, but we have to check the flow
4454 	 * of the virtual requests around the reset, and make sure it is not
4455 	 * forgotten.
4456 	 */
4457 
4458 	if (intel_uc_uses_guc_submission(&gt->uc))
4459 		return 0;
4460 
4461 	if (!intel_has_reset_engine(gt))
4462 		return 0;
4463 
4464 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4465 		int nsibling, err;
4466 
4467 		nsibling = select_siblings(gt, class, siblings);
4468 		if (nsibling < 2)
4469 			continue;
4470 
4471 		err = reset_virtual_engine(gt, siblings, nsibling);
4472 		if (err)
4473 			return err;
4474 	}
4475 
4476 	return 0;
4477 }
4478 
intel_execlists_live_selftests(struct drm_i915_private * i915)4479 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4480 {
4481 	static const struct i915_subtest tests[] = {
4482 		SUBTEST(live_sanitycheck),
4483 		SUBTEST(live_unlite_switch),
4484 		SUBTEST(live_unlite_preempt),
4485 		SUBTEST(live_unlite_ring),
4486 		SUBTEST(live_pin_rewind),
4487 		SUBTEST(live_hold_reset),
4488 		SUBTEST(live_error_interrupt),
4489 		SUBTEST(live_timeslice_preempt),
4490 		SUBTEST(live_timeslice_rewind),
4491 		SUBTEST(live_timeslice_queue),
4492 		SUBTEST(live_timeslice_nopreempt),
4493 		SUBTEST(live_busywait_preempt),
4494 		SUBTEST(live_preempt),
4495 		SUBTEST(live_late_preempt),
4496 		SUBTEST(live_nopreempt),
4497 		SUBTEST(live_preempt_cancel),
4498 		SUBTEST(live_suppress_self_preempt),
4499 		SUBTEST(live_chain_preempt),
4500 		SUBTEST(live_preempt_ring),
4501 		SUBTEST(live_preempt_gang),
4502 		SUBTEST(live_preempt_timeout),
4503 		SUBTEST(live_preempt_user),
4504 		SUBTEST(live_preempt_smoke),
4505 		SUBTEST(live_virtual_engine),
4506 		SUBTEST(live_virtual_mask),
4507 		SUBTEST(live_virtual_preserved),
4508 		SUBTEST(live_virtual_slice),
4509 		SUBTEST(live_virtual_reset),
4510 	};
4511 
4512 	if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP)
4513 		return 0;
4514 
4515 	if (intel_gt_is_wedged(to_gt(i915)))
4516 		return 0;
4517 
4518 	return intel_gt_live_subtests(tests, to_gt(i915));
4519 }
4520