1 /*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2017 Intel Corporation
5 */
6
7 #include <linux/prime_numbers.h>
8 #include <linux/string_helpers.h>
9
10 #include "gem/i915_gem_internal.h"
11 #include "gem/i915_gem_pm.h"
12 #include "gt/intel_engine_pm.h"
13 #include "gt/intel_engine_regs.h"
14 #include "gt/intel_gt.h"
15 #include "gt/intel_gt_requests.h"
16 #include "gt/intel_reset.h"
17 #include "i915_selftest.h"
18
19 #include "gem/selftests/igt_gem_utils.h"
20 #include "selftests/i915_random.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_live_test.h"
23 #include "selftests/igt_reset.h"
24 #include "selftests/igt_spinner.h"
25 #include "selftests/mock_drm.h"
26 #include "selftests/mock_gem_device.h"
27
28 #include "huge_gem_object.h"
29 #include "igt_gem_utils.h"
30
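/* Number of u32 dwords that fit in one page (1024 for 4KiB pages). */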
31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
32
static int live_nop_switch(void *arg)
34 {
35 const unsigned int nctx = 1024;
36 struct drm_i915_private *i915 = arg;
37 struct intel_engine_cs *engine;
38 struct i915_gem_context **ctx;
39 struct igt_live_test t;
40 struct file *file;
41 unsigned long n;
42 int err = -ENODEV;
43
44 /*
45 * Create as many contexts as we can feasibly get away with
46 * and check we can switch between them rapidly.
47 *
 * Serves as a very simple stress test for submission and HW switching
 * between contexts.
50 */
51
52 if (!DRIVER_CAPS(i915)->has_logical_contexts)
53 return 0;
54
55 file = mock_file(i915);
56 if (IS_ERR(file))
57 return PTR_ERR(file);
58
59 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
60 if (!ctx) {
61 err = -ENOMEM;
62 goto out_file;
63 }
64
65 for (n = 0; n < nctx; n++) {
66 ctx[n] = live_context(i915, file);
67 if (IS_ERR(ctx[n])) {
68 err = PTR_ERR(ctx[n]);
69 goto out_file;
70 }
71 }
72
73 for_each_uabi_engine(engine, i915) {
74 struct i915_request *rq = NULL;
75 unsigned long end_time, prime;
76 ktime_t times[2] = {};
77
78 times[0] = ktime_get_raw();
79 for (n = 0; n < nctx; n++) {
80 struct i915_request *this;
81
82 this = igt_request_alloc(ctx[n], engine);
83 if (IS_ERR(this)) {
84 err = PTR_ERR(this);
85 goto out_file;
86 }
87 if (rq) {
88 i915_request_await_dma_fence(this, &rq->fence);
89 i915_request_put(rq);
90 }
91 rq = i915_request_get(this);
92 i915_request_add(this);
93 }
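		/*
		 * Each request awaits its predecessor, so waiting on the
		 * last one is enough to know every context was populated.
		 */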
94 if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
95 pr_err("Failed to populated %d contexts\n", nctx);
96 intel_gt_set_wedged(to_gt(i915));
97 i915_request_put(rq);
98 err = -EIO;
99 goto out_file;
100 }
101 i915_request_put(rq);
102
103 times[1] = ktime_get_raw();
104
105 pr_info("Populated %d contexts on %s in %lluns\n",
106 nctx, engine->name, ktime_to_ns(times[1] - times[0]));
107
108 err = igt_live_test_begin(&t, i915, __func__, engine->name);
109 if (err)
110 goto out_file;
111
112 end_time = jiffies + i915_selftest.timeout_jiffies;
113 for_each_prime_number_from(prime, 2, 8192) {
114 times[1] = ktime_get_raw();
115
116 rq = NULL;
117 for (n = 0; n < prime; n++) {
118 struct i915_request *this;
119
120 this = igt_request_alloc(ctx[n % nctx], engine);
121 if (IS_ERR(this)) {
122 err = PTR_ERR(this);
123 goto out_file;
124 }
125
126 if (rq) { /* Force submission order */
127 i915_request_await_dma_fence(this, &rq->fence);
128 i915_request_put(rq);
129 }
130
131 /*
132 * This space is left intentionally blank.
133 *
134 * We do not actually want to perform any
135 * action with this request, we just want
136 * to measure the latency in allocation
137 * and submission of our breadcrumbs -
138 * ensuring that the bare request is sufficient
139 * for the system to work (i.e. proper HEAD
140 * tracking of the rings, interrupt handling,
141 * etc). It also gives us the lowest bounds
142 * for latency.
143 */
144
145 rq = i915_request_get(this);
146 i915_request_add(this);
147 }
148 GEM_BUG_ON(!rq);
149 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
150 pr_err("Switching between %ld contexts timed out\n",
151 prime);
152 intel_gt_set_wedged(to_gt(i915));
153 i915_request_put(rq);
154 break;
155 }
156 i915_request_put(rq);
157
158 times[1] = ktime_sub(ktime_get_raw(), times[1]);
159 if (prime == 2)
160 times[0] = times[1];
161
162 if (__igt_timeout(end_time, NULL))
163 break;
164 }
165
166 err = igt_live_test_end(&t);
167 if (err)
168 goto out_file;
169
170 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
171 engine->name,
172 ktime_to_ns(times[0]),
173 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
174 }
175
176 out_file:
177 fput(file);
178 return err;
179 }
180
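/*
 * Per-engine worker state: ce[] holds two separate contexts on the same
 * engine, between which the kthread switches back and forth.
 */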
181 struct parallel_switch {
182 struct task_struct *tsk;
183 struct intel_context *ce[2];
184 };
185
static int __live_parallel_switch1(void *data)
187 {
188 struct parallel_switch *arg = data;
189 IGT_TIMEOUT(end_time);
190 unsigned long count;
191
192 count = 0;
193 do {
194 struct i915_request *rq = NULL;
195 int err, n;
196
197 err = 0;
198 for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
199 struct i915_request *prev = rq;
200
201 rq = i915_request_create(arg->ce[n]);
202 if (IS_ERR(rq)) {
203 i915_request_put(prev);
204 return PTR_ERR(rq);
205 }
206
207 i915_request_get(rq);
208 if (prev) {
209 err = i915_request_await_dma_fence(rq, &prev->fence);
210 i915_request_put(prev);
211 }
212
213 i915_request_add(rq);
214 }
215 if (i915_request_wait(rq, 0, HZ) < 0)
216 err = -ETIME;
217 i915_request_put(rq);
218 if (err)
219 return err;
220
221 count++;
222 } while (!__igt_timeout(end_time, NULL));
223
224 pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
225 return 0;
226 }
227
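/*
 * As above, but fire-and-forget: requests are queued back to back without
 * waiting for completion inside the loop, so the count reflects how many
 * switches we can submit rather than how many retire.
 */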
static int __live_parallel_switchN(void *data)
229 {
230 struct parallel_switch *arg = data;
231 struct i915_request *rq = NULL;
232 IGT_TIMEOUT(end_time);
233 unsigned long count;
234 int n;
235
236 count = 0;
237 do {
238 for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
239 struct i915_request *prev = rq;
240 int err = 0;
241
242 rq = i915_request_create(arg->ce[n]);
243 if (IS_ERR(rq)) {
244 i915_request_put(prev);
245 return PTR_ERR(rq);
246 }
247
248 i915_request_get(rq);
249 if (prev) {
250 err = i915_request_await_dma_fence(rq, &prev->fence);
251 i915_request_put(prev);
252 }
253
254 i915_request_add(rq);
255 if (err) {
256 i915_request_put(rq);
257 return err;
258 }
259 }
260
261 count++;
262 } while (!__igt_timeout(end_time, NULL));
263 i915_request_put(rq);
264
265 pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
266 return 0;
267 }
268
static int live_parallel_switch(void *arg)
270 {
271 struct drm_i915_private *i915 = arg;
272 static int (* const func[])(void *arg) = {
273 __live_parallel_switch1,
274 __live_parallel_switchN,
275 NULL,
276 };
277 struct parallel_switch *data = NULL;
278 struct i915_gem_engines *engines;
279 struct i915_gem_engines_iter it;
280 int (* const *fn)(void *arg);
281 struct i915_gem_context *ctx;
282 struct intel_context *ce;
283 struct file *file;
284 int n, m, count;
285 int err = 0;
286
287 /*
288 * Check we can process switches on all engines simultaneously.
289 */
290
291 if (!DRIVER_CAPS(i915)->has_logical_contexts)
292 return 0;
293
294 file = mock_file(i915);
295 if (IS_ERR(file))
296 return PTR_ERR(file);
297
298 ctx = live_context(i915, file);
299 if (IS_ERR(ctx)) {
300 err = PTR_ERR(ctx);
301 goto out_file;
302 }
303
304 engines = i915_gem_context_lock_engines(ctx);
305 count = engines->num_engines;
306
307 data = kcalloc(count, sizeof(*data), GFP_KERNEL);
308 if (!data) {
309 i915_gem_context_unlock_engines(ctx);
310 err = -ENOMEM;
311 goto out_file;
312 }
313
314 m = 0; /* Use the first context as our template for the engines */
315 for_each_gem_engine(ce, engines, it) {
316 err = intel_context_pin(ce);
317 if (err) {
318 i915_gem_context_unlock_engines(ctx);
319 goto out;
320 }
321 data[m++].ce[0] = intel_context_get(ce);
322 }
323 i915_gem_context_unlock_engines(ctx);
324
325 /* Clone the same set of engines into the other contexts */
326 for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
327 ctx = live_context(i915, file);
328 if (IS_ERR(ctx)) {
329 err = PTR_ERR(ctx);
330 goto out;
331 }
332
333 for (m = 0; m < count; m++) {
334 if (!data[m].ce[0])
335 continue;
336
			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}
340
341 err = intel_context_pin(ce);
342 if (err) {
343 intel_context_put(ce);
344 goto out;
345 }
346
347 data[m].ce[n] = ce;
348 }
349 }
350
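	/*
	 * Run each variant with one kthread per engine; kthread_stop()
	 * waits for the worker and returns its exit code, which we fold
	 * into err.
	 */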
351 for (fn = func; !err && *fn; fn++) {
352 struct igt_live_test t;
353 int n;
354
355 err = igt_live_test_begin(&t, i915, __func__, "");
356 if (err)
357 break;
358
359 for (n = 0; n < count; n++) {
360 if (!data[n].ce[0])
361 continue;
362
363 data[n].tsk = kthread_run(*fn, &data[n],
364 "igt/parallel:%s",
365 data[n].ce[0]->engine->name);
366 if (IS_ERR(data[n].tsk)) {
367 err = PTR_ERR(data[n].tsk);
368 break;
369 }
370 get_task_struct(data[n].tsk);
371 }
372
373 yield(); /* start all threads before we kthread_stop() */
374
375 for (n = 0; n < count; n++) {
376 int status;
377
378 if (IS_ERR_OR_NULL(data[n].tsk))
379 continue;
380
381 status = kthread_stop(data[n].tsk);
382 if (status && !err)
383 err = status;
384
385 put_task_struct(data[n].tsk);
386 data[n].tsk = NULL;
387 }
388
389 if (igt_live_test_end(&t))
390 err = -EIO;
391 }
392
393 out:
394 for (n = 0; n < count; n++) {
395 for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
396 if (!data[n].ce[m])
397 continue;
398
399 intel_context_unpin(data[n].ce[m]);
400 intel_context_put(data[n].ce[m]);
401 }
402 }
403 kfree(data);
404 out_file:
405 fput(file);
406 return err;
407 }
408
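/*
 * A huge_gem_object backs a large virtual size with a small set of physical
 * pages: real_page_count() is the number of backing pages actually allocated,
 * fake_page_count() the number of pages in the object's (much larger)
 * virtual/DMA size.
 */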
static unsigned long real_page_count(struct drm_i915_gem_object *obj)
410 {
411 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
412 }
413
static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
415 {
416 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
417 }
418
static int gpu_fill(struct intel_context *ce,
420 struct drm_i915_gem_object *obj,
421 unsigned int dw)
422 {
423 struct i915_vma *vma;
424 int err;
425
426 GEM_BUG_ON(obj->base.size > ce->vm->total);
427 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
428
429 vma = i915_vma_instance(obj, ce->vm, NULL);
430 if (IS_ERR(vma))
431 return PTR_ERR(vma);
432
433 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
434 if (err)
435 return err;
436
	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
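	/*
	 * For example, with dw == 3 the target offset selects GTT pages
	 * [3 * real_page_count(obj), 4 * real_page_count(obj)) and the
	 * value 3 is stored at dword 3, so every backing page should end
	 * up with map[3] == 3 for cpu_check() to verify.
	 */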
445 err = igt_gpu_fill_dw(ce, vma,
446 (dw * real_page_count(obj)) << PAGE_SHIFT |
447 (dw * sizeof(u32)),
448 real_page_count(obj),
449 dw);
450 i915_vma_unpin(vma);
451
452 return err;
453 }
454
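/*
 * Stamp every dword of the object with @value from the CPU so that
 * cpu_check() can later tell GPU-written dwords apart from untouched ones.
 */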
static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
456 {
457 const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
458 unsigned int n, m, need_flush;
459 int err;
460
461 i915_gem_object_lock(obj, NULL);
462 err = i915_gem_object_prepare_write(obj, &need_flush);
463 if (err)
464 goto out;
465
466 for (n = 0; n < real_page_count(obj); n++) {
467 u32 *map;
468
469 map = kmap_atomic(i915_gem_object_get_page(obj, n));
470 for (m = 0; m < DW_PER_PAGE; m++)
471 map[m] = value;
472 if (!has_llc)
473 drm_clflush_virt_range(map, PAGE_SIZE);
474 kunmap_atomic(map);
475 }
476
477 i915_gem_object_finish_access(obj);
478 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
479 obj->write_domain = 0;
480 out:
481 i915_gem_object_unlock(obj);
482 return err;
483 }
484
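/*
 * Verify a filled object: the first @max dwords of every backing page must
 * hold their own index (written by gpu_fill()), and the remainder must still
 * contain the STACK_MAGIC pattern stamped by cpu_fill().
 */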
static noinline int cpu_check(struct drm_i915_gem_object *obj,
486 unsigned int idx, unsigned int max)
487 {
488 unsigned int n, m, needs_flush;
489 int err;
490
491 i915_gem_object_lock(obj, NULL);
492 err = i915_gem_object_prepare_read(obj, &needs_flush);
493 if (err)
494 goto out_unlock;
495
496 for (n = 0; n < real_page_count(obj); n++) {
497 u32 *map;
498
499 map = kmap_atomic(i915_gem_object_get_page(obj, n));
500 if (needs_flush & CLFLUSH_BEFORE)
501 drm_clflush_virt_range(map, PAGE_SIZE);
502
503 for (m = 0; m < max; m++) {
504 if (map[m] != m) {
505 pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
506 __builtin_return_address(0), idx,
507 n, real_page_count(obj), m, max,
508 map[m], m);
509 err = -EINVAL;
510 goto out_unmap;
511 }
512 }
513
514 for (; m < DW_PER_PAGE; m++) {
515 if (map[m] != STACK_MAGIC) {
516 pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
517 __builtin_return_address(0), idx, n, m,
518 map[m], STACK_MAGIC);
519 err = -EINVAL;
520 goto out_unmap;
521 }
522 }
523
524 out_unmap:
525 kunmap_atomic(map);
526 if (err)
527 break;
528 }
529
530 i915_gem_object_finish_access(obj);
531 out_unlock:
532 i915_gem_object_unlock(obj);
533 return err;
534 }
535
static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
537 {
538 int err;
539
540 GEM_BUG_ON(obj->base.handle_count);
541
542 /* tie the object to the drm_file for easy reaping */
543 err = idr_alloc(&to_drm_file(file)->object_idr,
544 &obj->base, 1, 0, GFP_KERNEL);
545 if (err < 0)
546 return err;
547
548 i915_gem_object_get(obj);
549 obj->base.handle_count++;
550 return 0;
551 }
552
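/*
 * Create a test object: a huge_gem_object with only DW_PER_PAGE pages of
 * physical backing, a virtual size of up to half the address space, and
 * every dword pre-filled with STACK_MAGIC by the CPU.
 */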
553 static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
555 struct file *file,
556 struct list_head *objects)
557 {
558 struct drm_i915_gem_object *obj;
559 u64 size;
560 int err;
561
562 /* Keep in GEM's good graces */
563 intel_gt_retire_requests(vm->gt);
564
565 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
566 size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
567
568 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
569 if (IS_ERR(obj))
570 return obj;
571
572 err = file_add_object(file, obj);
573 i915_gem_object_put(obj);
574 if (err)
575 return ERR_PTR(err);
576
577 err = cpu_fill(obj, STACK_MAGIC);
578 if (err) {
579 pr_err("Failed to fill object with cpu, err=%d\n",
580 err);
581 return ERR_PTR(err);
582 }
583
584 list_add_tail(&obj->st_link, objects);
585 return obj;
586 }
587
static unsigned long max_dwords(struct drm_i915_gem_object *obj)
589 {
590 unsigned long npages = fake_page_count(obj);
591
592 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
593 return npages / DW_PER_PAGE;
594 }
595
static void throttle_release(struct i915_request **q, int count)
597 {
598 int i;
599
600 for (i = 0; i < count; i++) {
601 if (IS_ERR_OR_NULL(q[i]))
602 continue;
603
604 i915_request_put(fetch_and_zero(&q[i]));
605 }
606 }
607
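/*
 * throttle() bounds how far the tests queue ahead of the GPU: it waits for
 * the oldest request in @q, shifts the ring down and appends a fresh request
 * for @ce; throttle_release() drops whatever is still outstanding. Typical
 * usage, as in igt_ctx_exec():
 *
 *	struct i915_request *tq[5] = {};
 *	...
 *	err = throttle(ce, tq, ARRAY_SIZE(tq));
 *	...
 *	throttle_release(tq, ARRAY_SIZE(tq));
 */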
static int throttle(struct intel_context *ce,
609 struct i915_request **q, int count)
610 {
611 int i;
612
613 if (!IS_ERR_OR_NULL(q[0])) {
614 if (i915_request_wait(q[0],
615 I915_WAIT_INTERRUPTIBLE,
616 MAX_SCHEDULE_TIMEOUT) < 0)
617 return -EINTR;
618
619 i915_request_put(q[0]);
620 }
621
622 for (i = 0; i < count - 1; i++)
623 q[i] = q[i + 1];
624
625 q[i] = intel_context_create_request(ce);
626 if (IS_ERR(q[i]))
627 return PTR_ERR(q[i]);
628
629 i915_request_get(q[i]);
630 i915_request_add(q[i]);
631
632 return 0;
633 }
634
static int igt_ctx_exec(void *arg)
636 {
637 struct drm_i915_private *i915 = arg;
638 struct intel_engine_cs *engine;
639 int err = -ENODEV;
640
641 /*
642 * Create a few different contexts (with different mm) and write
643 * through each ctx/mm using the GPU making sure those writes end
644 * up in the expected pages of our obj.
645 */
646
647 if (!DRIVER_CAPS(i915)->has_logical_contexts)
648 return 0;
649
650 for_each_uabi_engine(engine, i915) {
651 struct drm_i915_gem_object *obj = NULL;
652 unsigned long ncontexts, ndwords, dw;
653 struct i915_request *tq[5] = {};
654 struct igt_live_test t;
655 IGT_TIMEOUT(end_time);
656 LIST_HEAD(objects);
657 struct file *file;
658
659 if (!intel_engine_can_store_dword(engine))
660 continue;
661
662 if (!engine->context_size)
663 continue; /* No logical context support in HW */
664
665 file = mock_file(i915);
666 if (IS_ERR(file))
667 return PTR_ERR(file);
668
669 err = igt_live_test_begin(&t, i915, __func__, engine->name);
670 if (err)
671 goto out_file;
672
673 ncontexts = 0;
674 ndwords = 0;
675 dw = 0;
676 while (!time_after(jiffies, end_time)) {
677 struct i915_gem_context *ctx;
678 struct intel_context *ce;
679
680 ctx = kernel_context(i915, NULL);
681 if (IS_ERR(ctx)) {
682 err = PTR_ERR(ctx);
683 goto out_file;
684 }
685
686 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
687 GEM_BUG_ON(IS_ERR(ce));
688
689 if (!obj) {
690 obj = create_test_object(ce->vm, file, &objects);
691 if (IS_ERR(obj)) {
692 err = PTR_ERR(obj);
693 intel_context_put(ce);
694 kernel_context_close(ctx);
695 goto out_file;
696 }
697 }
698
699 err = gpu_fill(ce, obj, dw);
700 if (err) {
701 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
702 ndwords, dw, max_dwords(obj),
703 engine->name,
704 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
705 err);
706 intel_context_put(ce);
707 kernel_context_close(ctx);
708 goto out_file;
709 }
710
711 err = throttle(ce, tq, ARRAY_SIZE(tq));
712 if (err) {
713 intel_context_put(ce);
714 kernel_context_close(ctx);
715 goto out_file;
716 }
717
718 if (++dw == max_dwords(obj)) {
719 obj = NULL;
720 dw = 0;
721 }
722
723 ndwords++;
724 ncontexts++;
725
726 intel_context_put(ce);
727 kernel_context_close(ctx);
728 }
729
730 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
731 ncontexts, engine->name, ndwords);
732
733 ncontexts = dw = 0;
734 list_for_each_entry(obj, &objects, st_link) {
735 unsigned int rem =
736 min_t(unsigned int, ndwords - dw, max_dwords(obj));
737
738 err = cpu_check(obj, ncontexts++, rem);
739 if (err)
740 break;
741
742 dw += rem;
743 }
744
745 out_file:
746 throttle_release(tq, ARRAY_SIZE(tq));
747 if (igt_live_test_end(&t))
748 err = -EIO;
749
750 fput(file);
751 if (err)
752 return err;
753
754 i915_gem_drain_freed_objects(i915);
755 }
756
757 return 0;
758 }
759
static int igt_shared_ctx_exec(void *arg)
761 {
762 struct drm_i915_private *i915 = arg;
763 struct i915_request *tq[5] = {};
764 struct i915_gem_context *parent;
765 struct intel_engine_cs *engine;
766 struct igt_live_test t;
767 struct file *file;
768 int err = 0;
769
770 /*
771 * Create a few different contexts with the same mm and write
772 * through each ctx using the GPU making sure those writes end
773 * up in the expected pages of our obj.
774 */
775 if (!DRIVER_CAPS(i915)->has_logical_contexts)
776 return 0;
777
778 file = mock_file(i915);
779 if (IS_ERR(file))
780 return PTR_ERR(file);
781
782 parent = live_context(i915, file);
783 if (IS_ERR(parent)) {
784 err = PTR_ERR(parent);
785 goto out_file;
786 }
787
788 if (!parent->vm) { /* not full-ppgtt; nothing to share */
789 err = 0;
790 goto out_file;
791 }
792
793 err = igt_live_test_begin(&t, i915, __func__, "");
794 if (err)
795 goto out_file;
796
797 for_each_uabi_engine(engine, i915) {
798 unsigned long ncontexts, ndwords, dw;
799 struct drm_i915_gem_object *obj = NULL;
800 IGT_TIMEOUT(end_time);
801 LIST_HEAD(objects);
802
803 if (!intel_engine_can_store_dword(engine))
804 continue;
805
806 dw = 0;
807 ndwords = 0;
808 ncontexts = 0;
809 while (!time_after(jiffies, end_time)) {
810 struct i915_gem_context *ctx;
811 struct intel_context *ce;
812
813 ctx = kernel_context(i915, parent->vm);
814 if (IS_ERR(ctx)) {
815 err = PTR_ERR(ctx);
816 goto out_test;
817 }
818
819 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
820 GEM_BUG_ON(IS_ERR(ce));
821
822 if (!obj) {
823 obj = create_test_object(parent->vm,
824 file, &objects);
825 if (IS_ERR(obj)) {
826 err = PTR_ERR(obj);
827 intel_context_put(ce);
828 kernel_context_close(ctx);
829 goto out_test;
830 }
831 }
832
833 err = gpu_fill(ce, obj, dw);
834 if (err) {
835 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
836 ndwords, dw, max_dwords(obj),
837 engine->name,
838 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
839 err);
840 intel_context_put(ce);
841 kernel_context_close(ctx);
842 goto out_test;
843 }
844
845 err = throttle(ce, tq, ARRAY_SIZE(tq));
846 if (err) {
847 intel_context_put(ce);
848 kernel_context_close(ctx);
849 goto out_test;
850 }
851
852 if (++dw == max_dwords(obj)) {
853 obj = NULL;
854 dw = 0;
855 }
856
857 ndwords++;
858 ncontexts++;
859
860 intel_context_put(ce);
861 kernel_context_close(ctx);
862 }
863 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
864 ncontexts, engine->name, ndwords);
865
866 ncontexts = dw = 0;
867 list_for_each_entry(obj, &objects, st_link) {
868 unsigned int rem =
869 min_t(unsigned int, ndwords - dw, max_dwords(obj));
870
871 err = cpu_check(obj, ncontexts++, rem);
872 if (err)
873 goto out_test;
874
875 dw += rem;
876 }
877
878 i915_gem_drain_freed_objects(i915);
879 }
880 out_test:
881 throttle_release(tq, ARRAY_SIZE(tq));
882 if (igt_live_test_end(&t))
883 err = -EIO;
884 out_file:
885 fput(file);
886 return err;
887 }
888
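/*
 * Build a small batch in @rpcs that stores the engine's R_PWR_CLK_STATE
 * register into the start of @vma, so the CPU can read back the RPCS value
 * that was live while the batch executed in a given context.
 */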
static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
890 struct i915_vma *vma,
891 struct intel_engine_cs *engine)
892 {
893 u32 *cmd;
894
895 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
896
897 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
898 if (IS_ERR(cmd))
899 return PTR_ERR(cmd);
900
901 *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
902 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
903 *cmd++ = lower_32_bits(vma->node.start);
904 *cmd++ = upper_32_bits(vma->node.start);
905 *cmd = MI_BATCH_BUFFER_END;
906
907 __i915_gem_object_flush_map(rpcs, 0, 64);
908 i915_gem_object_unpin_map(rpcs);
909
910 intel_gt_chipset_flush(vma->vm->gt);
911
912 return 0;
913 }
914
915 static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
917 struct intel_context *ce,
918 struct i915_request **rq_out)
919 {
920 struct drm_i915_private *i915 = to_i915(obj->base.dev);
921 struct i915_request *rq;
922 struct i915_gem_ww_ctx ww;
923 struct i915_vma *batch;
924 struct i915_vma *vma;
925 struct drm_i915_gem_object *rpcs;
926 int err;
927
928 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
929
930 if (GRAPHICS_VER(i915) < 8)
931 return -EINVAL;
932
933 vma = i915_vma_instance(obj, ce->vm, NULL);
934 if (IS_ERR(vma))
935 return PTR_ERR(vma);
936
937 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
938 if (IS_ERR(rpcs))
939 return PTR_ERR(rpcs);
940
941 batch = i915_vma_instance(rpcs, ce->vm, NULL);
942 if (IS_ERR(batch)) {
943 err = PTR_ERR(batch);
944 goto err_put;
945 }
946
947 i915_gem_ww_ctx_init(&ww, false);
948 retry:
949 err = i915_gem_object_lock(obj, &ww);
950 if (!err)
951 err = i915_gem_object_lock(rpcs, &ww);
952 if (!err)
953 err = i915_gem_object_set_to_gtt_domain(obj, false);
954 if (!err)
955 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
956 if (err)
957 goto err_put;
958
959 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
960 if (err)
961 goto err_vma;
962
963 err = rpcs_query_batch(rpcs, vma, ce->engine);
964 if (err)
965 goto err_batch;
966
967 rq = i915_request_create(ce);
968 if (IS_ERR(rq)) {
969 err = PTR_ERR(rq);
970 goto err_batch;
971 }
972
973 err = i915_request_await_object(rq, batch->obj, false);
974 if (err == 0)
975 err = i915_vma_move_to_active(batch, rq, 0);
976 if (err)
977 goto skip_request;
978
979 err = i915_request_await_object(rq, vma->obj, true);
980 if (err == 0)
981 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
982 if (err)
983 goto skip_request;
984
985 if (rq->engine->emit_init_breadcrumb) {
986 err = rq->engine->emit_init_breadcrumb(rq);
987 if (err)
988 goto skip_request;
989 }
990
991 err = rq->engine->emit_bb_start(rq,
992 batch->node.start, batch->node.size,
993 0);
994 if (err)
995 goto skip_request;
996
997 *rq_out = i915_request_get(rq);
998
999 skip_request:
1000 if (err)
1001 i915_request_set_error_once(rq, err);
1002 i915_request_add(rq);
1003 err_batch:
1004 i915_vma_unpin(batch);
1005 err_vma:
1006 i915_vma_unpin(vma);
1007 err_put:
1008 if (err == -EDEADLK) {
1009 err = i915_gem_ww_ctx_backoff(&ww);
1010 if (!err)
1011 goto retry;
1012 }
1013 i915_gem_ww_ctx_fini(&ww);
1014 i915_gem_object_put(rpcs);
1015 return err;
1016 }
1017
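/*
 * Modifiers for the SSEU subtests: TEST_BUSY keeps the context busy with a
 * spinner during reconfiguration, TEST_RESET performs an engine reset before
 * reading back the result, and TEST_IDLE re-reads the RPCS value once the GT
 * has idled.
 */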
1018 #define TEST_IDLE BIT(0)
1019 #define TEST_BUSY BIT(1)
1020 #define TEST_RESET BIT(2)
1021
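/*
 * For the busy/reset phases, start a spinner on @ce so the SSEU
 * reconfiguration is applied while the context has work in flight.
 */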
1022 static int
__sseu_prepare(const char *name,
1024 unsigned int flags,
1025 struct intel_context *ce,
1026 struct igt_spinner **spin)
1027 {
1028 struct i915_request *rq;
1029 int ret;
1030
1031 *spin = NULL;
1032 if (!(flags & (TEST_BUSY | TEST_RESET)))
1033 return 0;
1034
1035 *spin = kzalloc(sizeof(**spin), GFP_KERNEL);
1036 if (!*spin)
1037 return -ENOMEM;
1038
1039 ret = igt_spinner_init(*spin, ce->engine->gt);
1040 if (ret)
1041 goto err_free;
1042
1043 rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
1044 if (IS_ERR(rq)) {
1045 ret = PTR_ERR(rq);
1046 goto err_fini;
1047 }
1048
1049 i915_request_add(rq);
1050
1051 if (!igt_wait_for_spinner(*spin, rq)) {
1052 pr_err("%s: Spinner failed to start!\n", name);
1053 ret = -ETIMEDOUT;
1054 goto err_end;
1055 }
1056
1057 return 0;
1058
1059 err_end:
1060 igt_spinner_end(*spin);
1061 err_fini:
1062 igt_spinner_fini(*spin);
1063 err_free:
1064 kfree(fetch_and_zero(spin));
1065 return ret;
1066 }
1067
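/*
 * Submit an RPCS query on @ce and decode the enabled slice count from the
 * result; the raw register value is returned via @rpcs for logging.
 */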
1068 static int
__read_slice_count(struct intel_context *ce,
1070 struct drm_i915_gem_object *obj,
1071 struct igt_spinner *spin,
1072 u32 *rpcs)
1073 {
1074 struct i915_request *rq = NULL;
1075 u32 s_mask, s_shift;
1076 unsigned int cnt;
1077 u32 *buf, val;
1078 long ret;
1079
1080 ret = emit_rpcs_query(obj, ce, &rq);
1081 if (ret)
1082 return ret;
1083
1084 if (spin)
1085 igt_spinner_end(spin);
1086
1087 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
1088 i915_request_put(rq);
1089 if (ret < 0)
1090 return ret;
1091
1092 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1093 if (IS_ERR(buf)) {
1094 ret = PTR_ERR(buf);
1095 return ret;
1096 }
1097
1098 if (GRAPHICS_VER(ce->engine->i915) >= 11) {
1099 s_mask = GEN11_RPCS_S_CNT_MASK;
1100 s_shift = GEN11_RPCS_S_CNT_SHIFT;
1101 } else {
1102 s_mask = GEN8_RPCS_S_CNT_MASK;
1103 s_shift = GEN8_RPCS_S_CNT_SHIFT;
1104 }
1105
1106 val = *buf;
1107 cnt = (val & s_mask) >> s_shift;
1108 *rpcs = val;
1109
1110 i915_gem_object_unpin_map(obj);
1111
1112 return cnt;
1113 }
1114
1115 static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
1117 const char *prefix, const char *suffix)
1118 {
1119 if (slices == expected)
1120 return 0;
1121
1122 if (slices < 0) {
1123 pr_err("%s: %s read slice count failed with %d%s\n",
1124 name, prefix, slices, suffix);
1125 return slices;
1126 }
1127
1128 pr_err("%s: %s slice count %d is not %u%s\n",
1129 name, prefix, slices, expected, suffix);
1130
1131 pr_info("RPCS=0x%x; %u%sx%u%s\n",
1132 rpcs, slices,
1133 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
1134 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
1135 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
1136
1137 return -EINVAL;
1138 }
1139
1140 static int
__sseu_finish(const char *name,
1142 unsigned int flags,
1143 struct intel_context *ce,
1144 struct drm_i915_gem_object *obj,
1145 unsigned int expected,
1146 struct igt_spinner *spin)
1147 {
1148 unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
1149 u32 rpcs = 0;
1150 int ret = 0;
1151
1152 if (flags & TEST_RESET) {
1153 ret = intel_engine_reset(ce->engine, "sseu");
1154 if (ret)
1155 goto out;
1156 }
1157
1158 ret = __read_slice_count(ce, obj,
1159 flags & TEST_RESET ? NULL : spin, &rpcs);
1160 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
1161 if (ret)
1162 goto out;
1163
1164 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
1165 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
1166
1167 out:
1168 if (spin)
1169 igt_spinner_end(spin);
1170
1171 if ((flags & TEST_IDLE) && ret == 0) {
1172 ret = igt_flush_test(ce->engine->i915);
1173 if (ret)
1174 return ret;
1175
1176 ret = __read_slice_count(ce, obj, NULL, &rpcs);
1177 ret = __check_rpcs(name, rpcs, ret, expected,
1178 "Context", " after idle!");
1179 }
1180
1181 return ret;
1182 }
1183
1184 static int
__sseu_test(const char *name,
1186 unsigned int flags,
1187 struct intel_context *ce,
1188 struct drm_i915_gem_object *obj,
1189 struct intel_sseu sseu)
1190 {
1191 struct igt_spinner *spin = NULL;
1192 int ret;
1193
1194 intel_engine_pm_get(ce->engine);
1195
1196 ret = __sseu_prepare(name, flags, ce, &spin);
1197 if (ret)
1198 goto out_pm;
1199
1200 ret = intel_context_reconfigure_sseu(ce, sseu);
1201 if (ret)
1202 goto out_spin;
1203
1204 ret = __sseu_finish(name, flags, ce, obj,
1205 hweight32(sseu.slice_mask), spin);
1206
1207 out_spin:
1208 if (spin) {
1209 igt_spinner_end(spin);
1210 igt_spinner_fini(spin);
1211 kfree(spin);
1212 }
1213 out_pm:
1214 intel_engine_pm_put(ce->engine);
1215 return ret;
1216 }
1217
1218 static int
__igt_ctx_sseu(struct drm_i915_private *i915,
1220 const char *name,
1221 unsigned int flags)
1222 {
1223 struct drm_i915_gem_object *obj;
1224 int inst = 0;
1225 int ret = 0;
1226
1227 if (GRAPHICS_VER(i915) < 9)
1228 return 0;
1229
1230 if (flags & TEST_RESET)
1231 igt_global_reset_lock(to_gt(i915));
1232
1233 obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1234 if (IS_ERR(obj)) {
1235 ret = PTR_ERR(obj);
1236 goto out_unlock;
1237 }
1238
1239 do {
1240 struct intel_engine_cs *engine;
1241 struct intel_context *ce;
1242 struct intel_sseu pg_sseu;
1243
1244 engine = intel_engine_lookup_user(i915,
1245 I915_ENGINE_CLASS_RENDER,
1246 inst++);
1247 if (!engine)
1248 break;
1249
1250 if (hweight32(engine->sseu.slice_mask) < 2)
1251 continue;
1252
1253 if (!engine->gt->info.sseu.has_slice_pg)
1254 continue;
1255
1256 /*
1257 * Gen11 VME friendly power-gated configuration with
1258 * half enabled sub-slices.
1259 */
1260 pg_sseu = engine->sseu;
1261 pg_sseu.slice_mask = 1;
1262 pg_sseu.subslice_mask =
1263 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
1264
1265 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
1266 engine->name, name, flags,
1267 hweight32(engine->sseu.slice_mask),
1268 hweight32(pg_sseu.slice_mask));
1269
1270 ce = intel_context_create(engine);
1271 if (IS_ERR(ce)) {
1272 ret = PTR_ERR(ce);
1273 goto out_put;
1274 }
1275
1276 ret = intel_context_pin(ce);
1277 if (ret)
1278 goto out_ce;
1279
1280 /* First set the default mask. */
1281 ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1282 if (ret)
1283 goto out_unpin;
1284
1285 /* Then set a power-gated configuration. */
1286 ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1287 if (ret)
1288 goto out_unpin;
1289
1290 /* Back to defaults. */
1291 ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1292 if (ret)
1293 goto out_unpin;
1294
1295 /* One last power-gated configuration for the road. */
1296 ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1297 if (ret)
1298 goto out_unpin;
1299
1300 out_unpin:
1301 intel_context_unpin(ce);
1302 out_ce:
1303 intel_context_put(ce);
1304 } while (!ret);
1305
1306 if (igt_flush_test(i915))
1307 ret = -EIO;
1308
1309 out_put:
1310 i915_gem_object_put(obj);
1311
1312 out_unlock:
1313 if (flags & TEST_RESET)
1314 igt_global_reset_unlock(to_gt(i915));
1315
1316 if (ret)
1317 pr_err("%s: Failed with %d!\n", name, ret);
1318
1319 return ret;
1320 }
1321
static int igt_ctx_sseu(void *arg)
1323 {
1324 struct {
1325 const char *name;
1326 unsigned int flags;
1327 } *phase, phases[] = {
1328 { .name = "basic", .flags = 0 },
1329 { .name = "idle", .flags = TEST_IDLE },
1330 { .name = "busy", .flags = TEST_BUSY },
1331 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
1332 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
1333 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
1334 };
1335 unsigned int i;
1336 int ret = 0;
1337
1338 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
1339 i++, phase++)
1340 ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
1341
1342 return ret;
1343 }
1344
static int igt_ctx_readonly(void *arg)
1346 {
1347 struct drm_i915_private *i915 = arg;
1348 unsigned long idx, ndwords, dw, num_engines;
1349 struct drm_i915_gem_object *obj = NULL;
1350 struct i915_request *tq[5] = {};
1351 struct i915_gem_engines_iter it;
1352 struct i915_address_space *vm;
1353 struct i915_gem_context *ctx;
1354 struct intel_context *ce;
1355 struct igt_live_test t;
1356 I915_RND_STATE(prng);
1357 IGT_TIMEOUT(end_time);
1358 LIST_HEAD(objects);
1359 struct file *file;
1360 int err = -ENODEV;
1361
	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */
1367
1368 file = mock_file(i915);
1369 if (IS_ERR(file))
1370 return PTR_ERR(file);
1371
1372 err = igt_live_test_begin(&t, i915, __func__, "");
1373 if (err)
1374 goto out_file;
1375
1376 ctx = live_context(i915, file);
1377 if (IS_ERR(ctx)) {
1378 err = PTR_ERR(ctx);
1379 goto out_file;
1380 }
1381
1382 vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
1383 if (!vm || !vm->has_read_only) {
1384 err = 0;
1385 goto out_file;
1386 }
1387
1388 num_engines = 0;
1389 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
1390 if (intel_engine_can_store_dword(ce->engine))
1391 num_engines++;
1392 i915_gem_context_unlock_engines(ctx);
1393
1394 ndwords = 0;
1395 dw = 0;
1396 while (!time_after(jiffies, end_time)) {
1397 for_each_gem_engine(ce,
1398 i915_gem_context_lock_engines(ctx), it) {
1399 if (!intel_engine_can_store_dword(ce->engine))
1400 continue;
1401
1402 if (!obj) {
1403 obj = create_test_object(ce->vm, file, &objects);
1404 if (IS_ERR(obj)) {
1405 err = PTR_ERR(obj);
1406 i915_gem_context_unlock_engines(ctx);
1407 goto out_file;
1408 }
1409
1410 if (prandom_u32_state(&prng) & 1)
1411 i915_gem_object_set_readonly(obj);
1412 }
1413
1414 err = gpu_fill(ce, obj, dw);
1415 if (err) {
1416 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
1417 ndwords, dw, max_dwords(obj),
1418 ce->engine->name,
1419 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
1420 err);
1421 i915_gem_context_unlock_engines(ctx);
1422 goto out_file;
1423 }
1424
1425 err = throttle(ce, tq, ARRAY_SIZE(tq));
1426 if (err) {
1427 i915_gem_context_unlock_engines(ctx);
1428 goto out_file;
1429 }
1430
1431 if (++dw == max_dwords(obj)) {
1432 obj = NULL;
1433 dw = 0;
1434 }
1435 ndwords++;
1436 }
1437 i915_gem_context_unlock_engines(ctx);
1438 }
1439 pr_info("Submitted %lu dwords (across %lu engines)\n",
1440 ndwords, num_engines);
1441
1442 dw = 0;
1443 idx = 0;
1444 list_for_each_entry(obj, &objects, st_link) {
1445 unsigned int rem =
1446 min_t(unsigned int, ndwords - dw, max_dwords(obj));
1447 unsigned int num_writes;
1448
1449 num_writes = rem;
1450 if (i915_gem_object_is_readonly(obj))
1451 num_writes = 0;
1452
1453 err = cpu_check(obj, idx++, num_writes);
1454 if (err)
1455 break;
1456
1457 dw += rem;
1458 }
1459
1460 out_file:
1461 throttle_release(tq, ARRAY_SIZE(tq));
1462 if (igt_live_test_end(&t))
1463 err = -EIO;
1464
1465 fput(file);
1466 return err;
1467 }
1468
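/*
 * Confirm the randomly chosen offset does not overlap anything already bound
 * in the vm, so the scratch access only touches unused (scratch page backed)
 * address space.
 */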
static int check_scratch(struct i915_address_space *vm, u64 offset)
1470 {
1471 struct drm_mm_node *node;
1472
1473 mutex_lock(&vm->mutex);
1474 node = __drm_mm_interval_first(&vm->mm,
1475 offset, offset + sizeof(u32) - 1);
1476 mutex_unlock(&vm->mutex);
1477 if (!node || node->start > offset)
1478 return 0;
1479
1480 GEM_BUG_ON(offset >= node->start + node->size);
1481
1482 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
1483 upper_32_bits(offset), lower_32_bits(offset));
1484 return -EINVAL;
1485 }
1486
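/*
 * From @ctx, emit an MI_STORE_DWORD_IMM that writes @value to @offset in the
 * context's address space; the dword lands wherever that context's page
 * tables point, which is the crux of the vm isolation test.
 */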
static int write_to_scratch(struct i915_gem_context *ctx,
1488 struct intel_engine_cs *engine,
1489 struct drm_i915_gem_object *obj,
1490 u64 offset, u32 value)
1491 {
1492 struct drm_i915_private *i915 = ctx->i915;
1493 struct i915_address_space *vm;
1494 struct i915_request *rq;
1495 struct i915_vma *vma;
1496 u32 *cmd;
1497 int err;
1498
1499 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1500
1501 err = check_scratch(ctx->vm, offset);
1502 if (err)
1503 return err;
1504
1505 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1506 if (IS_ERR(cmd))
1507 return PTR_ERR(cmd);
1508
1509 *cmd++ = MI_STORE_DWORD_IMM_GEN4;
1510 if (GRAPHICS_VER(i915) >= 8) {
1511 *cmd++ = lower_32_bits(offset);
1512 *cmd++ = upper_32_bits(offset);
1513 } else {
1514 *cmd++ = 0;
1515 *cmd++ = offset;
1516 }
1517 *cmd++ = value;
1518 *cmd = MI_BATCH_BUFFER_END;
1519 __i915_gem_object_flush_map(obj, 0, 64);
1520 i915_gem_object_unpin_map(obj);
1521
1522 intel_gt_chipset_flush(engine->gt);
1523
1524 vm = i915_gem_context_get_eb_vm(ctx);
1525 vma = i915_vma_instance(obj, vm, NULL);
1526 if (IS_ERR(vma)) {
1527 err = PTR_ERR(vma);
1528 goto out_vm;
1529 }
1530
1531 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1532 if (err)
1533 goto out_vm;
1534
1535 rq = igt_request_alloc(ctx, engine);
1536 if (IS_ERR(rq)) {
1537 err = PTR_ERR(rq);
1538 goto err_unpin;
1539 }
1540
1541 i915_vma_lock(vma);
1542 err = i915_request_await_object(rq, vma->obj, false);
1543 if (err == 0)
1544 err = i915_vma_move_to_active(vma, rq, 0);
1545 i915_vma_unlock(vma);
1546 if (err)
1547 goto skip_request;
1548
1549 if (rq->engine->emit_init_breadcrumb) {
1550 err = rq->engine->emit_init_breadcrumb(rq);
1551 if (err)
1552 goto skip_request;
1553 }
1554
1555 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
1556 if (err)
1557 goto skip_request;
1558
1559 i915_vma_unpin(vma);
1560
1561 i915_request_add(rq);
1562
1563 goto out_vm;
1564 skip_request:
1565 i915_request_set_error_once(rq, err);
1566 i915_request_add(rq);
1567 err_unpin:
1568 i915_vma_unpin(vma);
1569 out_vm:
1570 i915_vm_put(vm);
1571
1572 if (!err)
1573 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1574
1575 return err;
1576 }
1577
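/*
 * From @ctx, read back the dword at @offset: on gen8+ via a LOAD/STORE
 * register round-trip through a GPR, on older hw via a privileged batch in
 * the GGTT. If the vm are properly isolated this returns the scratch-page
 * value rather than what another context wrote there.
 */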
static int read_from_scratch(struct i915_gem_context *ctx,
1579 struct intel_engine_cs *engine,
1580 struct drm_i915_gem_object *obj,
1581 u64 offset, u32 *value)
1582 {
1583 struct drm_i915_private *i915 = ctx->i915;
1584 struct i915_address_space *vm;
1585 const u32 result = 0x100;
1586 struct i915_request *rq;
1587 struct i915_vma *vma;
1588 unsigned int flags;
1589 u32 *cmd;
1590 int err;
1591
1592 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1593
1594 err = check_scratch(ctx->vm, offset);
1595 if (err)
1596 return err;
1597
1598 if (GRAPHICS_VER(i915) >= 8) {
1599 const u32 GPR0 = engine->mmio_base + 0x600;
1600
1601 vm = i915_gem_context_get_eb_vm(ctx);
1602 vma = i915_vma_instance(obj, vm, NULL);
1603 if (IS_ERR(vma)) {
1604 err = PTR_ERR(vma);
1605 goto out_vm;
1606 }
1607
1608 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1609 if (err)
1610 goto out_vm;
1611
1612 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1613 if (IS_ERR(cmd)) {
1614 err = PTR_ERR(cmd);
1615 goto err_unpin;
1616 }
1617
1618 memset(cmd, POISON_INUSE, PAGE_SIZE);
1619 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
1620 *cmd++ = GPR0;
1621 *cmd++ = lower_32_bits(offset);
1622 *cmd++ = upper_32_bits(offset);
1623 *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
1624 *cmd++ = GPR0;
1625 *cmd++ = result;
1626 *cmd++ = 0;
1627 *cmd = MI_BATCH_BUFFER_END;
1628
1629 i915_gem_object_flush_map(obj);
1630 i915_gem_object_unpin_map(obj);
1631
1632 flags = 0;
1633 } else {
1634 const u32 reg = engine->mmio_base + 0x420;
1635
1636 /* hsw: register access even to 3DPRIM! is protected */
1637 vm = i915_vm_get(&engine->gt->ggtt->vm);
1638 vma = i915_vma_instance(obj, vm, NULL);
1639 if (IS_ERR(vma)) {
1640 err = PTR_ERR(vma);
1641 goto out_vm;
1642 }
1643
1644 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1645 if (err)
1646 goto out_vm;
1647
1648 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1649 if (IS_ERR(cmd)) {
1650 err = PTR_ERR(cmd);
1651 goto err_unpin;
1652 }
1653
1654 memset(cmd, POISON_INUSE, PAGE_SIZE);
1655 *cmd++ = MI_LOAD_REGISTER_MEM;
1656 *cmd++ = reg;
1657 *cmd++ = offset;
1658 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
1659 *cmd++ = reg;
1660 *cmd++ = vma->node.start + result;
1661 *cmd = MI_BATCH_BUFFER_END;
1662
1663 i915_gem_object_flush_map(obj);
1664 i915_gem_object_unpin_map(obj);
1665
1666 flags = I915_DISPATCH_SECURE;
1667 }
1668
1669 intel_gt_chipset_flush(engine->gt);
1670
1671 rq = igt_request_alloc(ctx, engine);
1672 if (IS_ERR(rq)) {
1673 err = PTR_ERR(rq);
1674 goto err_unpin;
1675 }
1676
1677 i915_vma_lock(vma);
1678 err = i915_request_await_object(rq, vma->obj, true);
1679 if (err == 0)
1680 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1681 i915_vma_unlock(vma);
1682 if (err)
1683 goto skip_request;
1684
1685 if (rq->engine->emit_init_breadcrumb) {
1686 err = rq->engine->emit_init_breadcrumb(rq);
1687 if (err)
1688 goto skip_request;
1689 }
1690
1691 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
1692 if (err)
1693 goto skip_request;
1694
1695 i915_vma_unpin(vma);
1696
1697 i915_request_add(rq);
1698
1699 i915_gem_object_lock(obj, NULL);
1700 err = i915_gem_object_set_to_cpu_domain(obj, false);
1701 i915_gem_object_unlock(obj);
1702 if (err)
1703 goto out_vm;
1704
1705 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1706 if (IS_ERR(cmd)) {
1707 err = PTR_ERR(cmd);
1708 goto out_vm;
1709 }
1710
1711 *value = cmd[result / sizeof(*cmd)];
1712 i915_gem_object_unpin_map(obj);
1713
1714 goto out_vm;
1715 skip_request:
1716 i915_request_set_error_once(rq, err);
1717 i915_request_add(rq);
1718 err_unpin:
1719 i915_vma_unpin(vma);
1720 out_vm:
1721 i915_vm_put(vm);
1722
1723 if (!err)
1724 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1725
1726 return err;
1727 }
1728
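/*
 * Record the current contents of the context's scratch page; this is the
 * value a well-isolated read_from_scratch() is expected to return.
 */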
static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
1730 {
1731 struct i915_address_space *vm;
1732 u32 *vaddr;
1733 int err = 0;
1734
1735 vm = ctx->vm;
1736 if (!vm)
1737 return -ENODEV;
1738
1739 if (!vm->scratch[0]) {
1740 pr_err("No scratch page!\n");
1741 return -EINVAL;
1742 }
1743
1744 vaddr = __px_vaddr(vm->scratch[0]);
1745
1746 memcpy(out, vaddr, sizeof(*out));
1747 if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
1748 pr_err("Inconsistent initial state of scratch page!\n");
1749 err = -EINVAL;
1750 }
1751
1752 return err;
1753 }
1754
static int igt_vm_isolation(void *arg)
1756 {
1757 struct drm_i915_private *i915 = arg;
1758 struct i915_gem_context *ctx_a, *ctx_b;
1759 struct drm_i915_gem_object *obj_a, *obj_b;
1760 unsigned long num_engines, count;
1761 struct intel_engine_cs *engine;
1762 struct igt_live_test t;
1763 I915_RND_STATE(prng);
1764 struct file *file;
1765 u64 vm_total;
1766 u32 expected;
1767 int err;
1768
1769 if (GRAPHICS_VER(i915) < 7)
1770 return 0;
1771
1772 /*
1773 * The simple goal here is that a write into one context is not
1774 * observed in a second (separate page tables and scratch).
1775 */
1776
1777 file = mock_file(i915);
1778 if (IS_ERR(file))
1779 return PTR_ERR(file);
1780
1781 err = igt_live_test_begin(&t, i915, __func__, "");
1782 if (err)
1783 goto out_file;
1784
1785 ctx_a = live_context(i915, file);
1786 if (IS_ERR(ctx_a)) {
1787 err = PTR_ERR(ctx_a);
1788 goto out_file;
1789 }
1790
1791 ctx_b = live_context(i915, file);
1792 if (IS_ERR(ctx_b)) {
1793 err = PTR_ERR(ctx_b);
1794 goto out_file;
1795 }
1796
	/* We can only test vm isolation if the vms are distinct */
1798 if (ctx_a->vm == ctx_b->vm)
1799 goto out_file;
1800
1801 /* Read the initial state of the scratch page */
1802 err = check_scratch_page(ctx_a, &expected);
1803 if (err)
1804 goto out_file;
1805
1806 err = check_scratch_page(ctx_b, &expected);
1807 if (err)
1808 goto out_file;
1809
1810 vm_total = ctx_a->vm->total;
1811 GEM_BUG_ON(ctx_b->vm->total != vm_total);
1812
1813 obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
1814 if (IS_ERR(obj_a)) {
1815 err = PTR_ERR(obj_a);
1816 goto out_file;
1817 }
1818
1819 obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
1820 if (IS_ERR(obj_b)) {
1821 err = PTR_ERR(obj_b);
1822 goto put_a;
1823 }
1824
1825 count = 0;
1826 num_engines = 0;
1827 for_each_uabi_engine(engine, i915) {
1828 IGT_TIMEOUT(end_time);
1829 unsigned long this = 0;
1830
1831 if (!intel_engine_can_store_dword(engine))
1832 continue;
1833
1834 /* Not all engines have their own GPR! */
1835 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
1836 continue;
1837
1838 while (!__igt_timeout(end_time, NULL)) {
1839 u32 value = 0xc5c5c5c5;
1840 u64 offset;
1841
1842 /* Leave enough space at offset 0 for the batch */
1843 offset = igt_random_offset(&prng,
1844 I915_GTT_PAGE_SIZE, vm_total,
1845 sizeof(u32), alignof_dword);
1846
1847 err = write_to_scratch(ctx_a, engine, obj_a,
1848 offset, 0xdeadbeef);
1849 if (err == 0)
1850 err = read_from_scratch(ctx_b, engine, obj_b,
1851 offset, &value);
1852 if (err)
1853 goto put_b;
1854
1855 if (value != expected) {
1856 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1857 engine->name, value,
1858 upper_32_bits(offset),
1859 lower_32_bits(offset),
1860 this);
1861 err = -EINVAL;
1862 goto put_b;
1863 }
1864
1865 this++;
1866 }
1867 count += this;
1868 num_engines++;
1869 }
1870 pr_info("Checked %lu scratch offsets across %lu engines\n",
1871 count, num_engines);
1872
1873 put_b:
1874 i915_gem_object_put(obj_b);
1875 put_a:
1876 i915_gem_object_put(obj_a);
1877 out_file:
1878 if (igt_live_test_end(&t))
1879 err = -EIO;
1880 fput(file);
1881 return err;
1882 }
1883
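/*
 * Entry point for the live context selftests; skipped entirely if the GT is
 * already wedged.
 */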
int i915_gem_context_live_selftests(struct drm_i915_private *i915)
1885 {
1886 static const struct i915_subtest tests[] = {
1887 SUBTEST(live_nop_switch),
1888 SUBTEST(live_parallel_switch),
1889 SUBTEST(igt_ctx_exec),
1890 SUBTEST(igt_ctx_readonly),
1891 SUBTEST(igt_ctx_sseu),
1892 SUBTEST(igt_shared_ctx_exec),
1893 SUBTEST(igt_vm_isolation),
1894 };
1895
1896 if (intel_gt_is_wedged(to_gt(i915)))
1897 return 0;
1898
1899 return i915_live_subtests(tests, i915);
1900 }
1901