1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 #include <linux/string_helpers.h>
9 
10 #include "gem/i915_gem_internal.h"
11 #include "gem/i915_gem_pm.h"
12 #include "gt/intel_engine_pm.h"
13 #include "gt/intel_engine_regs.h"
14 #include "gt/intel_gt.h"
15 #include "gt/intel_gt_requests.h"
16 #include "gt/intel_reset.h"
17 #include "i915_selftest.h"
18 
19 #include "gem/selftests/igt_gem_utils.h"
20 #include "selftests/i915_random.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_live_test.h"
23 #include "selftests/igt_reset.h"
24 #include "selftests/igt_spinner.h"
25 #include "selftests/mock_drm.h"
26 #include "selftests/mock_gem_device.h"
27 
28 #include "huge_gem_object.h"
29 #include "igt_gem_utils.h"
30 
31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
32 
static int live_nop_switch(void *arg)
34 {
35 	const unsigned int nctx = 1024;
36 	struct drm_i915_private *i915 = arg;
37 	struct intel_engine_cs *engine;
38 	struct i915_gem_context **ctx;
39 	struct igt_live_test t;
40 	struct file *file;
41 	unsigned long n;
42 	int err = -ENODEV;
43 
44 	/*
45 	 * Create as many contexts as we can feasibly get away with
46 	 * and check we can switch between them rapidly.
47 	 *
	 * Serves as a very simple stress test for submission and HW switching
49 	 * between contexts.
50 	 */
51 
52 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
53 		return 0;
54 
55 	file = mock_file(i915);
56 	if (IS_ERR(file))
57 		return PTR_ERR(file);
58 
59 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
60 	if (!ctx) {
61 		err = -ENOMEM;
62 		goto out_file;
63 	}
64 
65 	for (n = 0; n < nctx; n++) {
66 		ctx[n] = live_context(i915, file);
67 		if (IS_ERR(ctx[n])) {
68 			err = PTR_ERR(ctx[n]);
69 			goto out_file;
70 		}
71 	}
72 
73 	for_each_uabi_engine(engine, i915) {
74 		struct i915_request *rq = NULL;
75 		unsigned long end_time, prime;
76 		ktime_t times[2] = {};
77 
78 		times[0] = ktime_get_raw();
79 		for (n = 0; n < nctx; n++) {
80 			struct i915_request *this;
81 
82 			this = igt_request_alloc(ctx[n], engine);
83 			if (IS_ERR(this)) {
84 				err = PTR_ERR(this);
85 				goto out_file;
86 			}
87 			if (rq) {
88 				i915_request_await_dma_fence(this, &rq->fence);
89 				i915_request_put(rq);
90 			}
91 			rq = i915_request_get(this);
92 			i915_request_add(this);
93 		}
94 		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
96 			intel_gt_set_wedged(to_gt(i915));
97 			i915_request_put(rq);
98 			err = -EIO;
99 			goto out_file;
100 		}
101 		i915_request_put(rq);
102 
103 		times[1] = ktime_get_raw();
104 
105 		pr_info("Populated %d contexts on %s in %lluns\n",
106 			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
107 
108 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
109 		if (err)
110 			goto out_file;
111 
112 		end_time = jiffies + i915_selftest.timeout_jiffies;
113 		for_each_prime_number_from(prime, 2, 8192) {
114 			times[1] = ktime_get_raw();
115 
116 			rq = NULL;
117 			for (n = 0; n < prime; n++) {
118 				struct i915_request *this;
119 
120 				this = igt_request_alloc(ctx[n % nctx], engine);
121 				if (IS_ERR(this)) {
122 					err = PTR_ERR(this);
123 					goto out_file;
124 				}
125 
126 				if (rq) { /* Force submission order */
127 					i915_request_await_dma_fence(this, &rq->fence);
128 					i915_request_put(rq);
129 				}
130 
131 				/*
132 				 * This space is left intentionally blank.
133 				 *
134 				 * We do not actually want to perform any
135 				 * action with this request, we just want
136 				 * to measure the latency in allocation
137 				 * and submission of our breadcrumbs -
138 				 * ensuring that the bare request is sufficient
139 				 * for the system to work (i.e. proper HEAD
140 				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bound
142 				 * for latency.
143 				 */
144 
145 				rq = i915_request_get(this);
146 				i915_request_add(this);
147 			}
148 			GEM_BUG_ON(!rq);
149 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %lu contexts timed out\n",
151 				       prime);
152 				intel_gt_set_wedged(to_gt(i915));
153 				i915_request_put(rq);
154 				break;
155 			}
156 			i915_request_put(rq);
157 
158 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
159 			if (prime == 2)
160 				times[0] = times[1];
161 
162 			if (__igt_timeout(end_time, NULL))
163 				break;
164 		}
165 
166 		err = igt_live_test_end(&t);
167 		if (err)
168 			goto out_file;
169 
170 		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
171 			engine->name,
172 			ktime_to_ns(times[0]),
173 			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
174 	}
175 
176 out_file:
177 	fput(file);
178 	return err;
179 }
180 
181 struct parallel_switch {
182 	struct task_struct *tsk;
183 	struct intel_context *ce[2];
184 };
185 
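/*
 * Thread body: submit one request on each context in turn, chained together,
 * and wait for that pair to complete before starting the next switch cycle.
 */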
static int __live_parallel_switch1(void *data)
187 {
188 	struct parallel_switch *arg = data;
189 	IGT_TIMEOUT(end_time);
190 	unsigned long count;
191 
192 	count = 0;
193 	do {
194 		struct i915_request *rq = NULL;
195 		int err, n;
196 
197 		err = 0;
198 		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
199 			struct i915_request *prev = rq;
200 
201 			rq = i915_request_create(arg->ce[n]);
202 			if (IS_ERR(rq)) {
203 				i915_request_put(prev);
204 				return PTR_ERR(rq);
205 			}
206 
207 			i915_request_get(rq);
208 			if (prev) {
209 				err = i915_request_await_dma_fence(rq, &prev->fence);
210 				i915_request_put(prev);
211 			}
212 
213 			i915_request_add(rq);
214 		}
215 		if (i915_request_wait(rq, 0, HZ / 5) < 0)
216 			err = -ETIME;
217 		i915_request_put(rq);
218 		if (err)
219 			return err;
220 
221 		count++;
222 	} while (!__igt_timeout(end_time, NULL));
223 
224 	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
225 	return 0;
226 }
227 
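/*
 * Thread body: as above, but never wait between cycles; keep the engine
 * saturated with back-to-back context switches until the timeout expires.
 */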
static int __live_parallel_switchN(void *data)
229 {
230 	struct parallel_switch *arg = data;
231 	struct i915_request *rq = NULL;
232 	IGT_TIMEOUT(end_time);
233 	unsigned long count;
234 	int n;
235 
236 	count = 0;
237 	do {
238 		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
239 			struct i915_request *prev = rq;
240 			int err = 0;
241 
242 			rq = i915_request_create(arg->ce[n]);
243 			if (IS_ERR(rq)) {
244 				i915_request_put(prev);
245 				return PTR_ERR(rq);
246 			}
247 
248 			i915_request_get(rq);
249 			if (prev) {
250 				err = i915_request_await_dma_fence(rq, &prev->fence);
251 				i915_request_put(prev);
252 			}
253 
254 			i915_request_add(rq);
255 			if (err) {
256 				i915_request_put(rq);
257 				return err;
258 			}
259 		}
260 
261 		count++;
262 	} while (!__igt_timeout(end_time, NULL));
263 	i915_request_put(rq);
264 
265 	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
266 	return 0;
267 }
268 
static int live_parallel_switch(void *arg)
270 {
271 	struct drm_i915_private *i915 = arg;
272 	static int (* const func[])(void *arg) = {
273 		__live_parallel_switch1,
274 		__live_parallel_switchN,
275 		NULL,
276 	};
277 	struct parallel_switch *data = NULL;
278 	struct i915_gem_engines *engines;
279 	struct i915_gem_engines_iter it;
280 	int (* const *fn)(void *arg);
281 	struct i915_gem_context *ctx;
282 	struct intel_context *ce;
283 	struct file *file;
284 	int n, m, count;
285 	int err = 0;
286 
287 	/*
288 	 * Check we can process switches on all engines simultaneously.
289 	 */
290 
291 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
292 		return 0;
293 
294 	file = mock_file(i915);
295 	if (IS_ERR(file))
296 		return PTR_ERR(file);
297 
298 	ctx = live_context(i915, file);
299 	if (IS_ERR(ctx)) {
300 		err = PTR_ERR(ctx);
301 		goto out_file;
302 	}
303 
304 	engines = i915_gem_context_lock_engines(ctx);
305 	count = engines->num_engines;
306 
307 	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
308 	if (!data) {
309 		i915_gem_context_unlock_engines(ctx);
310 		err = -ENOMEM;
311 		goto out_file;
312 	}
313 
314 	m = 0; /* Use the first context as our template for the engines */
315 	for_each_gem_engine(ce, engines, it) {
316 		err = intel_context_pin(ce);
317 		if (err) {
318 			i915_gem_context_unlock_engines(ctx);
319 			goto out;
320 		}
321 		data[m++].ce[0] = intel_context_get(ce);
322 	}
323 	i915_gem_context_unlock_engines(ctx);
324 
325 	/* Clone the same set of engines into the other contexts */
326 	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
327 		ctx = live_context(i915, file);
328 		if (IS_ERR(ctx)) {
329 			err = PTR_ERR(ctx);
330 			goto out;
331 		}
332 
333 		for (m = 0; m < count; m++) {
334 			if (!data[m].ce[0])
335 				continue;
336 
337 			ce = intel_context_create(data[m].ce[0]->engine);
338 			if (IS_ERR(ce))
339 				goto out;
340 
341 			err = intel_context_pin(ce);
342 			if (err) {
343 				intel_context_put(ce);
344 				goto out;
345 			}
346 
347 			data[m].ce[n] = ce;
348 		}
349 	}
350 
351 	for (fn = func; !err && *fn; fn++) {
352 		struct igt_live_test t;
353 		int n;
354 
355 		err = igt_live_test_begin(&t, i915, __func__, "");
356 		if (err)
357 			break;
358 
359 		for (n = 0; n < count; n++) {
360 			if (!data[n].ce[0])
361 				continue;
362 
363 			data[n].tsk = kthread_run(*fn, &data[n],
364 						  "igt/parallel:%s",
365 						  data[n].ce[0]->engine->name);
366 			if (IS_ERR(data[n].tsk)) {
367 				err = PTR_ERR(data[n].tsk);
368 				break;
369 			}
370 			get_task_struct(data[n].tsk);
371 		}
372 
373 		yield(); /* start all threads before we kthread_stop() */
374 
375 		for (n = 0; n < count; n++) {
376 			int status;
377 
378 			if (IS_ERR_OR_NULL(data[n].tsk))
379 				continue;
380 
381 			status = kthread_stop(data[n].tsk);
382 			if (status && !err)
383 				err = status;
384 
385 			put_task_struct(data[n].tsk);
386 			data[n].tsk = NULL;
387 		}
388 
389 		if (igt_live_test_end(&t))
390 			err = -EIO;
391 	}
392 
393 out:
394 	for (n = 0; n < count; n++) {
395 		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
396 			if (!data[n].ce[m])
397 				continue;
398 
399 			intel_context_unpin(data[n].ce[m]);
400 			intel_context_put(data[n].ce[m]);
401 		}
402 	}
403 	kfree(data);
404 out_file:
405 	fput(file);
406 	return err;
407 }
408 
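/*
 * huge_gem_object() backs a large GTT range with a small pool of physical
 * pages: real_page_count() is the number of distinct backing pages,
 * fake_page_count() the number of pages the object pretends to occupy.
 */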
static unsigned long real_page_count(struct drm_i915_gem_object *obj)
410 {
411 	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
412 }
413 
static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
415 {
416 	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
417 }
418 
static int gpu_fill(struct intel_context *ce,
420 		    struct drm_i915_gem_object *obj,
421 		    unsigned int dw)
422 {
423 	struct i915_vma *vma;
424 	int err;
425 
426 	GEM_BUG_ON(obj->base.size > ce->vm->total);
427 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
428 
429 	vma = i915_vma_instance(obj, ce->vm, NULL);
430 	if (IS_ERR(vma))
431 		return PTR_ERR(vma);
432 
433 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
434 	if (err)
435 		return err;
436 
437 	/*
	 * Within the GTT the huge object maps every page onto
439 	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
440 	 * We set the nth dword within the page using the nth
441 	 * mapping via the GTT - this should exercise the GTT mapping
442 	 * whilst checking that each context provides a unique view
443 	 * into the object.
444 	 */
445 	err = igt_gpu_fill_dw(ce, vma,
446 			      (dw * real_page_count(obj)) << PAGE_SHIFT |
447 			      (dw * sizeof(u32)),
448 			      real_page_count(obj),
449 			      dw);
450 	i915_vma_unpin(vma);
451 
452 	return err;
453 }
454 
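/* Fill every dword of the object's backing pages with @value from the CPU. */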
static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
456 {
457 	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
458 	unsigned int n, m, need_flush;
459 	int err;
460 
461 	i915_gem_object_lock(obj, NULL);
462 	err = i915_gem_object_prepare_write(obj, &need_flush);
463 	if (err)
464 		goto out;
465 
466 	for (n = 0; n < real_page_count(obj); n++) {
467 		u32 *map;
468 
469 		map = kmap_atomic(i915_gem_object_get_page(obj, n));
470 		for (m = 0; m < DW_PER_PAGE; m++)
471 			map[m] = value;
472 		if (!has_llc)
473 			drm_clflush_virt_range(map, PAGE_SIZE);
474 		kunmap_atomic(map);
475 	}
476 
477 	i915_gem_object_finish_access(obj);
478 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
479 	obj->write_domain = 0;
480 out:
481 	i915_gem_object_unlock(obj);
482 	return err;
483 }
484 
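/*
 * Check that the first @max dwords of each backing page were overwritten by
 * the GPU with their index, and that the remainder still hold the
 * STACK_MAGIC background written by cpu_fill().
 */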
static noinline int cpu_check(struct drm_i915_gem_object *obj,
486 			      unsigned int idx, unsigned int max)
487 {
488 	unsigned int n, m, needs_flush;
489 	int err;
490 
491 	i915_gem_object_lock(obj, NULL);
492 	err = i915_gem_object_prepare_read(obj, &needs_flush);
493 	if (err)
494 		goto out_unlock;
495 
496 	for (n = 0; n < real_page_count(obj); n++) {
497 		u32 *map;
498 
499 		map = kmap_atomic(i915_gem_object_get_page(obj, n));
500 		if (needs_flush & CLFLUSH_BEFORE)
501 			drm_clflush_virt_range(map, PAGE_SIZE);
502 
503 		for (m = 0; m < max; m++) {
504 			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%lu, offset %d/%d: found %x expected %x\n",
506 				       __builtin_return_address(0), idx,
507 				       n, real_page_count(obj), m, max,
508 				       map[m], m);
509 				err = -EINVAL;
510 				goto out_unmap;
511 			}
512 		}
513 
514 		for (; m < DW_PER_PAGE; m++) {
515 			if (map[m] != STACK_MAGIC) {
516 				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
517 				       __builtin_return_address(0), idx, n, m,
518 				       map[m], STACK_MAGIC);
519 				err = -EINVAL;
520 				goto out_unmap;
521 			}
522 		}
523 
524 out_unmap:
525 		kunmap_atomic(map);
526 		if (err)
527 			break;
528 	}
529 
530 	i915_gem_object_finish_access(obj);
531 out_unlock:
532 	i915_gem_object_unlock(obj);
533 	return err;
534 }
535 
static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
537 {
538 	int err;
539 
540 	GEM_BUG_ON(obj->base.handle_count);
541 
542 	/* tie the object to the drm_file for easy reaping */
543 	err = idr_alloc(&to_drm_file(file)->object_idr,
544 			&obj->base, 1, 0, GFP_KERNEL);
545 	if (err < 0)
546 		return err;
547 
548 	i915_gem_object_get(obj);
549 	obj->base.handle_count++;
550 	return 0;
551 }
552 
553 static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
555 		   struct file *file,
556 		   struct list_head *objects)
557 {
558 	struct drm_i915_gem_object *obj;
559 	u64 size;
560 	int err;
561 
562 	/* Keep in GEM's good graces */
563 	intel_gt_retire_requests(vm->gt);
564 
565 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
566 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
567 
568 	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
569 	if (IS_ERR(obj))
570 		return obj;
571 
572 	err = file_add_object(file, obj);
573 	i915_gem_object_put(obj);
574 	if (err)
575 		return ERR_PTR(err);
576 
577 	err = cpu_fill(obj, STACK_MAGIC);
578 	if (err) {
579 		pr_err("Failed to fill object with cpu, err=%d\n",
580 		       err);
581 		return ERR_PTR(err);
582 	}
583 
584 	list_add_tail(&obj->st_link, objects);
585 	return obj;
586 }
587 
static unsigned long max_dwords(struct drm_i915_gem_object *obj)
589 {
590 	unsigned long npages = fake_page_count(obj);
591 
592 	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
593 	return npages / DW_PER_PAGE;
594 }
595 
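/*
 * throttle() keeps a small window of outstanding requests per context: wait
 * for (and release) the oldest request in @q before appending a fresh one,
 * so the tests cannot build up an unbounded backlog of work.
 */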
static void throttle_release(struct i915_request **q, int count)
597 {
598 	int i;
599 
600 	for (i = 0; i < count; i++) {
601 		if (IS_ERR_OR_NULL(q[i]))
602 			continue;
603 
604 		i915_request_put(fetch_and_zero(&q[i]));
605 	}
606 }
607 
static int throttle(struct intel_context *ce,
609 		    struct i915_request **q, int count)
610 {
611 	int i;
612 
613 	if (!IS_ERR_OR_NULL(q[0])) {
614 		if (i915_request_wait(q[0],
615 				      I915_WAIT_INTERRUPTIBLE,
616 				      MAX_SCHEDULE_TIMEOUT) < 0)
617 			return -EINTR;
618 
619 		i915_request_put(q[0]);
620 	}
621 
622 	for (i = 0; i < count - 1; i++)
623 		q[i] = q[i + 1];
624 
625 	q[i] = intel_context_create_request(ce);
626 	if (IS_ERR(q[i]))
627 		return PTR_ERR(q[i]);
628 
629 	i915_request_get(q[i]);
630 	i915_request_add(q[i]);
631 
632 	return 0;
633 }
634 
static int igt_ctx_exec(void *arg)
636 {
637 	struct drm_i915_private *i915 = arg;
638 	struct intel_engine_cs *engine;
639 	int err = -ENODEV;
640 
641 	/*
642 	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU, making sure those writes end
644 	 * up in the expected pages of our obj.
645 	 */
646 
647 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
648 		return 0;
649 
650 	for_each_uabi_engine(engine, i915) {
651 		struct drm_i915_gem_object *obj = NULL;
652 		unsigned long ncontexts, ndwords, dw;
653 		struct i915_request *tq[5] = {};
654 		struct igt_live_test t;
655 		IGT_TIMEOUT(end_time);
656 		LIST_HEAD(objects);
657 		struct file *file;
658 
659 		if (!intel_engine_can_store_dword(engine))
660 			continue;
661 
662 		if (!engine->context_size)
663 			continue; /* No logical context support in HW */
664 
665 		file = mock_file(i915);
666 		if (IS_ERR(file))
667 			return PTR_ERR(file);
668 
669 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
670 		if (err)
671 			goto out_file;
672 
673 		ncontexts = 0;
674 		ndwords = 0;
675 		dw = 0;
676 		while (!time_after(jiffies, end_time)) {
677 			struct i915_gem_context *ctx;
678 			struct intel_context *ce;
679 
680 			ctx = kernel_context(i915, NULL);
681 			if (IS_ERR(ctx)) {
682 				err = PTR_ERR(ctx);
683 				goto out_file;
684 			}
685 
686 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
687 			GEM_BUG_ON(IS_ERR(ce));
688 
689 			if (!obj) {
690 				obj = create_test_object(ce->vm, file, &objects);
691 				if (IS_ERR(obj)) {
692 					err = PTR_ERR(obj);
693 					intel_context_put(ce);
694 					kernel_context_close(ctx);
695 					goto out_file;
696 				}
697 			}
698 
699 			err = gpu_fill(ce, obj, dw);
700 			if (err) {
701 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
702 				       ndwords, dw, max_dwords(obj),
703 				       engine->name,
704 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
705 				       err);
706 				intel_context_put(ce);
707 				kernel_context_close(ctx);
708 				goto out_file;
709 			}
710 
711 			err = throttle(ce, tq, ARRAY_SIZE(tq));
712 			if (err) {
713 				intel_context_put(ce);
714 				kernel_context_close(ctx);
715 				goto out_file;
716 			}
717 
718 			if (++dw == max_dwords(obj)) {
719 				obj = NULL;
720 				dw = 0;
721 			}
722 
723 			ndwords++;
724 			ncontexts++;
725 
726 			intel_context_put(ce);
727 			kernel_context_close(ctx);
728 		}
729 
730 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
731 			ncontexts, engine->name, ndwords);
732 
733 		ncontexts = dw = 0;
734 		list_for_each_entry(obj, &objects, st_link) {
735 			unsigned int rem =
736 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
737 
738 			err = cpu_check(obj, ncontexts++, rem);
739 			if (err)
740 				break;
741 
742 			dw += rem;
743 		}
744 
745 out_file:
746 		throttle_release(tq, ARRAY_SIZE(tq));
747 		if (igt_live_test_end(&t))
748 			err = -EIO;
749 
750 		fput(file);
751 		if (err)
752 			return err;
753 
754 		i915_gem_drain_freed_objects(i915);
755 	}
756 
757 	return 0;
758 }
759 
static int igt_shared_ctx_exec(void *arg)
761 {
762 	struct drm_i915_private *i915 = arg;
763 	struct i915_request *tq[5] = {};
764 	struct i915_gem_context *parent;
765 	struct intel_engine_cs *engine;
766 	struct igt_live_test t;
767 	struct file *file;
768 	int err = 0;
769 
770 	/*
771 	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU, making sure those writes end
773 	 * up in the expected pages of our obj.
774 	 */
775 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
776 		return 0;
777 
778 	file = mock_file(i915);
779 	if (IS_ERR(file))
780 		return PTR_ERR(file);
781 
782 	parent = live_context(i915, file);
783 	if (IS_ERR(parent)) {
784 		err = PTR_ERR(parent);
785 		goto out_file;
786 	}
787 
788 	if (!parent->vm) { /* not full-ppgtt; nothing to share */
789 		err = 0;
790 		goto out_file;
791 	}
792 
793 	err = igt_live_test_begin(&t, i915, __func__, "");
794 	if (err)
795 		goto out_file;
796 
797 	for_each_uabi_engine(engine, i915) {
798 		unsigned long ncontexts, ndwords, dw;
799 		struct drm_i915_gem_object *obj = NULL;
800 		IGT_TIMEOUT(end_time);
801 		LIST_HEAD(objects);
802 
803 		if (!intel_engine_can_store_dword(engine))
804 			continue;
805 
806 		dw = 0;
807 		ndwords = 0;
808 		ncontexts = 0;
809 		while (!time_after(jiffies, end_time)) {
810 			struct i915_gem_context *ctx;
811 			struct intel_context *ce;
812 
813 			ctx = kernel_context(i915, parent->vm);
814 			if (IS_ERR(ctx)) {
815 				err = PTR_ERR(ctx);
816 				goto out_test;
817 			}
818 
819 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
820 			GEM_BUG_ON(IS_ERR(ce));
821 
822 			if (!obj) {
823 				obj = create_test_object(parent->vm,
824 							 file, &objects);
825 				if (IS_ERR(obj)) {
826 					err = PTR_ERR(obj);
827 					intel_context_put(ce);
828 					kernel_context_close(ctx);
829 					goto out_test;
830 				}
831 			}
832 
833 			err = gpu_fill(ce, obj, dw);
834 			if (err) {
835 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
836 				       ndwords, dw, max_dwords(obj),
837 				       engine->name,
838 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
839 				       err);
840 				intel_context_put(ce);
841 				kernel_context_close(ctx);
842 				goto out_test;
843 			}
844 
845 			err = throttle(ce, tq, ARRAY_SIZE(tq));
846 			if (err) {
847 				intel_context_put(ce);
848 				kernel_context_close(ctx);
849 				goto out_test;
850 			}
851 
852 			if (++dw == max_dwords(obj)) {
853 				obj = NULL;
854 				dw = 0;
855 			}
856 
857 			ndwords++;
858 			ncontexts++;
859 
860 			intel_context_put(ce);
861 			kernel_context_close(ctx);
862 		}
863 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
864 			ncontexts, engine->name, ndwords);
865 
866 		ncontexts = dw = 0;
867 		list_for_each_entry(obj, &objects, st_link) {
868 			unsigned int rem =
869 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
870 
871 			err = cpu_check(obj, ncontexts++, rem);
872 			if (err)
873 				goto out_test;
874 
875 			dw += rem;
876 		}
877 
878 		i915_gem_drain_freed_objects(i915);
879 	}
880 out_test:
881 	throttle_release(tq, ARRAY_SIZE(tq));
882 	if (igt_live_test_end(&t))
883 		err = -EIO;
884 out_file:
885 	fput(file);
886 	return err;
887 }
888 
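/*
 * Build a small batch in @rpcs that stores the engine's R_PWR_CLK_STATE
 * register into the first dword of @vma.
 */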
static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
890 			    struct i915_vma *vma,
891 			    struct intel_engine_cs *engine)
892 {
893 	u32 *cmd;
894 
895 	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
896 
897 	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
898 	if (IS_ERR(cmd))
899 		return PTR_ERR(cmd);
900 
901 	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
902 	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
903 	*cmd++ = lower_32_bits(vma->node.start);
904 	*cmd++ = upper_32_bits(vma->node.start);
905 	*cmd = MI_BATCH_BUFFER_END;
906 
907 	__i915_gem_object_flush_map(rpcs, 0, 64);
908 	i915_gem_object_unpin_map(rpcs);
909 
910 	intel_gt_chipset_flush(vma->vm->gt);
911 
912 	return 0;
913 }
914 
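/*
 * Submit the RPCS query batch on @ce; the register value is written into the
 * first dword of @obj and the (referenced) request is returned via @rq_out.
 */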
915 static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
917 		struct intel_context *ce,
918 		struct i915_request **rq_out)
919 {
920 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
921 	struct i915_request *rq;
922 	struct i915_gem_ww_ctx ww;
923 	struct i915_vma *batch;
924 	struct i915_vma *vma;
925 	struct drm_i915_gem_object *rpcs;
926 	int err;
927 
928 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
929 
930 	if (GRAPHICS_VER(i915) < 8)
931 		return -EINVAL;
932 
933 	vma = i915_vma_instance(obj, ce->vm, NULL);
934 	if (IS_ERR(vma))
935 		return PTR_ERR(vma);
936 
937 	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
938 	if (IS_ERR(rpcs))
939 		return PTR_ERR(rpcs);
940 
941 	batch = i915_vma_instance(rpcs, ce->vm, NULL);
942 	if (IS_ERR(batch)) {
943 		err = PTR_ERR(batch);
944 		goto err_put;
945 	}
946 
947 	i915_gem_ww_ctx_init(&ww, false);
948 retry:
949 	err = i915_gem_object_lock(obj, &ww);
950 	if (!err)
951 		err = i915_gem_object_lock(rpcs, &ww);
952 	if (!err)
953 		err = i915_gem_object_set_to_gtt_domain(obj, false);
954 	if (!err)
955 		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
956 	if (err)
957 		goto err_put;
958 
959 	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
960 	if (err)
961 		goto err_vma;
962 
963 	err = rpcs_query_batch(rpcs, vma, ce->engine);
964 	if (err)
965 		goto err_batch;
966 
967 	rq = i915_request_create(ce);
968 	if (IS_ERR(rq)) {
969 		err = PTR_ERR(rq);
970 		goto err_batch;
971 	}
972 
973 	err = i915_request_await_object(rq, batch->obj, false);
974 	if (err == 0)
975 		err = i915_vma_move_to_active(batch, rq, 0);
976 	if (err)
977 		goto skip_request;
978 
979 	err = i915_request_await_object(rq, vma->obj, true);
980 	if (err == 0)
981 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
982 	if (err)
983 		goto skip_request;
984 
985 	if (rq->engine->emit_init_breadcrumb) {
986 		err = rq->engine->emit_init_breadcrumb(rq);
987 		if (err)
988 			goto skip_request;
989 	}
990 
991 	err = rq->engine->emit_bb_start(rq,
992 					batch->node.start, batch->node.size,
993 					0);
994 	if (err)
995 		goto skip_request;
996 
997 	*rq_out = i915_request_get(rq);
998 
999 skip_request:
1000 	if (err)
1001 		i915_request_set_error_once(rq, err);
1002 	i915_request_add(rq);
1003 err_batch:
1004 	i915_vma_unpin(batch);
1005 err_vma:
1006 	i915_vma_unpin(vma);
1007 err_put:
1008 	if (err == -EDEADLK) {
1009 		err = i915_gem_ww_ctx_backoff(&ww);
1010 		if (!err)
1011 			goto retry;
1012 	}
1013 	i915_gem_ww_ctx_fini(&ww);
1014 	i915_gem_object_put(rpcs);
1015 	return err;
1016 }
1017 
1018 #define TEST_IDLE	BIT(0)
1019 #define TEST_BUSY	BIT(1)
1020 #define TEST_RESET	BIT(2)
1021 
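/*
 * For the busy/reset phases, start a spinner on @ce so that the SSEU
 * reconfiguration is applied while the engine has work outstanding.
 */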
1022 static int
__sseu_prepare(const char *name,
1024 	       unsigned int flags,
1025 	       struct intel_context *ce,
1026 	       struct igt_spinner **spin)
1027 {
1028 	struct i915_request *rq;
1029 	int ret;
1030 
1031 	*spin = NULL;
1032 	if (!(flags & (TEST_BUSY | TEST_RESET)))
1033 		return 0;
1034 
1035 	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
1036 	if (!*spin)
1037 		return -ENOMEM;
1038 
1039 	ret = igt_spinner_init(*spin, ce->engine->gt);
1040 	if (ret)
1041 		goto err_free;
1042 
1043 	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
1044 	if (IS_ERR(rq)) {
1045 		ret = PTR_ERR(rq);
1046 		goto err_fini;
1047 	}
1048 
1049 	i915_request_add(rq);
1050 
1051 	if (!igt_wait_for_spinner(*spin, rq)) {
1052 		pr_err("%s: Spinner failed to start!\n", name);
1053 		ret = -ETIMEDOUT;
1054 		goto err_end;
1055 	}
1056 
1057 	return 0;
1058 
1059 err_end:
1060 	igt_spinner_end(*spin);
1061 err_fini:
1062 	igt_spinner_fini(*spin);
1063 err_free:
1064 	kfree(fetch_and_zero(spin));
1065 	return ret;
1066 }
1067 
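/*
 * Issue an RPCS query on @ce and return the number of enabled slices it
 * reports, with the raw register value returned via @rpcs.
 */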
1068 static int
__read_slice_count(struct intel_context *ce,
1070 		   struct drm_i915_gem_object *obj,
1071 		   struct igt_spinner *spin,
1072 		   u32 *rpcs)
1073 {
1074 	struct i915_request *rq = NULL;
1075 	u32 s_mask, s_shift;
1076 	unsigned int cnt;
1077 	u32 *buf, val;
1078 	long ret;
1079 
1080 	ret = emit_rpcs_query(obj, ce, &rq);
1081 	if (ret)
1082 		return ret;
1083 
1084 	if (spin)
1085 		igt_spinner_end(spin);
1086 
1087 	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
1088 	i915_request_put(rq);
1089 	if (ret < 0)
1090 		return ret;
1091 
1092 	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1093 	if (IS_ERR(buf)) {
1094 		ret = PTR_ERR(buf);
1095 		return ret;
1096 	}
1097 
1098 	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
1099 		s_mask = GEN11_RPCS_S_CNT_MASK;
1100 		s_shift = GEN11_RPCS_S_CNT_SHIFT;
1101 	} else {
1102 		s_mask = GEN8_RPCS_S_CNT_MASK;
1103 		s_shift = GEN8_RPCS_S_CNT_SHIFT;
1104 	}
1105 
1106 	val = *buf;
1107 	cnt = (val & s_mask) >> s_shift;
1108 	*rpcs = val;
1109 
1110 	i915_gem_object_unpin_map(obj);
1111 
1112 	return cnt;
1113 }
1114 
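/*
 * Compare the slice count read back against @expected, dumping the RPCS
 * fields on a mismatch.
 */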
1115 static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
1117 	     const char *prefix, const char *suffix)
1118 {
1119 	if (slices == expected)
1120 		return 0;
1121 
1122 	if (slices < 0) {
1123 		pr_err("%s: %s read slice count failed with %d%s\n",
1124 		       name, prefix, slices, suffix);
1125 		return slices;
1126 	}
1127 
1128 	pr_err("%s: %s slice count %d is not %u%s\n",
1129 	       name, prefix, slices, expected, suffix);
1130 
1131 	pr_info("RPCS=0x%x; %u%sx%u%s\n",
1132 		rpcs, slices,
1133 		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
1134 		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
1135 		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
1136 
1137 	return -EINVAL;
1138 }
1139 
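/*
 * Verify that @ce now reports @expected slices while the kernel context
 * still sees the full slice mask, optionally after an engine reset and
 * again after idling.
 */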
1140 static int
__sseu_finish(const char *name,
1142 	      unsigned int flags,
1143 	      struct intel_context *ce,
1144 	      struct drm_i915_gem_object *obj,
1145 	      unsigned int expected,
1146 	      struct igt_spinner *spin)
1147 {
1148 	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
1149 	u32 rpcs = 0;
1150 	int ret = 0;
1151 
1152 	if (flags & TEST_RESET) {
1153 		ret = intel_engine_reset(ce->engine, "sseu");
1154 		if (ret)
1155 			goto out;
1156 	}
1157 
1158 	ret = __read_slice_count(ce, obj,
1159 				 flags & TEST_RESET ? NULL : spin, &rpcs);
1160 	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
1161 	if (ret)
1162 		goto out;
1163 
1164 	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
1165 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
1166 
1167 out:
1168 	if (spin)
1169 		igt_spinner_end(spin);
1170 
1171 	if ((flags & TEST_IDLE) && ret == 0) {
1172 		ret = igt_flush_test(ce->engine->i915);
1173 		if (ret)
1174 			return ret;
1175 
1176 		ret = __read_slice_count(ce, obj, NULL, &rpcs);
1177 		ret = __check_rpcs(name, rpcs, ret, expected,
1178 				   "Context", " after idle!");
1179 	}
1180 
1181 	return ret;
1182 }
1183 
1184 static int
__sseu_test(const char *name,
1186 	    unsigned int flags,
1187 	    struct intel_context *ce,
1188 	    struct drm_i915_gem_object *obj,
1189 	    struct intel_sseu sseu)
1190 {
1191 	struct igt_spinner *spin = NULL;
1192 	int ret;
1193 
1194 	intel_engine_pm_get(ce->engine);
1195 
1196 	ret = __sseu_prepare(name, flags, ce, &spin);
1197 	if (ret)
1198 		goto out_pm;
1199 
1200 	ret = intel_context_reconfigure_sseu(ce, sseu);
1201 	if (ret)
1202 		goto out_spin;
1203 
1204 	ret = __sseu_finish(name, flags, ce, obj,
1205 			    hweight32(sseu.slice_mask), spin);
1206 
1207 out_spin:
1208 	if (spin) {
1209 		igt_spinner_end(spin);
1210 		igt_spinner_fini(spin);
1211 		kfree(spin);
1212 	}
1213 out_pm:
1214 	intel_engine_pm_put(ce->engine);
1215 	return ret;
1216 }
1217 
1218 static int
__igt_ctx_sseu(struct drm_i915_private *i915,
1220 	       const char *name,
1221 	       unsigned int flags)
1222 {
1223 	struct drm_i915_gem_object *obj;
1224 	int inst = 0;
1225 	int ret = 0;
1226 
1227 	if (GRAPHICS_VER(i915) < 9)
1228 		return 0;
1229 
1230 	if (flags & TEST_RESET)
1231 		igt_global_reset_lock(to_gt(i915));
1232 
1233 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1234 	if (IS_ERR(obj)) {
1235 		ret = PTR_ERR(obj);
1236 		goto out_unlock;
1237 	}
1238 
1239 	do {
1240 		struct intel_engine_cs *engine;
1241 		struct intel_context *ce;
1242 		struct intel_sseu pg_sseu;
1243 
1244 		engine = intel_engine_lookup_user(i915,
1245 						  I915_ENGINE_CLASS_RENDER,
1246 						  inst++);
1247 		if (!engine)
1248 			break;
1249 
1250 		if (hweight32(engine->sseu.slice_mask) < 2)
1251 			continue;
1252 
1253 		if (!engine->gt->info.sseu.has_slice_pg)
1254 			continue;
1255 
1256 		/*
1257 		 * Gen11 VME friendly power-gated configuration with
1258 		 * half enabled sub-slices.
1259 		 */
1260 		pg_sseu = engine->sseu;
1261 		pg_sseu.slice_mask = 1;
1262 		pg_sseu.subslice_mask =
1263 			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
1264 
1265 		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
1266 			engine->name, name, flags,
1267 			hweight32(engine->sseu.slice_mask),
1268 			hweight32(pg_sseu.slice_mask));
1269 
1270 		ce = intel_context_create(engine);
1271 		if (IS_ERR(ce)) {
1272 			ret = PTR_ERR(ce);
1273 			goto out_put;
1274 		}
1275 
1276 		ret = intel_context_pin(ce);
1277 		if (ret)
1278 			goto out_ce;
1279 
1280 		/* First set the default mask. */
1281 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1282 		if (ret)
1283 			goto out_unpin;
1284 
1285 		/* Then set a power-gated configuration. */
1286 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1287 		if (ret)
1288 			goto out_unpin;
1289 
1290 		/* Back to defaults. */
1291 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1292 		if (ret)
1293 			goto out_unpin;
1294 
1295 		/* One last power-gated configuration for the road. */
1296 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1297 		if (ret)
1298 			goto out_unpin;
1299 
1300 out_unpin:
1301 		intel_context_unpin(ce);
1302 out_ce:
1303 		intel_context_put(ce);
1304 	} while (!ret);
1305 
1306 	if (igt_flush_test(i915))
1307 		ret = -EIO;
1308 
1309 out_put:
1310 	i915_gem_object_put(obj);
1311 
1312 out_unlock:
1313 	if (flags & TEST_RESET)
1314 		igt_global_reset_unlock(to_gt(i915));
1315 
1316 	if (ret)
1317 		pr_err("%s: Failed with %d!\n", name, ret);
1318 
1319 	return ret;
1320 }
1321 
static int igt_ctx_sseu(void *arg)
1323 {
1324 	struct {
1325 		const char *name;
1326 		unsigned int flags;
1327 	} *phase, phases[] = {
1328 		{ .name = "basic", .flags = 0 },
1329 		{ .name = "idle", .flags = TEST_IDLE },
1330 		{ .name = "busy", .flags = TEST_BUSY },
1331 		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
1332 		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
1333 		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
1334 	};
1335 	unsigned int i;
1336 	int ret = 0;
1337 
1338 	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
1339 	     i++, phase++)
1340 		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
1341 
1342 	return ret;
1343 }
1344 
static int igt_ctx_readonly(void *arg)
1346 {
1347 	struct drm_i915_private *i915 = arg;
1348 	unsigned long idx, ndwords, dw, num_engines;
1349 	struct drm_i915_gem_object *obj = NULL;
1350 	struct i915_request *tq[5] = {};
1351 	struct i915_gem_engines_iter it;
1352 	struct i915_address_space *vm;
1353 	struct i915_gem_context *ctx;
1354 	struct intel_context *ce;
1355 	struct igt_live_test t;
1356 	I915_RND_STATE(prng);
1357 	IGT_TIMEOUT(end_time);
1358 	LIST_HEAD(objects);
1359 	struct file *file;
1360 	int err = -ENODEV;
1361 
1362 	/*
1363 	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
1365 	 * any write to a read-only object.
1366 	 */
1367 
1368 	file = mock_file(i915);
1369 	if (IS_ERR(file))
1370 		return PTR_ERR(file);
1371 
1372 	err = igt_live_test_begin(&t, i915, __func__, "");
1373 	if (err)
1374 		goto out_file;
1375 
1376 	ctx = live_context(i915, file);
1377 	if (IS_ERR(ctx)) {
1378 		err = PTR_ERR(ctx);
1379 		goto out_file;
1380 	}
1381 
1382 	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
1383 	if (!vm || !vm->has_read_only) {
1384 		err = 0;
1385 		goto out_file;
1386 	}
1387 
1388 	num_engines = 0;
1389 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
1390 		if (intel_engine_can_store_dword(ce->engine))
1391 			num_engines++;
1392 	i915_gem_context_unlock_engines(ctx);
1393 
1394 	ndwords = 0;
1395 	dw = 0;
1396 	while (!time_after(jiffies, end_time)) {
1397 		for_each_gem_engine(ce,
1398 				    i915_gem_context_lock_engines(ctx), it) {
1399 			if (!intel_engine_can_store_dword(ce->engine))
1400 				continue;
1401 
1402 			if (!obj) {
1403 				obj = create_test_object(ce->vm, file, &objects);
1404 				if (IS_ERR(obj)) {
1405 					err = PTR_ERR(obj);
1406 					i915_gem_context_unlock_engines(ctx);
1407 					goto out_file;
1408 				}
1409 
1410 				if (prandom_u32_state(&prng) & 1)
1411 					i915_gem_object_set_readonly(obj);
1412 			}
1413 
1414 			err = gpu_fill(ce, obj, dw);
1415 			if (err) {
1416 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
1417 				       ndwords, dw, max_dwords(obj),
1418 				       ce->engine->name,
1419 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
1420 				       err);
1421 				i915_gem_context_unlock_engines(ctx);
1422 				goto out_file;
1423 			}
1424 
1425 			err = throttle(ce, tq, ARRAY_SIZE(tq));
1426 			if (err) {
1427 				i915_gem_context_unlock_engines(ctx);
1428 				goto out_file;
1429 			}
1430 
1431 			if (++dw == max_dwords(obj)) {
1432 				obj = NULL;
1433 				dw = 0;
1434 			}
1435 			ndwords++;
1436 		}
1437 		i915_gem_context_unlock_engines(ctx);
1438 	}
1439 	pr_info("Submitted %lu dwords (across %lu engines)\n",
1440 		ndwords, num_engines);
1441 
1442 	dw = 0;
1443 	idx = 0;
1444 	list_for_each_entry(obj, &objects, st_link) {
1445 		unsigned int rem =
1446 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
1447 		unsigned int num_writes;
1448 
1449 		num_writes = rem;
1450 		if (i915_gem_object_is_readonly(obj))
1451 			num_writes = 0;
1452 
1453 		err = cpu_check(obj, idx++, num_writes);
1454 		if (err)
1455 			break;
1456 
1457 		dw += rem;
1458 	}
1459 
1460 out_file:
1461 	throttle_release(tq, ARRAY_SIZE(tq));
1462 	if (igt_live_test_end(&t))
1463 		err = -EIO;
1464 
1465 	fput(file);
1466 	return err;
1467 }
1468 
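/*
 * Confirm that nothing is bound at @offset in @vm, so that the GPU access
 * will be redirected to the scratch page.
 */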
static int check_scratch(struct i915_address_space *vm, u64 offset)
1470 {
1471 	struct drm_mm_node *node;
1472 
1473 	mutex_lock(&vm->mutex);
1474 	node = __drm_mm_interval_first(&vm->mm,
1475 				       offset, offset + sizeof(u32) - 1);
1476 	mutex_unlock(&vm->mutex);
1477 	if (!node || node->start > offset)
1478 		return 0;
1479 
1480 	GEM_BUG_ON(offset >= node->start + node->size);
1481 
1482 	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
1483 	       upper_32_bits(offset), lower_32_bits(offset));
1484 	return -EINVAL;
1485 }
1486 
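/*
 * Build and run a batch from @ctx that stores @value at @offset in the
 * context's address space; the batch object itself is pinned at offset 0.
 */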
static int write_to_scratch(struct i915_gem_context *ctx,
1488 			    struct intel_engine_cs *engine,
1489 			    struct drm_i915_gem_object *obj,
1490 			    u64 offset, u32 value)
1491 {
1492 	struct drm_i915_private *i915 = ctx->i915;
1493 	struct i915_address_space *vm;
1494 	struct i915_request *rq;
1495 	struct i915_vma *vma;
1496 	u32 *cmd;
1497 	int err;
1498 
1499 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1500 
1501 	err = check_scratch(ctx->vm, offset);
1502 	if (err)
1503 		return err;
1504 
1505 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1506 	if (IS_ERR(cmd))
1507 		return PTR_ERR(cmd);
1508 
1509 	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
1510 	if (GRAPHICS_VER(i915) >= 8) {
1511 		*cmd++ = lower_32_bits(offset);
1512 		*cmd++ = upper_32_bits(offset);
1513 	} else {
1514 		*cmd++ = 0;
1515 		*cmd++ = offset;
1516 	}
1517 	*cmd++ = value;
1518 	*cmd = MI_BATCH_BUFFER_END;
1519 	__i915_gem_object_flush_map(obj, 0, 64);
1520 	i915_gem_object_unpin_map(obj);
1521 
1522 	intel_gt_chipset_flush(engine->gt);
1523 
1524 	vm = i915_gem_context_get_eb_vm(ctx);
1525 	vma = i915_vma_instance(obj, vm, NULL);
1526 	if (IS_ERR(vma)) {
1527 		err = PTR_ERR(vma);
1528 		goto out_vm;
1529 	}
1530 
1531 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1532 	if (err)
1533 		goto out_vm;
1534 
1535 	rq = igt_request_alloc(ctx, engine);
1536 	if (IS_ERR(rq)) {
1537 		err = PTR_ERR(rq);
1538 		goto err_unpin;
1539 	}
1540 
1541 	i915_vma_lock(vma);
1542 	err = i915_request_await_object(rq, vma->obj, false);
1543 	if (err == 0)
1544 		err = i915_vma_move_to_active(vma, rq, 0);
1545 	i915_vma_unlock(vma);
1546 	if (err)
1547 		goto skip_request;
1548 
1549 	if (rq->engine->emit_init_breadcrumb) {
1550 		err = rq->engine->emit_init_breadcrumb(rq);
1551 		if (err)
1552 			goto skip_request;
1553 	}
1554 
1555 	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
1556 	if (err)
1557 		goto skip_request;
1558 
1559 	i915_vma_unpin(vma);
1560 
1561 	i915_request_add(rq);
1562 
1563 	goto out_vm;
1564 skip_request:
1565 	i915_request_set_error_once(rq, err);
1566 	i915_request_add(rq);
1567 err_unpin:
1568 	i915_vma_unpin(vma);
1569 out_vm:
1570 	i915_vm_put(vm);
1571 
1572 	if (!err)
1573 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1574 
1575 	return err;
1576 }
1577 
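/*
 * Build and run a batch from @ctx that reads the dword at @offset back into
 * @value, using a GPR on gen8+ or a privileged (secure) GGTT batch on older
 * platforms.
 */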
static int read_from_scratch(struct i915_gem_context *ctx,
1579 			     struct intel_engine_cs *engine,
1580 			     struct drm_i915_gem_object *obj,
1581 			     u64 offset, u32 *value)
1582 {
1583 	struct drm_i915_private *i915 = ctx->i915;
1584 	struct i915_address_space *vm;
1585 	const u32 result = 0x100;
1586 	struct i915_request *rq;
1587 	struct i915_vma *vma;
1588 	unsigned int flags;
1589 	u32 *cmd;
1590 	int err;
1591 
1592 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1593 
1594 	err = check_scratch(ctx->vm, offset);
1595 	if (err)
1596 		return err;
1597 
1598 	if (GRAPHICS_VER(i915) >= 8) {
1599 		const u32 GPR0 = engine->mmio_base + 0x600;
1600 
1601 		vm = i915_gem_context_get_eb_vm(ctx);
1602 		vma = i915_vma_instance(obj, vm, NULL);
1603 		if (IS_ERR(vma)) {
1604 			err = PTR_ERR(vma);
1605 			goto out_vm;
1606 		}
1607 
1608 		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1609 		if (err)
1610 			goto out_vm;
1611 
1612 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1613 		if (IS_ERR(cmd)) {
1614 			err = PTR_ERR(cmd);
1615 			goto err_unpin;
1616 		}
1617 
1618 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1619 		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
1620 		*cmd++ = GPR0;
1621 		*cmd++ = lower_32_bits(offset);
1622 		*cmd++ = upper_32_bits(offset);
1623 		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
1624 		*cmd++ = GPR0;
1625 		*cmd++ = result;
1626 		*cmd++ = 0;
1627 		*cmd = MI_BATCH_BUFFER_END;
1628 
1629 		i915_gem_object_flush_map(obj);
1630 		i915_gem_object_unpin_map(obj);
1631 
1632 		flags = 0;
1633 	} else {
1634 		const u32 reg = engine->mmio_base + 0x420;
1635 
1636 		/* hsw: register access even to 3DPRIM! is protected */
1637 		vm = i915_vm_get(&engine->gt->ggtt->vm);
1638 		vma = i915_vma_instance(obj, vm, NULL);
1639 		if (IS_ERR(vma)) {
1640 			err = PTR_ERR(vma);
1641 			goto out_vm;
1642 		}
1643 
1644 		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1645 		if (err)
1646 			goto out_vm;
1647 
1648 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1649 		if (IS_ERR(cmd)) {
1650 			err = PTR_ERR(cmd);
1651 			goto err_unpin;
1652 		}
1653 
1654 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1655 		*cmd++ = MI_LOAD_REGISTER_MEM;
1656 		*cmd++ = reg;
1657 		*cmd++ = offset;
1658 		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
1659 		*cmd++ = reg;
1660 		*cmd++ = vma->node.start + result;
1661 		*cmd = MI_BATCH_BUFFER_END;
1662 
1663 		i915_gem_object_flush_map(obj);
1664 		i915_gem_object_unpin_map(obj);
1665 
1666 		flags = I915_DISPATCH_SECURE;
1667 	}
1668 
1669 	intel_gt_chipset_flush(engine->gt);
1670 
1671 	rq = igt_request_alloc(ctx, engine);
1672 	if (IS_ERR(rq)) {
1673 		err = PTR_ERR(rq);
1674 		goto err_unpin;
1675 	}
1676 
1677 	i915_vma_lock(vma);
1678 	err = i915_request_await_object(rq, vma->obj, true);
1679 	if (err == 0)
1680 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1681 	i915_vma_unlock(vma);
1682 	if (err)
1683 		goto skip_request;
1684 
1685 	if (rq->engine->emit_init_breadcrumb) {
1686 		err = rq->engine->emit_init_breadcrumb(rq);
1687 		if (err)
1688 			goto skip_request;
1689 	}
1690 
1691 	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
1692 	if (err)
1693 		goto skip_request;
1694 
1695 	i915_vma_unpin(vma);
1696 
1697 	i915_request_add(rq);
1698 
1699 	i915_gem_object_lock(obj, NULL);
1700 	err = i915_gem_object_set_to_cpu_domain(obj, false);
1701 	i915_gem_object_unlock(obj);
1702 	if (err)
1703 		goto out_vm;
1704 
1705 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1706 	if (IS_ERR(cmd)) {
1707 		err = PTR_ERR(cmd);
1708 		goto out_vm;
1709 	}
1710 
1711 	*value = cmd[result / sizeof(*cmd)];
1712 	i915_gem_object_unpin_map(obj);
1713 
1714 	goto out_vm;
1715 skip_request:
1716 	i915_request_set_error_once(rq, err);
1717 	i915_request_add(rq);
1718 err_unpin:
1719 	i915_vma_unpin(vma);
1720 out_vm:
1721 	i915_vm_put(vm);
1722 
1723 	if (!err)
1724 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1725 
1726 	return err;
1727 }
1728 
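/*
 * Read the expected background value from the context's scratch page and
 * verify the page is uniformly initialised.
 */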
static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
1730 {
1731 	struct i915_address_space *vm;
1732 	u32 *vaddr;
1733 	int err = 0;
1734 
1735 	vm = ctx->vm;
1736 	if (!vm)
1737 		return -ENODEV;
1738 
1739 	if (!vm->scratch[0]) {
1740 		pr_err("No scratch page!\n");
1741 		return -EINVAL;
1742 	}
1743 
1744 	vaddr = __px_vaddr(vm->scratch[0]);
1745 
1746 	memcpy(out, vaddr, sizeof(*out));
1747 	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
1748 		pr_err("Inconsistent initial state of scratch page!\n");
1749 		err = -EINVAL;
1750 	}
1751 
1752 	return err;
1753 }
1754 
static int igt_vm_isolation(void *arg)
1756 {
1757 	struct drm_i915_private *i915 = arg;
1758 	struct i915_gem_context *ctx_a, *ctx_b;
1759 	struct drm_i915_gem_object *obj_a, *obj_b;
1760 	unsigned long num_engines, count;
1761 	struct intel_engine_cs *engine;
1762 	struct igt_live_test t;
1763 	I915_RND_STATE(prng);
1764 	struct file *file;
1765 	u64 vm_total;
1766 	u32 expected;
1767 	int err;
1768 
1769 	if (GRAPHICS_VER(i915) < 7)
1770 		return 0;
1771 
1772 	/*
1773 	 * The simple goal here is that a write into one context is not
1774 	 * observed in a second (separate page tables and scratch).
1775 	 */
1776 
1777 	file = mock_file(i915);
1778 	if (IS_ERR(file))
1779 		return PTR_ERR(file);
1780 
1781 	err = igt_live_test_begin(&t, i915, __func__, "");
1782 	if (err)
1783 		goto out_file;
1784 
1785 	ctx_a = live_context(i915, file);
1786 	if (IS_ERR(ctx_a)) {
1787 		err = PTR_ERR(ctx_a);
1788 		goto out_file;
1789 	}
1790 
1791 	ctx_b = live_context(i915, file);
1792 	if (IS_ERR(ctx_b)) {
1793 		err = PTR_ERR(ctx_b);
1794 		goto out_file;
1795 	}
1796 
	/* We can only test vm isolation if the vms are distinct */
1798 	if (ctx_a->vm == ctx_b->vm)
1799 		goto out_file;
1800 
1801 	/* Read the initial state of the scratch page */
1802 	err = check_scratch_page(ctx_a, &expected);
1803 	if (err)
1804 		goto out_file;
1805 
1806 	err = check_scratch_page(ctx_b, &expected);
1807 	if (err)
1808 		goto out_file;
1809 
1810 	vm_total = ctx_a->vm->total;
1811 	GEM_BUG_ON(ctx_b->vm->total != vm_total);
1812 
1813 	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
1814 	if (IS_ERR(obj_a)) {
1815 		err = PTR_ERR(obj_a);
1816 		goto out_file;
1817 	}
1818 
1819 	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
1820 	if (IS_ERR(obj_b)) {
1821 		err = PTR_ERR(obj_b);
1822 		goto put_a;
1823 	}
1824 
1825 	count = 0;
1826 	num_engines = 0;
1827 	for_each_uabi_engine(engine, i915) {
1828 		IGT_TIMEOUT(end_time);
1829 		unsigned long this = 0;
1830 
1831 		if (!intel_engine_can_store_dword(engine))
1832 			continue;
1833 
1834 		/* Not all engines have their own GPR! */
1835 		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
1836 			continue;
1837 
1838 		while (!__igt_timeout(end_time, NULL)) {
1839 			u32 value = 0xc5c5c5c5;
1840 			u64 offset;
1841 
1842 			/* Leave enough space at offset 0 for the batch */
1843 			offset = igt_random_offset(&prng,
1844 						   I915_GTT_PAGE_SIZE, vm_total,
1845 						   sizeof(u32), alignof_dword);
1846 
1847 			err = write_to_scratch(ctx_a, engine, obj_a,
1848 					       offset, 0xdeadbeef);
1849 			if (err == 0)
1850 				err = read_from_scratch(ctx_b, engine, obj_b,
1851 							offset, &value);
1852 			if (err)
1853 				goto put_b;
1854 
1855 			if (value != expected) {
1856 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1857 				       engine->name, value,
1858 				       upper_32_bits(offset),
1859 				       lower_32_bits(offset),
1860 				       this);
1861 				err = -EINVAL;
1862 				goto put_b;
1863 			}
1864 
1865 			this++;
1866 		}
1867 		count += this;
1868 		num_engines++;
1869 	}
1870 	pr_info("Checked %lu scratch offsets across %lu engines\n",
1871 		count, num_engines);
1872 
1873 put_b:
1874 	i915_gem_object_put(obj_b);
1875 put_a:
1876 	i915_gem_object_put(obj_a);
1877 out_file:
1878 	if (igt_live_test_end(&t))
1879 		err = -EIO;
1880 	fput(file);
1881 	return err;
1882 }
1883 
int i915_gem_context_live_selftests(struct drm_i915_private *i915)
1885 {
1886 	static const struct i915_subtest tests[] = {
1887 		SUBTEST(live_nop_switch),
1888 		SUBTEST(live_parallel_switch),
1889 		SUBTEST(igt_ctx_exec),
1890 		SUBTEST(igt_ctx_readonly),
1891 		SUBTEST(igt_ctx_sseu),
1892 		SUBTEST(igt_shared_ctx_exec),
1893 		SUBTEST(igt_vm_isolation),
1894 	};
1895 
1896 	if (intel_gt_is_wedged(to_gt(i915)))
1897 		return 0;
1898 
1899 	return i915_live_subtests(tests, i915);
1900 }
1901