// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"

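/*
 * Note: this file is not built standalone; intel_migrate.c #includes it for
 * its selftests, which is where CHUNK_SZ, struct sgt_dma, emit_pte() and the
 * other emit_* helpers used below are defined.
 */
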
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

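/*
 * Common harness for the copy selftests: fill the source with an ascending
 * u32 pattern and the destination with its complement, submit the copy via
 * @fn inside a ww transaction, then wait for the request and spot-check one
 * random u32 per page of the destination.
 */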
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

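/*
 * Copy between an object's main surface and its flat-CCS data, one CHUNK_SZ
 * worth of pages at a time: bind each chunk's PTEs with emit_pte(), then emit
 * a CCS copy whose direction is selected by @write_to_ccs.
 */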
static int intel_context_copy_ccs(struct intel_context *ce,
				  const struct i915_deps *deps,
				  struct scatterlist *sg,
				  unsigned int pat_index,
				  bool write_to_ccs,
				  struct i915_request **out)
{
	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	u32 offset;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	offset = 0;
	if (HAS_64K_PAGES(ce->engine->i915))
		offset = CHUNK_SZ;

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy_ccs(rq, offset, dst_access,
				    offset, src_access, len);
		if (err)
			goto out_rq;

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

static int
intel_migrate_ccs_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *sg,
		       unsigned int pat_index,
		       bool write_to_ccs,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_copy_ccs(ce, deps, sg, pat_index,
				     write_to_ccs, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

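/*
 * Common harness for the clear selftests: dirty the object (and, when
 * clearing lmem to zero on flat-CCS platforms, its CCS data as well), submit
 * the clear via @fn, then verify that the main surface reads back as the
 * clear value and, on flat-CCS lmem, that the CCS data was also zeroed.
 */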
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr, val = 0;
	bool ccs_cap = false;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	/* Consider the rounded up memory too */
	sz = obj->base.size;

	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
		ccs_cap = true;

	for_i915_gem_ww(&ww, err, true) {
		int ccs_bytes, ccs_bytes_per_chunk;

		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		if (ccs_cap && !val) {
			/* Write the obj data into ccs surface */
			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     true, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;
		}

		err = fn(migrate, &ww, obj, val, &rq);
		if (rq && !err) {
			if (i915_request_wait(rq, 0, HZ) < 0) {
				pr_err("%ps timed out, size: %u\n", fn, sz);
				err = -ETIME;
			}
			i915_request_put(rq);
			rq = NULL;
		}
		if (err)
			continue;

		i915_gem_object_flush_map(obj);

		/* Verify the set/clear of the obj mem */
		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
			int x = i * 1024 +
				i915_prandom_u32_max_state(1024, prng);

			if (vaddr[x] != val) {
				pr_err("%ps failed, (%u != %u), offset: %zu\n",
				       fn, vaddr[x], val, x * sizeof(u32));
				igt_hexdump(vaddr + i * 1024, 4096);
				err = -EINVAL;
			}
		}
		if (err)
			continue;

		if (ccs_cap && !val) {
			for (i = 0; i < sz / sizeof(u32); i++)
				vaddr[i] = ~i;
			i915_gem_object_flush_map(obj);

			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     false, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;

			ccs_bytes = GET_CCS_BYTES(i915, sz);
			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
			i915_gem_object_flush_map(obj);

			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
				int offset = ((i * PAGE_SIZE) /
					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
				int x = i915_prandom_u32_max_state(min_t(int, 1024,
									 ccs_bytes_left), prng);

				if (vaddr[offset + x]) {
					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
					igt_hexdump(vaddr + offset,
						    min_t(int, 4096,
							  ccs_bytes_left * sizeof(u32)));
					err = -EINVAL;
				}
			}

			if (err)
				continue;
		}
		i915_gem_object_unpin_map(obj);
	}

	if (err) {
		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq && err != -EINVAL) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}

		i915_gem_object_unpin_map(obj);
	}

	i915_gem_object_put(obj);
	return err;
}

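/*
 * Thin adaptors feeding the copy()/clear() harnesses: the __migrate_*
 * variants exercise the ww-aware intel_migrate_* entry points, while the
 * __global_* variants drive the GT-wide migrate context directly through
 * intel_context_migrate_*.
 */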
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->pat_index,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->pat_index,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->pat_index,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->pat_index,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->pat_index,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->pat_index,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

static int live_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

struct spinner_timer {
	struct timer_list timer;
	struct igt_spinner spin;
};

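/*
 * Timer callback used by live_emit_pte_full_ring(): once the timer fires it
 * ends the spinner, which releases the ring space that a blocked emit_pte()
 * is waiting for.
 */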
static void spinner_kill(struct timer_list *timer)
{
	struct spinner_timer *st = from_timer(st, timer, timer);

	igt_spinner_end(&st->spin);
	pr_info("%s\n", __func__);
}

static int live_emit_pte_full_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq, *prev;
	struct spinner_timer st;
	struct sgt_dma it;
	int len, sz, err;
	u32 *cs;

	/*
	 * Simple regression test to check that we don't trample the
	 * rq->reserved_space when returning from emit_pte(), if the ring is
	 * nearly full.
	 */

	if (igt_spinner_init(&st.spin, to_gt(i915)))
		return -ENOMEM;

	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_spinner;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_obj;

	ce = intel_migrate_create_context(migrate);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_obj;
	}

	ce->ring_size = SZ_4K; /* Not too big */

	err = intel_context_pin(ce);
	if (err)
		goto out_put;

	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	i915_request_add(rq);
	if (!igt_wait_for_spinner(&st.spin, rq)) {
		err = -EIO;
		goto out_unpin;
	}

	/*
	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
	 * ring->reserved_space at the end. To actually emit the PTEs we require
	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
	 * ring space in emit_pte(), otherwise we trample on the reserved_space
	 * resulting in crashes when later submitting the rq.
	 */

	prev = NULL;
	do {
		if (prev)
			i915_request_add(rq);

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_unpin;
		}

		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
			I915_EMIT_PTE_NUM_DWORDS;
		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
			   I915_EMIT_PTE_NUM_DWORDS);
		cs = intel_ring_begin(rq, sz);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			goto out_rq;
		}

		memset32(cs, MI_NOOP, sz);
		cs += sz;
		intel_ring_advance(rq, cs);

		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);

		prev = rq;
	} while (rq->ring->space > (rq->reserved_space +
				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));

	timer_setup_on_stack(&st.timer, spinner_kill, 0);
	mod_timer(&st.timer, jiffies + 2 * HZ);

	/*
	 * This should wait for the spinner to be killed, otherwise we should go
	 * down in flames when doing i915_request_add().
	 */
	pr_info("%s emit_pte ring space=%u\n", __func__, rq->ring->space);
	it = sg_sgt(obj->mm.pages->sgl);
	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
	if (!len) {
		err = -EINVAL;
		goto out_rq;
	}
	if (len < 0) {
		err = len;
		goto out_rq;
	}

out_rq:
	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
	del_timer_sync(&st.timer);
	destroy_timer_on_stack(&st.timer);
out_unpin:
	intel_context_unpin(ce);
out_put:
	intel_context_put(ce);
out_obj:
	i915_gem_object_put(obj);
out_spinner:
	igt_spinner_fini(&st.spin);
	return err;
}

struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

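/*
 * Run @fn concurrently on num_online_cpus() + 1 kthreads, each with its own
 * PRNG stream, give them a brief head start and then reap them with
 * kthread_stop(), reporting the first non-zero status.
 */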
static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_clear, 0);
}

int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(live_emit_pte_full_ring),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}

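/*
 * Performance selftests below. create_init_lmem_internal() hands back an
 * object that is already locked and has its pages pinned, preferring lmem
 * when @try_lmem is set and falling back to internal (smem) memory otherwise.
 */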
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

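/*
 * Time five blitter clears of the same buffer, sort the samples and report
 * throughput from the middle three (weighted 1:2:1), so the fastest and
 * slowest passes do not skew the MiB/s figure that is logged.
 */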
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    unsigned int pat_index,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz, 1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       i915_gem_get_pat_index(gt->i915,
							      I915_CACHE_NONE),
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

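/* Same sampling and reporting scheme as __perf_clear_blt(), but for copies. */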
static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   unsigned int src_pat_index,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   unsigned int dst_pat_index,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_pat_index,
						 src_is_lmem,
						 dst, dst_pat_index,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz, 1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		size_t sz;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		sz = src->base.size;
		dst = create_init_lmem_internal(gt, sz, false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(dst),
				      sz);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}