1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2014 Intel Corporation
4 */
5
6 #include <linux/circ_buf.h>
7
8 #include "gem/i915_gem_context.h"
9 #include "gt/gen8_engine_cs.h"
10 #include "gt/intel_breadcrumbs.h"
11 #include "gt/intel_context.h"
12 #include "gt/intel_engine_heartbeat.h"
13 #include "gt/intel_engine_pm.h"
14 #include "gt/intel_engine_regs.h"
15 #include "gt/intel_gpu_commands.h"
16 #include "gt/intel_gt.h"
17 #include "gt/intel_gt_clock_utils.h"
18 #include "gt/intel_gt_irq.h"
19 #include "gt/intel_gt_pm.h"
20 #include "gt/intel_gt_regs.h"
21 #include "gt/intel_gt_requests.h"
22 #include "gt/intel_lrc.h"
23 #include "gt/intel_lrc_reg.h"
24 #include "gt/intel_mocs.h"
25 #include "gt/intel_ring.h"
26
27 #include "intel_guc_ads.h"
28 #include "intel_guc_capture.h"
29 #include "intel_guc_submission.h"
30
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33
34 /**
35 * DOC: GuC-based command submission
36 *
37 * The Scratch registers:
38 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes
39 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
40 * triggers an interrupt on the GuC via another register write (0xC4C8).
41 * Firmware writes a success/fail code back to the action register after it
42 * processes the request. The kernel driver polls waiting for this update and
43 * then proceeds.
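*
* A conceptual sketch of that flow (register names follow the offsets quoted
* above; the real MMIO send path is intel_guc_send_mmio() in intel_guc.c):
*
*	intel_uncore_write(uncore, SOFT_SCRATCH(0), action);
*	intel_uncore_write(uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
*	... poll SOFT_SCRATCH(0) for the GuC's response code ...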
44 *
45 * Command Transport buffers (CTBs):
46 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
47 * - G2H) are a message interface between the i915 and GuC.
48 *
49 * Context registration:
50 * Before a context can be submitted it must be registered with the GuC via a
51 * H2G. A unique guc_id is associated with each context. The context is either
52 * registered at request creation time (normal operation) or at submission time
53 * (abnormal operation, e.g. after a reset).
54 *
55 * Context submission:
56 * The i915 updates the LRC tail value in memory. The i915 must enable the
57 * scheduling of the context within the GuC for the GuC to actually consider it.
58 * Therefore, the first time a disabled context is submitted we use a schedule
59 * enable H2G, while follow up submissions are done via the context submit H2G,
60 * which informs the GuC that a previously enabled context has new work
61 * available.
62 *
63 * Context unpin:
64 * To unpin a context a H2G is used to disable scheduling. When the
65 * corresponding G2H returns indicating the scheduling disable operation has
66 * completed it is safe to unpin the context. While a disable is in flight it
67 * isn't safe to resubmit the context so a fence is used to stall all future
68 * requests of that context until the G2H is returned.
69 *
70 * Context deregistration:
71 * Before a context can be destroyed or if we steal its guc_id we must
72 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
73 * safe to submit anything to this guc_id until the deregister completes so a
74 * fence is used to stall all requests associated with this guc_id until the
75 * corresponding G2H returns indicating the guc_id has been deregistered.
76 *
77 * submission_state.guc_ids:
78 * Unique number associated with private GuC context data passed in during
79 * context registration / submission / deregistration. 64k available. Simple ida
80 * is used for allocation.
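*
* A rough sketch of how a single-lrc guc_id would be picked (hypothetical
* call; the real allocation helper lives further down in this file):
*
*	ret = ida_alloc_range(&guc->submission_state.guc_ids,
*			      NUMBER_MULTI_LRC_GUC_ID(guc),
*			      guc->submission_state.num_guc_ids - 1,
*			      GFP_KERNEL);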
81 *
82 * Stealing guc_ids:
83 * If no guc_ids are available they can be stolen from another context at
84 * request creation time if that context is unpinned. If a guc_id can't be found
85 * we punt this problem to the user as we believe this is near impossible to hit
86 * during normal use cases.
87 *
88 * Locking:
89 * In the GuC submission code we have 3 basic spin locks which protect
90 * everything. Details about each below.
91 *
92 * sched_engine->lock
93 * This is the submission lock for all contexts that share an i915 scheduling
94 * engine (sched_engine), thus only one of the contexts which share a
95 * sched_engine can be submitting at a time. Currently only one sched_engine is
96 * used for all of GuC submission but that could change in the future.
97 *
98 * guc->submission_state.lock
99 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
100 * list.
101 *
102 * ce->guc_state.lock
103 * Protects everything under ce->guc_state. Ensures that a context is in the
104 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
105 * on a disabled context (bad idea), we don't issue a schedule enable when a
106 * schedule disable is in flight, etc... Also protects list of inflight requests
107 * on the context and the priority management state. Lock is individual to each
108 * context.
109 *
110 * Lock ordering rules:
111 * sched_engine->lock -> ce->guc_state.lock
112 * guc->submission_state.lock -> ce->guc_state.lock
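*
* A minimal sketch of the expected nesting (see __guc_add_request() below
* for a real instance):
*
*	spin_lock_irqsave(&sched_engine->lock, flags);
*	spin_lock(&ce->guc_state.lock);
*	... check sched_state and emit the H2G ...
*	spin_unlock(&ce->guc_state.lock);
*	spin_unlock_irqrestore(&sched_engine->lock, flags);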
113 *
114 * Reset races:
115 * When a full GT reset is triggered it is assumed that some G2H responses to
116 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
117 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
118 * contexts, release guc_ids, etc...). When this occurs we can scrub the
119 * context state and clean up appropriately; however, this is quite racy.
120 * To avoid races, the reset code must disable submission before scrubbing for
121 * the missing G2H, while the submission code must check for submission being
122 * disabled and skip sending H2Gs and updating context states when it is. Both
123 * sides must also make sure to hold the relevant locks.
124 */
125
126 /* GuC Virtual Engine */
127 struct guc_virtual_engine {
128 struct intel_engine_cs base;
129 struct intel_context context;
130 };
131
132 static struct intel_context *
133 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
134 unsigned long flags);
135
136 static struct intel_context *
137 guc_create_parallel(struct intel_engine_cs **engines,
138 unsigned int num_siblings,
139 unsigned int width);
140
141 #define GUC_REQUEST_SIZE 64 /* bytes */
142
143 /*
144 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
145 * per the GuC submission interface. A different allocation algorithm is used
146 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
147 * partition the guc_id space. We believe the number of multi-lrc contexts in
148 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
149 * multi-lrc.
150 */
151 #define NUMBER_MULTI_LRC_GUC_ID(guc) \
152 ((guc)->submission_state.num_guc_ids / 16)
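/*
* For example, with the ~64k guc_id space described in the documentation
* above, this reserves roughly 4k guc_ids (64k / 16) for multi-lrc use,
* well above the stated minimum of 32.
*/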
153
154 /*
155 * Below is a set of functions which control the GuC scheduling state which
156 * require a lock.
157 */
158 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
159 #define SCHED_STATE_DESTROYED BIT(1)
160 #define SCHED_STATE_PENDING_DISABLE BIT(2)
161 #define SCHED_STATE_BANNED BIT(3)
162 #define SCHED_STATE_ENABLED BIT(4)
163 #define SCHED_STATE_PENDING_ENABLE BIT(5)
164 #define SCHED_STATE_REGISTERED BIT(6)
165 #define SCHED_STATE_POLICY_REQUIRED BIT(7)
166 #define SCHED_STATE_BLOCKED_SHIFT 8
167 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
168 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
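/*
* SCHED_STATE_BLOCKED is effectively a 12-bit nesting counter held in bits
* 8..19 of sched_state (see incr/decr_context_blocked() below), so up to
* 4095 nested blocks can be accounted before the overflow GEM_BUG_ON fires.
*/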
169
170 static inline void init_sched_state(struct intel_context *ce)
171 {
172 lockdep_assert_held(&ce->guc_state.lock);
173 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
174 }
175
176 __maybe_unused
177 static bool sched_state_is_init(struct intel_context *ce)
178 {
179 /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
180 return !(ce->guc_state.sched_state &
181 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
182 }
183
184 static inline bool
185 context_wait_for_deregister_to_register(struct intel_context *ce)
186 {
187 return ce->guc_state.sched_state &
188 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
189 }
190
191 static inline void
192 set_context_wait_for_deregister_to_register(struct intel_context *ce)
193 {
194 lockdep_assert_held(&ce->guc_state.lock);
195 ce->guc_state.sched_state |=
196 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
197 }
198
199 static inline void
200 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
201 {
202 lockdep_assert_held(&ce->guc_state.lock);
203 ce->guc_state.sched_state &=
204 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
205 }
206
207 static inline bool
208 context_destroyed(struct intel_context *ce)
209 {
210 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
211 }
212
213 static inline void
214 set_context_destroyed(struct intel_context *ce)
215 {
216 lockdep_assert_held(&ce->guc_state.lock);
217 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
218 }
219
220 static inline bool context_pending_disable(struct intel_context *ce)
221 {
222 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
223 }
224
225 static inline void set_context_pending_disable(struct intel_context *ce)
226 {
227 lockdep_assert_held(&ce->guc_state.lock);
228 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
229 }
230
231 static inline void clr_context_pending_disable(struct intel_context *ce)
232 {
233 lockdep_assert_held(&ce->guc_state.lock);
234 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
235 }
236
237 static inline bool context_banned(struct intel_context *ce)
238 {
239 return ce->guc_state.sched_state & SCHED_STATE_BANNED;
240 }
241
242 static inline void set_context_banned(struct intel_context *ce)
243 {
244 lockdep_assert_held(&ce->guc_state.lock);
245 ce->guc_state.sched_state |= SCHED_STATE_BANNED;
246 }
247
248 static inline void clr_context_banned(struct intel_context *ce)
249 {
250 lockdep_assert_held(&ce->guc_state.lock);
251 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
252 }
253
254 static inline bool context_enabled(struct intel_context *ce)
255 {
256 return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
257 }
258
259 static inline void set_context_enabled(struct intel_context *ce)
260 {
261 lockdep_assert_held(&ce->guc_state.lock);
262 ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
263 }
264
265 static inline void clr_context_enabled(struct intel_context *ce)
266 {
267 lockdep_assert_held(&ce->guc_state.lock);
268 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
269 }
270
271 static inline bool context_pending_enable(struct intel_context *ce)
272 {
273 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
274 }
275
276 static inline void set_context_pending_enable(struct intel_context *ce)
277 {
278 lockdep_assert_held(&ce->guc_state.lock);
279 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
280 }
281
282 static inline void clr_context_pending_enable(struct intel_context *ce)
283 {
284 lockdep_assert_held(&ce->guc_state.lock);
285 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
286 }
287
288 static inline bool context_registered(struct intel_context *ce)
289 {
290 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
291 }
292
293 static inline void set_context_registered(struct intel_context *ce)
294 {
295 lockdep_assert_held(&ce->guc_state.lock);
296 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
297 }
298
299 static inline void clr_context_registered(struct intel_context *ce)
300 {
301 lockdep_assert_held(&ce->guc_state.lock);
302 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
303 }
304
305 static inline bool context_policy_required(struct intel_context *ce)
306 {
307 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
308 }
309
310 static inline void set_context_policy_required(struct intel_context *ce)
311 {
312 lockdep_assert_held(&ce->guc_state.lock);
313 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
314 }
315
316 static inline void clr_context_policy_required(struct intel_context *ce)
317 {
318 lockdep_assert_held(&ce->guc_state.lock);
319 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
320 }
321
322 static inline u32 context_blocked(struct intel_context *ce)
323 {
324 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
325 SCHED_STATE_BLOCKED_SHIFT;
326 }
327
328 static inline void incr_context_blocked(struct intel_context *ce)
329 {
330 lockdep_assert_held(&ce->guc_state.lock);
331
332 ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
333
334 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
335 }
336
337 static inline void decr_context_blocked(struct intel_context *ce)
338 {
339 lockdep_assert_held(&ce->guc_state.lock);
340
341 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
342
343 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
344 }
345
346 static inline bool context_has_committed_requests(struct intel_context *ce)
347 {
348 return !!ce->guc_state.number_committed_requests;
349 }
350
351 static inline void incr_context_committed_requests(struct intel_context *ce)
352 {
353 lockdep_assert_held(&ce->guc_state.lock);
354 ++ce->guc_state.number_committed_requests;
355 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
356 }
357
358 static inline void decr_context_committed_requests(struct intel_context *ce)
359 {
360 lockdep_assert_held(&ce->guc_state.lock);
361 --ce->guc_state.number_committed_requests;
362 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
363 }
364
365 static struct intel_context *
366 request_to_scheduling_context(struct i915_request *rq)
367 {
368 return intel_context_to_parent(rq->context);
369 }
370
371 static inline bool context_guc_id_invalid(struct intel_context *ce)
372 {
373 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
374 }
375
376 static inline void set_context_guc_id_invalid(struct intel_context *ce)
377 {
378 ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
379 }
380
381 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
382 {
383 return &ce->engine->gt->uc.guc;
384 }
385
386 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
387 {
388 return rb_entry(rb, struct i915_priolist, node);
389 }
390
391 /*
392 * When using multi-lrc submission a scratch memory area is reserved in the
393 * parent's context state for the process descriptor, work queue, and handshake
394 * between the parent + children contexts to insert safe preemption points
395 * between each of the BBs. Currently the scratch area is sized to a page.
396 *
397 * The layout of this scratch area is below:
398 * 0 guc_process_desc
399 * + sizeof(struct guc_process_desc) child go
400 * + CACHELINE_BYTES child join[0]
401 * ...
402 * + CACHELINE_BYTES child join[n - 1]
403 * ... unused
404 * PARENT_SCRATCH_SIZE / 2 work queue start
405 * ... work queue
406 * PARENT_SCRATCH_SIZE - 1 work queue end
407 */
408 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
409 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
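/*
* As a worked example, assuming the scratch area is a single 4 KiB page as
* noted above, the work queue occupies its upper 2 KiB (WQ_OFFSET == 2048,
* WQ_SIZE == 2048).
*/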
410
411 struct sync_semaphore {
412 u32 semaphore;
413 u8 unused[CACHELINE_BYTES - sizeof(u32)];
414 };
415
416 struct parent_scratch {
417 union guc_descs {
418 struct guc_sched_wq_desc wq_desc;
419 struct guc_process_desc_v69 pdesc;
420 } descs;
421
422 struct sync_semaphore go;
423 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
424
425 u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
426 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
427
428 u32 wq[WQ_SIZE / sizeof(u32)];
429 };
430
431 static u32 __get_parent_scratch_offset(struct intel_context *ce)
432 {
433 GEM_BUG_ON(!ce->parallel.guc.parent_page);
434
435 return ce->parallel.guc.parent_page * PAGE_SIZE;
436 }
437
438 static u32 __get_wq_offset(struct intel_context *ce)
439 {
440 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
441
442 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
443 }
444
445 static struct parent_scratch *
446 __get_parent_scratch(struct intel_context *ce)
447 {
448 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
449 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
450
451 /*
452 * Need to subtract LRC_STATE_OFFSET here as the
453 * parallel.guc.parent_page is the offset into ce->state while
454 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
455 */
456 return (struct parent_scratch *)
457 (ce->lrc_reg_state +
458 ((__get_parent_scratch_offset(ce) -
459 LRC_STATE_OFFSET) / sizeof(u32)));
460 }
461
462 static struct guc_process_desc_v69 *
463 __get_process_desc_v69(struct intel_context *ce)
464 {
465 struct parent_scratch *ps = __get_parent_scratch(ce);
466
467 return &ps->descs.pdesc;
468 }
469
470 static struct guc_sched_wq_desc *
471 __get_wq_desc_v70(struct intel_context *ce)
472 {
473 struct parent_scratch *ps = __get_parent_scratch(ce);
474
475 return &ps->descs.wq_desc;
476 }
477
478 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
479 {
480 /*
481 * Check for space in the work queue. We cache a value of the head pointer
482 * in the intel_context structure in order to reduce the number of accesses
483 * to shared GPU memory, which may be across a PCIe bus.
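*
* Note that CIRC_SPACE() is called with wqi_tail as the producer (head)
* argument and wqi_head as the consumer (tail) argument: i915 writes new
* work queue items at the tail while the GuC advances the head, so an
* empty queue reports WQ_SIZE - 1 bytes of usable space.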
484 */
485 #define AVAILABLE_SPACE \
486 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
487 if (wqi_size > AVAILABLE_SPACE) {
488 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
489
490 if (wqi_size > AVAILABLE_SPACE)
491 return NULL;
492 }
493 #undef AVAILABLE_SPACE
494
495 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
496 }
497
498 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
499 {
500 struct intel_context *ce = xa_load(&guc->context_lookup, id);
501
502 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
503
504 return ce;
505 }
506
507 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
508 {
509 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
510
511 if (!base)
512 return NULL;
513
514 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
515
516 return &base[index];
517 }
518
519 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
520 {
521 u32 size;
522 int ret;
523
524 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
525 GUC_MAX_CONTEXT_ID);
526 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
527 (void **)&guc->lrc_desc_pool_vaddr_v69);
528 if (ret)
529 return ret;
530
531 return 0;
532 }
533
534 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
535 {
536 if (!guc->lrc_desc_pool_vaddr_v69)
537 return;
538
539 guc->lrc_desc_pool_vaddr_v69 = NULL;
540 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
541 }
542
543 static inline bool guc_submission_initialized(struct intel_guc *guc)
544 {
545 return guc->submission_initialized;
546 }
547
548 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
549 {
550 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
551
552 if (desc)
553 memset(desc, 0, sizeof(*desc));
554 }
555
556 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
557 {
558 return __get_context(guc, id);
559 }
560
561 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
562 struct intel_context *ce)
563 {
564 unsigned long flags;
565
566 /*
567 * The xarray API doesn't have an xa_store_irqsave wrapper, so call the
568 * lower level functions directly.
569 */
570 xa_lock_irqsave(&guc->context_lookup, flags);
571 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
572 xa_unlock_irqrestore(&guc->context_lookup, flags);
573 }
574
575 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
576 {
577 unsigned long flags;
578
579 if (unlikely(!guc_submission_initialized(guc)))
580 return;
581
582 _reset_lrc_desc_v69(guc, id);
583
584 /*
585 * The xarray API doesn't have an xa_erase_irqsave wrapper, so call
586 * the lower level functions directly.
587 */
588 xa_lock_irqsave(&guc->context_lookup, flags);
589 __xa_erase(&guc->context_lookup, id);
590 xa_unlock_irqrestore(&guc->context_lookup, flags);
591 }
592
593 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
594 {
595 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
596 wake_up_all(&guc->ct.wq);
597 }
598
599 static int guc_submission_send_busy_loop(struct intel_guc *guc,
600 const u32 *action,
601 u32 len,
602 u32 g2h_len_dw,
603 bool loop)
604 {
605 /*
606 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
607 * so we don't handle the case where we don't get a reply because we
608 * aborted the send due to the channel being busy.
609 */
610 GEM_BUG_ON(g2h_len_dw && !loop);
611
612 if (g2h_len_dw)
613 atomic_inc(&guc->outstanding_submission_g2h);
614
615 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
616 }
617
618 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
619 atomic_t *wait_var,
620 bool interruptible,
621 long timeout)
622 {
623 const int state = interruptible ?
624 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
625 DEFINE_WAIT(wait);
626
627 might_sleep();
628 GEM_BUG_ON(timeout < 0);
629
630 if (!atomic_read(wait_var))
631 return 0;
632
633 if (!timeout)
634 return -ETIME;
635
636 for (;;) {
637 prepare_to_wait(&guc->ct.wq, &wait, state);
638
639 if (!atomic_read(wait_var))
640 break;
641
642 if (signal_pending_state(state, current)) {
643 timeout = -EINTR;
644 break;
645 }
646
647 if (!timeout) {
648 timeout = -ETIME;
649 break;
650 }
651
652 timeout = io_schedule_timeout(timeout);
653 }
654 finish_wait(&guc->ct.wq, &wait);
655
656 return (timeout < 0) ? timeout : 0;
657 }
658
659 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
660 {
661 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
662 return 0;
663
664 return intel_guc_wait_for_pending_msg(guc,
665 &guc->outstanding_submission_g2h,
666 true, timeout);
667 }
668
669 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
670 static int try_context_registration(struct intel_context *ce, bool loop);
671
672 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
673 {
674 int err = 0;
675 struct intel_context *ce = request_to_scheduling_context(rq);
676 u32 action[3];
677 int len = 0;
678 u32 g2h_len_dw = 0;
679 bool enabled;
680
681 lockdep_assert_held(&rq->engine->sched_engine->lock);
682
683 /*
684 * Corner case where requests were sitting in the priority list or a
685 * request was resubmitted after the context was banned.
686 */
687 if (unlikely(!intel_context_is_schedulable(ce))) {
688 i915_request_put(i915_request_mark_eio(rq));
689 intel_engine_signal_breadcrumbs(ce->engine);
690 return 0;
691 }
692
693 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
694 GEM_BUG_ON(context_guc_id_invalid(ce));
695
696 if (context_policy_required(ce)) {
697 err = guc_context_policy_init_v70(ce, false);
698 if (err)
699 return err;
700 }
701
702 spin_lock(&ce->guc_state.lock);
703
704 /*
705 * The request / context will be run on the hardware when scheduling
706 * gets enabled in the unblock. For multi-lrc we still submit the
707 * context to move the LRC tails.
708 */
709 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
710 goto out;
711
712 enabled = context_enabled(ce) || context_blocked(ce);
713
714 if (!enabled) {
715 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
716 action[len++] = ce->guc_id.id;
717 action[len++] = GUC_CONTEXT_ENABLE;
718 set_context_pending_enable(ce);
719 intel_context_get(ce);
720 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
721 } else {
722 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
723 action[len++] = ce->guc_id.id;
724 }
725
726 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
727 if (!enabled && !err) {
728 trace_intel_context_sched_enable(ce);
729 atomic_inc(&guc->outstanding_submission_g2h);
730 set_context_enabled(ce);
731
732 /*
733 * Without multi-lrc KMD does the submission step (moving the
734 * lrc tail) so enabling scheduling is sufficient to submit the
735 * context. This isn't the case in multi-lrc submission as the
736 * GuC needs to move the tails, hence the need for another H2G
737 * to submit a multi-lrc context after enabling scheduling.
738 */
739 if (intel_context_is_parent(ce)) {
740 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
741 err = intel_guc_send_nb(guc, action, len - 1, 0);
742 }
743 } else if (!enabled) {
744 clr_context_pending_enable(ce);
745 intel_context_put(ce);
746 }
747 if (likely(!err))
748 trace_i915_request_guc_submit(rq);
749
750 out:
751 spin_unlock(&ce->guc_state.lock);
752 return err;
753 }
754
755 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
756 {
757 int ret = __guc_add_request(guc, rq);
758
759 if (unlikely(ret == -EBUSY)) {
760 guc->stalled_request = rq;
761 guc->submission_stall_reason = STALL_ADD_REQUEST;
762 }
763
764 return ret;
765 }
766
767 static inline void guc_set_lrc_tail(struct i915_request *rq)
768 {
769 rq->context->lrc_reg_state[CTX_RING_TAIL] =
770 intel_ring_set_tail(rq->ring, rq->tail);
771 }
772
773 static inline int rq_prio(const struct i915_request *rq)
774 {
775 return rq->sched.attr.priority;
776 }
777
778 static bool is_multi_lrc_rq(struct i915_request *rq)
779 {
780 return intel_context_is_parallel(rq->context);
781 }
782
783 static bool can_merge_rq(struct i915_request *rq,
784 struct i915_request *last)
785 {
786 return request_to_scheduling_context(rq) ==
787 request_to_scheduling_context(last);
788 }
789
790 static u32 wq_space_until_wrap(struct intel_context *ce)
791 {
792 return (WQ_SIZE - ce->parallel.guc.wqi_tail);
793 }
794
795 static void write_wqi(struct intel_context *ce, u32 wqi_size)
796 {
797 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
798
799 /*
800 * Ensure WQI are visible before updating tail
801 */
802 intel_guc_write_barrier(ce_to_guc(ce));
803
804 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
805 (WQ_SIZE - 1);
806 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
807 }
808
809 static int guc_wq_noop_append(struct intel_context *ce)
810 {
811 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
812 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
813
814 if (!wqi)
815 return -EBUSY;
816
817 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
818
819 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
820 FIELD_PREP(WQ_LEN_MASK, len_dw);
821 ce->parallel.guc.wqi_tail = 0;
822
823 return 0;
824 }
825
826 static int __guc_wq_item_append(struct i915_request *rq)
827 {
828 struct intel_context *ce = request_to_scheduling_context(rq);
829 struct intel_context *child;
830 unsigned int wqi_size = (ce->parallel.number_children + 4) *
831 sizeof(u32);
832 u32 *wqi;
833 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
834 int ret;
835
836 /* Ensure the context is in the correct state before updating the work queue */
837 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
838 GEM_BUG_ON(context_guc_id_invalid(ce));
839 GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
840 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
841
842 /* Insert NOOP if this work queue item will wrap the tail pointer. */
843 if (wqi_size > wq_space_until_wrap(ce)) {
844 ret = guc_wq_noop_append(ce);
845 if (ret)
846 return ret;
847 }
848
849 wqi = get_wq_pointer(ce, wqi_size);
850 if (!wqi)
851 return -EBUSY;
852
853 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
854
855 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
856 FIELD_PREP(WQ_LEN_MASK, len_dw);
857 *wqi++ = ce->lrc.lrca;
858 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
859 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
860 *wqi++ = 0; /* fence_id */
861 for_each_child(ce, child)
862 *wqi++ = child->ring->tail / sizeof(u64);
863
864 write_wqi(ce, wqi_size);
865
866 return 0;
867 }
868
869 static int guc_wq_item_append(struct intel_guc *guc,
870 struct i915_request *rq)
871 {
872 struct intel_context *ce = request_to_scheduling_context(rq);
873 int ret;
874
875 if (unlikely(!intel_context_is_schedulable(ce)))
876 return 0;
877
878 ret = __guc_wq_item_append(rq);
879 if (unlikely(ret == -EBUSY)) {
880 guc->stalled_request = rq;
881 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
882 }
883
884 return ret;
885 }
886
887 static bool multi_lrc_submit(struct i915_request *rq)
888 {
889 struct intel_context *ce = request_to_scheduling_context(rq);
890
891 intel_ring_set_tail(rq->ring, rq->tail);
892
893 /*
894 * We expect the front end (execbuf IOCTL) to set this flag on the last
895 * request generated from a multi-BB submission. This indicates to the
896 * backend (GuC interface) that we should submit this context thus
897 * submitting all the requests generated in parallel.
898 */
899 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
900 !intel_context_is_schedulable(ce);
901 }
902
903 static int guc_dequeue_one_context(struct intel_guc *guc)
904 {
905 struct i915_sched_engine * const sched_engine = guc->sched_engine;
906 struct i915_request *last = NULL;
907 bool submit = false;
908 struct rb_node *rb;
909 int ret;
910
911 lockdep_assert_held(&sched_engine->lock);
912
913 if (guc->stalled_request) {
914 submit = true;
915 last = guc->stalled_request;
916
917 switch (guc->submission_stall_reason) {
918 case STALL_REGISTER_CONTEXT:
919 goto register_context;
920 case STALL_MOVE_LRC_TAIL:
921 goto move_lrc_tail;
922 case STALL_ADD_REQUEST:
923 goto add_request;
924 default:
925 MISSING_CASE(guc->submission_stall_reason);
926 }
927 }
928
929 while ((rb = rb_first_cached(&sched_engine->queue))) {
930 struct i915_priolist *p = to_priolist(rb);
931 struct i915_request *rq, *rn;
932
933 priolist_for_each_request_consume(rq, rn, p) {
934 if (last && !can_merge_rq(rq, last))
935 goto register_context;
936
937 list_del_init(&rq->sched.link);
938
939 __i915_request_submit(rq);
940
941 trace_i915_request_in(rq, 0);
942 last = rq;
943
944 if (is_multi_lrc_rq(rq)) {
945 /*
946 * We need to coalesce all multi-lrc requests in
947 * a relationship into a single H2G. We are
948 * guaranteed that all of these requests will be
949 * submitted sequentially.
950 */
951 if (multi_lrc_submit(rq)) {
952 submit = true;
953 goto register_context;
954 }
955 } else {
956 submit = true;
957 }
958 }
959
960 rb_erase_cached(&p->node, &sched_engine->queue);
961 i915_priolist_free(p);
962 }
963
964 register_context:
965 if (submit) {
966 struct intel_context *ce = request_to_scheduling_context(last);
967
968 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
969 intel_context_is_schedulable(ce))) {
970 ret = try_context_registration(ce, false);
971 if (unlikely(ret == -EPIPE)) {
972 goto deadlk;
973 } else if (ret == -EBUSY) {
974 guc->stalled_request = last;
975 guc->submission_stall_reason =
976 STALL_REGISTER_CONTEXT;
977 goto schedule_tasklet;
978 } else if (ret != 0) {
979 GEM_WARN_ON(ret); /* Unexpected */
980 goto deadlk;
981 }
982 }
983
984 move_lrc_tail:
985 if (is_multi_lrc_rq(last)) {
986 ret = guc_wq_item_append(guc, last);
987 if (ret == -EBUSY) {
988 goto schedule_tasklet;
989 } else if (ret != 0) {
990 GEM_WARN_ON(ret); /* Unexpected */
991 goto deadlk;
992 }
993 } else {
994 guc_set_lrc_tail(last);
995 }
996
997 add_request:
998 ret = guc_add_request(guc, last);
999 if (unlikely(ret == -EPIPE)) {
1000 goto deadlk;
1001 } else if (ret == -EBUSY) {
1002 goto schedule_tasklet;
1003 } else if (ret != 0) {
1004 GEM_WARN_ON(ret); /* Unexpected */
1005 goto deadlk;
1006 }
1007 }
1008
1009 guc->stalled_request = NULL;
1010 guc->submission_stall_reason = STALL_NONE;
1011 return submit;
1012
1013 deadlk:
1014 sched_engine->tasklet.callback = NULL;
1015 tasklet_disable_nosync(&sched_engine->tasklet);
1016 return false;
1017
1018 schedule_tasklet:
1019 tasklet_schedule(&sched_engine->tasklet);
1020 return false;
1021 }
1022
1023 static void guc_submission_tasklet(struct tasklet_struct *t)
1024 {
1025 struct i915_sched_engine *sched_engine =
1026 from_tasklet(sched_engine, t, tasklet);
1027 unsigned long flags;
1028 bool loop;
1029
1030 spin_lock_irqsave(&sched_engine->lock, flags);
1031
1032 do {
1033 loop = guc_dequeue_one_context(sched_engine->private_data);
1034 } while (loop);
1035
1036 i915_sched_engine_reset_on_empty(sched_engine);
1037
1038 spin_unlock_irqrestore(&sched_engine->lock, flags);
1039 }
1040
1041 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
1042 {
1043 if (iir & GT_RENDER_USER_INTERRUPT)
1044 intel_engine_signal_breadcrumbs(engine);
1045 }
1046
1047 static void __guc_context_destroy(struct intel_context *ce);
1048 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1049 static void guc_signal_context_fence(struct intel_context *ce);
1050 static void guc_cancel_context_requests(struct intel_context *ce);
1051 static void guc_blocked_fence_complete(struct intel_context *ce);
1052
1053 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1054 {
1055 struct intel_context *ce;
1056 unsigned long index, flags;
1057 bool pending_disable, pending_enable, deregister, destroyed, banned;
1058
1059 xa_lock_irqsave(&guc->context_lookup, flags);
1060 xa_for_each(&guc->context_lookup, index, ce) {
1061 /*
1062 * Corner case where the ref count on the object is zero but a
1063 * deregister G2H was lost. In this case we don't touch the ref
1064 * count and finish the destroy of the context.
1065 */
1066 bool do_put = kref_get_unless_zero(&ce->ref);
1067
1068 xa_unlock(&guc->context_lookup);
1069
1070 spin_lock(&ce->guc_state.lock);
1071
1072 /*
1073 * Once we are at this point submission_disabled() is guaranteed
1074 * to be visible to all callers who set the below flags (see above
1075 * flush and flushes in reset_prepare). If submission_disabled()
1076 * is set, the caller shouldn't set these flags.
1077 */
1078
1079 destroyed = context_destroyed(ce);
1080 pending_enable = context_pending_enable(ce);
1081 pending_disable = context_pending_disable(ce);
1082 deregister = context_wait_for_deregister_to_register(ce);
1083 banned = context_banned(ce);
1084 init_sched_state(ce);
1085
1086 spin_unlock(&ce->guc_state.lock);
1087
1088 if (pending_enable || destroyed || deregister) {
1089 decr_outstanding_submission_g2h(guc);
1090 if (deregister)
1091 guc_signal_context_fence(ce);
1092 if (destroyed) {
1093 intel_gt_pm_put_async(guc_to_gt(guc));
1094 release_guc_id(guc, ce);
1095 __guc_context_destroy(ce);
1096 }
1097 if (pending_enable || deregister)
1098 intel_context_put(ce);
1099 }
1100
1101 /* Not mutually exclusive with the above if statement. */
1102 if (pending_disable) {
1103 guc_signal_context_fence(ce);
1104 if (banned) {
1105 guc_cancel_context_requests(ce);
1106 intel_engine_signal_breadcrumbs(ce->engine);
1107 }
1108 intel_context_sched_disable_unpin(ce);
1109 decr_outstanding_submission_g2h(guc);
1110
1111 spin_lock(&ce->guc_state.lock);
1112 guc_blocked_fence_complete(ce);
1113 spin_unlock(&ce->guc_state.lock);
1114
1115 intel_context_put(ce);
1116 }
1117
1118 if (do_put)
1119 intel_context_put(ce);
1120 xa_lock(&guc->context_lookup);
1121 }
1122 xa_unlock_irqrestore(&guc->context_lookup, flags);
1123 }
1124
1125 /*
1126 * GuC stores busyness stats for each engine at context in/out boundaries. A
1127 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1128 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
1129 * GuC.
1130 *
1131 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1132 * is valid (!= ~0) and start is non-zero, the engine is considered to be
1133 * active. For an active engine total busyness = total + (now - start), where
1134 * 'now' is the time at which the busyness is sampled. For inactive engine,
1135 * total busyness = total.
1136 *
1137 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1138 *
1139 * The start and total values provided by GuC are 32 bits and wrap around in a
1140 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1141 * increasing ns values, there is a need for this implementation to account for
1142 * overflows and extend the GuC provided values to 64 bits before returning
1143 * busyness to the user. In order to do that, a worker runs periodically with
1144 * a period of 1/8th the time it takes for the timestamp to wrap (i.e. once
1145 * every ~27 seconds for a gt clock frequency of 19.2 MHz).
1146 */
1147
1148 #define WRAP_TIME_CLKS U32_MAX
1149 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
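/*
* Worked example: at a 19.2 MHz gt clock the 32 bit timestamp wraps after
* ~2^32 / 19.2e6 ~= 223 seconds, so POLL_TIME_CLKS corresponds to a ping
* period of roughly 223 / 8 ~= 28 seconds, in line with the ~27 second
* figure quoted above.
*/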
1150
1151 static void
1152 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1153 {
1154 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1155 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1156
1157 if (new_start == lower_32_bits(*prev_start))
1158 return;
1159
1160 /*
1161 * When gt is unparked, we update the gt timestamp and start the ping
1162 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1163 * is unparked, all switched in contexts will have a start time that is
1164 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1165 *
1166 * If neither gt_stamp nor new_start has rolled over, then the
1167 * gt_stamp_hi does not need to be adjusted, however if one of them has
1168 * rolled over, we need to adjust gt_stamp_hi accordingly.
1169 *
1170 * The below conditions address the cases of new_start rollover and
1171 * gt_stamp_last rollover respectively.
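*
* As a worked example (illustrative values only): if a context switched in
* just before a wrap at new_start = 0xfffffff0 while the most recent
* gt_stamp is 0x100000010 (gt_stamp_hi = 1, gt_stamp_last = 0x10), the
* second condition holds ((u32)(0x10 - 0xfffffff0) == 0x20), so
* gt_stamp_hi is decremented and *prev_start becomes 0xfffffff0.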
1172 */
1173 if (new_start < gt_stamp_last &&
1174 (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
1175 gt_stamp_hi++;
1176
1177 if (new_start > gt_stamp_last &&
1178 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
1179 gt_stamp_hi--;
1180
1181 *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
1182 }
1183
1184 #define record_read(map_, field_) \
1185 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
1186
1187 /*
1188 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1189 * we run into a race where the value read is inconsistent. Sometimes the
1190 * inconsistency is in reading the upper MSB bytes of the last_in value when
1191 * this race occurs. Two types of cases are seen - the upper 8 bits are zero or
1192 * the upper 24 bits are zero. Since the remaining bits still yield non-zero
1193 * values, it is non-trivial to determine their validity. Instead we read the
1194 * values multiple times until they are consistent. In test runs, 3 attempts
1195 * result in consistent values. The upper bound is set to 6 attempts and may
1196 * need to be tuned as per any new occurrences.
1197 */
1198 static void __get_engine_usage_record(struct intel_engine_cs *engine,
1199 u32 *last_in, u32 *id, u32 *total)
1200 {
1201 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
1202 int i = 0;
1203
1204 do {
1205 *last_in = record_read(&rec_map, last_switch_in_stamp);
1206 *id = record_read(&rec_map, current_context_index);
1207 *total = record_read(&rec_map, total_runtime);
1208
1209 if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
1210 record_read(&rec_map, current_context_index) == *id &&
1211 record_read(&rec_map, total_runtime) == *total)
1212 break;
1213 } while (++i < 6);
1214 }
1215
1216 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1217 {
1218 struct intel_engine_guc_stats *stats = &engine->stats.guc;
1219 struct intel_guc *guc = &engine->gt->uc.guc;
1220 u32 last_switch, ctx_id, total;
1221
1222 lockdep_assert_held(&guc->timestamp.lock);
1223
1224 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1225
1226 stats->running = ctx_id != ~0U && last_switch;
1227 if (stats->running)
1228 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1229
1230 /*
1231 * Instead of adjusting the total for overflow, just add the
1232 * difference from previous sample stats->total_gt_clks
1233 */
1234 if (total && total != ~0U) {
1235 stats->total_gt_clks += (u32)(total - stats->prev_total);
1236 stats->prev_total = total;
1237 }
1238 }
1239
1240 static u32 gpm_timestamp_shift(struct intel_gt *gt)
1241 {
1242 intel_wakeref_t wakeref;
1243 u32 reg, shift;
1244
1245 with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1246 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1247
1248 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
1249 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
1250
1251 return 3 - shift;
1252 }
1253
1254 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
1255 {
1256 struct intel_gt *gt = guc_to_gt(guc);
1257 u32 gt_stamp_lo, gt_stamp_hi;
1258 u64 gpm_ts;
1259
1260 lockdep_assert_held(&guc->timestamp.lock);
1261
1262 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1263 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
1264 MISC_STATUS1) >> guc->timestamp.shift;
1265 gt_stamp_lo = lower_32_bits(gpm_ts);
1266 *now = ktime_get();
1267
1268 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
1269 gt_stamp_hi++;
1270
1271 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
1272 }
1273
1274 /*
1275 * Unlike the execlist mode of submission, total and active times are in terms of
1276 * gt clocks. The *now parameter is retained to return the cpu time at which the
1277 * busyness was sampled.
1278 */
1279 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1280 {
1281 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1282 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1283 struct intel_gt *gt = engine->gt;
1284 struct intel_guc *guc = &gt->uc.guc;
1285 u64 total, gt_stamp_saved;
1286 unsigned long flags;
1287 u32 reset_count;
1288 bool in_reset;
1289
1290 spin_lock_irqsave(&guc->timestamp.lock, flags);
1291
1292 /*
1293 * If a reset happened, we risk reading partially updated engine
1294 * busyness from GuC, so we just use the driver stored copy of busyness.
1295 * Synchronize with gt reset using reset_count and the
1296 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1297 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1298 * usable by checking the flag afterwards.
1299 */
1300 reset_count = i915_reset_count(gpu_error);
1301 in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
1302
1303 *now = ktime_get();
1304
1305 /*
1306 * The active busyness depends on start_gt_clk and gt_stamp.
1307 * gt_stamp is updated by i915 only when gt is awake and the
1308 * start_gt_clk is derived from GuC state. To get a consistent
1309 * view of activity, we query the GuC state only if gt is awake.
1310 */
1311 if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
1312 stats_saved = *stats;
1313 gt_stamp_saved = guc->timestamp.gt_stamp;
1314 /*
1315 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1316 * start_gt_clk' calculation below for active engines.
1317 */
1318 guc_update_engine_gt_clks(engine);
1319 guc_update_pm_timestamp(guc, now);
1320 intel_gt_pm_put_async(gt);
1321 if (i915_reset_count(gpu_error) != reset_count) {
1322 *stats = stats_saved;
1323 guc->timestamp.gt_stamp = gt_stamp_saved;
1324 }
1325 }
1326
1327 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
1328 if (stats->running) {
1329 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1330
1331 total += intel_gt_clock_interval_to_ns(gt, clk);
1332 }
1333
1334 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1335
1336 return ns_to_ktime(total);
1337 }
1338
1339 static void __reset_guc_busyness_stats(struct intel_guc *guc)
1340 {
1341 struct intel_gt *gt = guc_to_gt(guc);
1342 struct intel_engine_cs *engine;
1343 enum intel_engine_id id;
1344 unsigned long flags;
1345 ktime_t unused;
1346
1347 cancel_delayed_work_sync(&guc->timestamp.work);
1348
1349 spin_lock_irqsave(&guc->timestamp.lock, flags);
1350
1351 guc_update_pm_timestamp(guc, &unused);
1352 for_each_engine(engine, gt, id) {
1353 guc_update_engine_gt_clks(engine);
1354 engine->stats.guc.prev_total = 0;
1355 }
1356
1357 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1358 }
1359
1360 static void __update_guc_busyness_stats(struct intel_guc *guc)
1361 {
1362 struct intel_gt *gt = guc_to_gt(guc);
1363 struct intel_engine_cs *engine;
1364 enum intel_engine_id id;
1365 unsigned long flags;
1366 ktime_t unused;
1367
1368 guc->timestamp.last_stat_jiffies = jiffies;
1369
1370 spin_lock_irqsave(&guc->timestamp.lock, flags);
1371
1372 guc_update_pm_timestamp(guc, &unused);
1373 for_each_engine(engine, gt, id)
1374 guc_update_engine_gt_clks(engine);
1375
1376 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1377 }
1378
1379 static void guc_timestamp_ping(struct work_struct *wrk)
1380 {
1381 struct intel_guc *guc = container_of(wrk, typeof(*guc),
1382 timestamp.work.work);
1383 struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1384 struct intel_gt *gt = guc_to_gt(guc);
1385 intel_wakeref_t wakeref;
1386 int srcu, ret;
1387
1388 /*
1389 * Synchronize with gt reset to make sure the worker does not
1390 * corrupt the engine/guc stats.
1391 */
1392 ret = intel_gt_reset_trylock(gt, &srcu);
1393 if (ret)
1394 return;
1395
1396 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
1397 __update_guc_busyness_stats(guc);
1398
1399 intel_gt_reset_unlock(gt, srcu);
1400
1401 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1402 guc->timestamp.ping_delay);
1403 }
1404
1405 static int guc_action_enable_usage_stats(struct intel_guc *guc)
1406 {
1407 u32 offset = intel_guc_engine_usage_offset(guc);
1408 u32 action[] = {
1409 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1410 offset,
1411 0,
1412 };
1413
1414 return intel_guc_send(guc, action, ARRAY_SIZE(action));
1415 }
1416
1417 static void guc_init_engine_stats(struct intel_guc *guc)
1418 {
1419 struct intel_gt *gt = guc_to_gt(guc);
1420 intel_wakeref_t wakeref;
1421
1422 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1423 guc->timestamp.ping_delay);
1424
1425 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
1426 int ret = guc_action_enable_usage_stats(guc);
1427
1428 if (ret)
1429 drm_err(&gt->i915->drm,
1430 "Failed to enable usage stats: %d!\n", ret);
1431 }
1432 }
1433
1434 void intel_guc_busyness_park(struct intel_gt *gt)
1435 {
1436 struct intel_guc *guc = &gt->uc.guc;
1437
1438 if (!guc_submission_initialized(guc))
1439 return;
1440
1441 /*
1442 * There is a race with suspend flow where the worker runs after suspend
1443 * and causes an unclaimed register access warning. Cancel the worker
1444 * synchronously here.
1445 */
1446 cancel_delayed_work_sync(&guc->timestamp.work);
1447
1448 /*
1449 * Before parking, we should sample engine busyness stats if we need to.
1450 * We can skip it if we are less than half a ping from the last time we
1451 * sampled the busyness stats.
1452 */
1453 if (guc->timestamp.last_stat_jiffies &&
1454 !time_after(jiffies, guc->timestamp.last_stat_jiffies +
1455 (guc->timestamp.ping_delay / 2)))
1456 return;
1457
1458 __update_guc_busyness_stats(guc);
1459 }
1460
1461 void intel_guc_busyness_unpark(struct intel_gt *gt)
1462 {
1463 struct intel_guc *guc = &gt->uc.guc;
1464 unsigned long flags;
1465 ktime_t unused;
1466
1467 if (!guc_submission_initialized(guc))
1468 return;
1469
1470 spin_lock_irqsave(&guc->timestamp.lock, flags);
1471 guc_update_pm_timestamp(guc, &unused);
1472 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1473 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1474 guc->timestamp.ping_delay);
1475 }
1476
1477 static inline bool
1478 submission_disabled(struct intel_guc *guc)
1479 {
1480 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1481
1482 return unlikely(!sched_engine ||
1483 !__tasklet_is_enabled(&sched_engine->tasklet) ||
1484 intel_gt_is_wedged(guc_to_gt(guc)));
1485 }
1486
1487 static void disable_submission(struct intel_guc *guc)
1488 {
1489 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1490
1491 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1492 GEM_BUG_ON(!guc->ct.enabled);
1493 __tasklet_disable_sync_once(&sched_engine->tasklet);
1494 sched_engine->tasklet.callback = NULL;
1495 }
1496 }
1497
1498 static void enable_submission(struct intel_guc *guc)
1499 {
1500 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1501 unsigned long flags;
1502
1503 spin_lock_irqsave(&guc->sched_engine->lock, flags);
1504 sched_engine->tasklet.callback = guc_submission_tasklet;
1505 wmb(); /* Make sure callback visible */
1506 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1507 __tasklet_enable(&sched_engine->tasklet)) {
1508 GEM_BUG_ON(!guc->ct.enabled);
1509
1510 /* And kick in case we missed a new request submission. */
1511 tasklet_hi_schedule(&sched_engine->tasklet);
1512 }
1513 spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1514 }
1515
1516 static void guc_flush_submissions(struct intel_guc *guc)
1517 {
1518 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1519 unsigned long flags;
1520
1521 spin_lock_irqsave(&sched_engine->lock, flags);
1522 spin_unlock_irqrestore(&sched_engine->lock, flags);
1523 }
1524
1525 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1526
1527 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1528 {
1529 if (unlikely(!guc_submission_initialized(guc))) {
1530 /* Reset called during driver load? GuC not yet initialised! */
1531 return;
1532 }
1533
1534 intel_gt_park_heartbeats(guc_to_gt(guc));
1535 disable_submission(guc);
1536 guc->interrupts.disable(guc);
1537 __reset_guc_busyness_stats(guc);
1538
1539 /* Flush IRQ handler */
1540 spin_lock_irq(guc_to_gt(guc)->irq_lock);
1541 spin_unlock_irq(guc_to_gt(guc)->irq_lock);
1542
1543 guc_flush_submissions(guc);
1544 guc_flush_destroyed_contexts(guc);
1545 flush_work(&guc->ct.requests.worker);
1546
1547 scrub_guc_desc_for_outstanding_g2h(guc);
1548 }
1549
1550 static struct intel_engine_cs *
1551 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1552 {
1553 struct intel_engine_cs *engine;
1554 intel_engine_mask_t tmp, mask = ve->mask;
1555 unsigned int num_siblings = 0;
1556
1557 for_each_engine_masked(engine, ve->gt, mask, tmp)
1558 if (num_siblings++ == sibling)
1559 return engine;
1560
1561 return NULL;
1562 }
1563
1564 static inline struct intel_engine_cs *
1565 __context_to_physical_engine(struct intel_context *ce)
1566 {
1567 struct intel_engine_cs *engine = ce->engine;
1568
1569 if (intel_engine_is_virtual(engine))
1570 engine = guc_virtual_get_sibling(engine, 0);
1571
1572 return engine;
1573 }
1574
1575 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1576 {
1577 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1578
1579 if (!intel_context_is_schedulable(ce))
1580 return;
1581
1582 GEM_BUG_ON(!intel_context_is_pinned(ce));
1583
1584 /*
1585 * We want a simple context + ring to execute the breadcrumb update.
1586 * We cannot rely on the context being intact across the GPU hang,
1587 * so clear it and rebuild just what we need for the breadcrumb.
1588 * All pending requests for this context will be zapped, and any
1589 * future request will be after userspace has had the opportunity
1590 * to recreate its own state.
1591 */
1592 if (scrub)
1593 lrc_init_regs(ce, engine, true);
1594
1595 /* Rerun the request; its payload has been neutered (if guilty). */
1596 lrc_update_regs(ce, engine, head);
1597 }
1598
1599 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1600 {
1601 if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
1602 return;
1603
1604 intel_engine_stop_cs(engine);
1605
1606 /*
1607 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
1608 * to wait for any pending mi force wakeups
1609 */
1610 intel_engine_wait_for_pending_mi_fw(engine);
1611 }
1612
1613 static void guc_reset_nop(struct intel_engine_cs *engine)
1614 {
1615 }
1616
1617 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
1618 {
1619 }
1620
1621 static void
1622 __unwind_incomplete_requests(struct intel_context *ce)
1623 {
1624 struct i915_request *rq, *rn;
1625 struct list_head *pl;
1626 int prio = I915_PRIORITY_INVALID;
1627 struct i915_sched_engine * const sched_engine =
1628 ce->engine->sched_engine;
1629 unsigned long flags;
1630
1631 spin_lock_irqsave(&sched_engine->lock, flags);
1632 spin_lock(&ce->guc_state.lock);
1633 list_for_each_entry_safe_reverse(rq, rn,
1634 &ce->guc_state.requests,
1635 sched.link) {
1636 if (i915_request_completed(rq))
1637 continue;
1638
1639 list_del_init(&rq->sched.link);
1640 __i915_request_unsubmit(rq);
1641
1642 /* Push the request back into the queue for later resubmission. */
1643 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1644 if (rq_prio(rq) != prio) {
1645 prio = rq_prio(rq);
1646 pl = i915_sched_lookup_priolist(sched_engine, prio);
1647 }
1648 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1649
1650 list_add(&rq->sched.link, pl);
1651 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1652 }
1653 spin_unlock(&ce->guc_state.lock);
1654 spin_unlock_irqrestore(&sched_engine->lock, flags);
1655 }
1656
static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
1658 {
1659 bool guilty;
1660 struct i915_request *rq;
1661 unsigned long flags;
1662 u32 head;
1663 int i, number_children = ce->parallel.number_children;
1664 struct intel_context *parent = ce;
1665
1666 GEM_BUG_ON(intel_context_is_child(ce));
1667
1668 intel_context_get(ce);
1669
1670 /*
1671 * GuC will implicitly mark the context as non-schedulable when it sends
1672 * the reset notification. Make sure our state reflects this change. The
1673 * context will be marked enabled on resubmission.
1674 */
1675 spin_lock_irqsave(&ce->guc_state.lock, flags);
1676 clr_context_enabled(ce);
1677 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1678
1679 /*
1680 * For each context in the relationship find the hanging request
1681 * resetting each context / request as needed
1682 */
1683 for (i = 0; i < number_children + 1; ++i) {
1684 if (!intel_context_is_pinned(ce))
1685 goto next_context;
1686
1687 guilty = false;
1688 rq = intel_context_find_active_request(ce);
1689 if (!rq) {
1690 head = ce->ring->tail;
1691 goto out_replay;
1692 }
1693
1694 if (i915_request_started(rq))
1695 guilty = stalled & ce->engine->mask;
1696
1697 GEM_BUG_ON(i915_active_is_idle(&ce->active));
1698 head = intel_ring_wrap(ce->ring, rq->head);
1699
1700 __i915_request_reset(rq, guilty);
1701 out_replay:
1702 guc_reset_state(ce, head, guilty);
1703 next_context:
1704 if (i != number_children)
1705 ce = list_next_entry(ce, parallel.child_link);
1706 }
1707
1708 __unwind_incomplete_requests(parent);
1709 intel_context_put(parent);
1710 }
1711
void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
1713 {
1714 struct intel_context *ce;
1715 unsigned long index;
1716 unsigned long flags;
1717
1718 if (unlikely(!guc_submission_initialized(guc))) {
1719 /* Reset called during driver load? GuC not yet initialised! */
1720 return;
1721 }
1722
1723 xa_lock_irqsave(&guc->context_lookup, flags);
1724 xa_for_each(&guc->context_lookup, index, ce) {
1725 if (!kref_get_unless_zero(&ce->ref))
1726 continue;
1727
1728 xa_unlock(&guc->context_lookup);
1729
1730 if (intel_context_is_pinned(ce) &&
1731 !intel_context_is_child(ce))
1732 __guc_reset_context(ce, stalled);
1733
1734 intel_context_put(ce);
1735
1736 xa_lock(&guc->context_lookup);
1737 }
1738 xa_unlock_irqrestore(&guc->context_lookup, flags);
1739
1740 /* GuC is blown away, drop all references to contexts */
1741 xa_destroy(&guc->context_lookup);
1742 }
1743
static void guc_cancel_context_requests(struct intel_context *ce)
1745 {
1746 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1747 struct i915_request *rq;
1748 unsigned long flags;
1749
1750 /* Mark all executing requests as skipped. */
1751 spin_lock_irqsave(&sched_engine->lock, flags);
1752 spin_lock(&ce->guc_state.lock);
1753 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1754 i915_request_put(i915_request_mark_eio(rq));
1755 spin_unlock(&ce->guc_state.lock);
1756 spin_unlock_irqrestore(&sched_engine->lock, flags);
1757 }
1758
1759 static void
guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1761 {
1762 struct i915_request *rq, *rn;
1763 struct rb_node *rb;
1764 unsigned long flags;
1765
1766 /* Can be called during boot if GuC fails to load */
1767 if (!sched_engine)
1768 return;
1769
1770 /*
1771 * Before we call engine->cancel_requests(), we should have exclusive
1772 * access to the submission state. This is arranged for us by the
1773 * caller disabling the interrupt generation, the tasklet and other
1774 * threads that may then access the same state, giving us a free hand
1775 * to reset state. However, we still need to let lockdep be aware that
1776 * we know this state may be accessed in hardirq context, so we
1777 * disable the irq around this manipulation and we want to keep
1778 * the spinlock focused on its duties and not accidentally conflate
1779 * coverage to the submission's irq state. (Similarly, although we
1780 * shouldn't need to disable irq around the manipulation of the
1781 * submission's irq state, we also wish to remind ourselves that
1782 * it is irq state.)
1783 */
1784 spin_lock_irqsave(&sched_engine->lock, flags);
1785
1786 /* Flush the queued requests to the timeline list (for retiring). */
1787 while ((rb = rb_first_cached(&sched_engine->queue))) {
1788 struct i915_priolist *p = to_priolist(rb);
1789
1790 priolist_for_each_request_consume(rq, rn, p) {
1791 list_del_init(&rq->sched.link);
1792
1793 __i915_request_submit(rq);
1794
1795 i915_request_put(i915_request_mark_eio(rq));
1796 }
1797
1798 rb_erase_cached(&p->node, &sched_engine->queue);
1799 i915_priolist_free(p);
1800 }
1801
1802 /* Remaining _unready_ requests will be nop'ed when submitted */
1803
1804 sched_engine->queue_priority_hint = INT_MIN;
1805 sched_engine->queue = RB_ROOT_CACHED;
1806
1807 spin_unlock_irqrestore(&sched_engine->lock, flags);
1808 }
1809
void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1811 {
1812 struct intel_context *ce;
1813 unsigned long index;
1814 unsigned long flags;
1815
1816 xa_lock_irqsave(&guc->context_lookup, flags);
1817 xa_for_each(&guc->context_lookup, index, ce) {
1818 if (!kref_get_unless_zero(&ce->ref))
1819 continue;
1820
1821 xa_unlock(&guc->context_lookup);
1822
1823 if (intel_context_is_pinned(ce) &&
1824 !intel_context_is_child(ce))
1825 guc_cancel_context_requests(ce);
1826
1827 intel_context_put(ce);
1828
1829 xa_lock(&guc->context_lookup);
1830 }
1831 xa_unlock_irqrestore(&guc->context_lookup, flags);
1832
1833 guc_cancel_sched_engine_requests(guc->sched_engine);
1834
1835 /* GuC is blown away, drop all references to contexts */
1836 xa_destroy(&guc->context_lookup);
1837 }
1838
void intel_guc_submission_reset_finish(struct intel_guc *guc)
1840 {
1841 /* Reset called during driver load or during wedge? */
1842 if (unlikely(!guc_submission_initialized(guc) ||
1843 intel_gt_is_wedged(guc_to_gt(guc)))) {
1844 return;
1845 }
1846
1847 /*
1848 * Technically possible for either of these values to be non-zero here,
1849 * but very unlikely + harmless. Regardless let's add a warn so we can
1850 * see in CI if this happens frequently / a precursor to taking down the
1851 * machine.
1852 */
1853 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1854 atomic_set(&guc->outstanding_submission_g2h, 0);
1855
1856 intel_guc_global_policies_update(guc);
1857 enable_submission(guc);
1858 intel_gt_unpark_heartbeats(guc_to_gt(guc));
1859 }
1860
1861 static void destroyed_worker_func(struct work_struct *w);
1862 static void reset_fail_worker_func(struct work_struct *w);
1863
1864 /*
1865 * Set up the memory resources to be shared with the GuC (via the GGTT)
1866 * at firmware loading time.
1867 */
int intel_guc_submission_init(struct intel_guc *guc)
1869 {
1870 struct intel_gt *gt = guc_to_gt(guc);
1871 int ret;
1872
1873 if (guc->submission_initialized)
1874 return 0;
1875
1876 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) {
1877 ret = guc_lrc_desc_pool_create_v69(guc);
1878 if (ret)
1879 return ret;
1880 }
1881
1882 guc->submission_state.guc_ids_bitmap =
1883 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
1884 if (!guc->submission_state.guc_ids_bitmap) {
1885 ret = -ENOMEM;
1886 goto destroy_pool;
1887 }
1888
1889 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
1890 guc->timestamp.shift = gpm_timestamp_shift(gt);
1891 guc->submission_initialized = true;
1892
1893 return 0;
1894
1895 destroy_pool:
1896 guc_lrc_desc_pool_destroy_v69(guc);
1897
1898 return ret;
1899 }
1900
void intel_guc_submission_fini(struct intel_guc *guc)
1902 {
1903 if (!guc->submission_initialized)
1904 return;
1905
1906 guc_flush_destroyed_contexts(guc);
1907 guc_lrc_desc_pool_destroy_v69(guc);
1908 i915_sched_engine_put(guc->sched_engine);
1909 bitmap_free(guc->submission_state.guc_ids_bitmap);
1910 guc->submission_initialized = false;
1911 }
1912
static inline void queue_request(struct i915_sched_engine *sched_engine,
1914 struct i915_request *rq,
1915 int prio)
1916 {
1917 GEM_BUG_ON(!list_empty(&rq->sched.link));
1918 list_add_tail(&rq->sched.link,
1919 i915_sched_lookup_priolist(sched_engine, prio));
1920 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1921 tasklet_hi_schedule(&sched_engine->tasklet);
1922 }
1923
static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1925 struct i915_request *rq)
1926 {
1927 int ret = 0;
1928
1929 __i915_request_submit(rq);
1930
1931 trace_i915_request_in(rq, 0);
1932
1933 if (is_multi_lrc_rq(rq)) {
1934 if (multi_lrc_submit(rq)) {
1935 ret = guc_wq_item_append(guc, rq);
1936 if (!ret)
1937 ret = guc_add_request(guc, rq);
1938 }
1939 } else {
1940 guc_set_lrc_tail(rq);
1941 ret = guc_add_request(guc, rq);
1942 }
1943
1944 if (unlikely(ret == -EPIPE))
1945 disable_submission(guc);
1946
1947 return ret;
1948 }
1949
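/*
 * Submission must go through the tasklet whenever the request cannot be
 * handed to the GuC directly: submission is disabled (e.g. around a
 * reset), a previous request is stalled, the sched_engine still has
 * queued work, or the context has no guc_id mapping yet. Otherwise
 * guc_submit_request() bypasses the tasklet.
 */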
static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
1951 {
1952 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1953 struct intel_context *ce = request_to_scheduling_context(rq);
1954
1955 return submission_disabled(guc) || guc->stalled_request ||
1956 !i915_sched_engine_is_empty(sched_engine) ||
1957 !ctx_id_mapped(guc, ce->guc_id.id);
1958 }
1959
static void guc_submit_request(struct i915_request *rq)
1961 {
1962 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1963 struct intel_guc *guc = &rq->engine->gt->uc.guc;
1964 unsigned long flags;
1965
1966 /* Will be called from irq-context when using foreign fences. */
1967 spin_lock_irqsave(&sched_engine->lock, flags);
1968
1969 if (need_tasklet(guc, rq))
1970 queue_request(sched_engine, rq, rq_prio(rq));
1971 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
1972 tasklet_hi_schedule(&sched_engine->tasklet);
1973
1974 spin_unlock_irqrestore(&sched_engine->lock, flags);
1975 }
1976
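/*
 * Parent (multi-LRC) contexts allocate a contiguous, power-of-two sized
 * block of guc_ids from the multi-LRC bitmap so each child can use
 * parent guc_id + N (see assign_guc_id()). Single-LRC contexts use the
 * ida in the range above NUMBER_MULTI_LRC_GUC_ID().
 */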
static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
1978 {
1979 int ret;
1980
1981 GEM_BUG_ON(intel_context_is_child(ce));
1982
1983 if (intel_context_is_parent(ce))
1984 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
1985 NUMBER_MULTI_LRC_GUC_ID(guc),
1986 order_base_2(ce->parallel.number_children
1987 + 1));
1988 else
1989 ret = ida_simple_get(&guc->submission_state.guc_ids,
1990 NUMBER_MULTI_LRC_GUC_ID(guc),
1991 guc->submission_state.num_guc_ids,
1992 GFP_KERNEL | __GFP_RETRY_MAYFAIL |
1993 __GFP_NOWARN);
1994 if (unlikely(ret < 0))
1995 return ret;
1996
1997 ce->guc_id.id = ret;
1998 return 0;
1999 }
2000
static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2002 {
2003 GEM_BUG_ON(intel_context_is_child(ce));
2004
2005 if (!context_guc_id_invalid(ce)) {
2006 if (intel_context_is_parent(ce))
2007 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2008 ce->guc_id.id,
2009 order_base_2(ce->parallel.number_children
2010 + 1));
2011 else
2012 ida_simple_remove(&guc->submission_state.guc_ids,
2013 ce->guc_id.id);
2014 clr_ctx_id_mapping(guc, ce->guc_id.id);
2015 set_context_guc_id_invalid(ce);
2016 }
2017 if (!list_empty(&ce->guc_id.link))
2018 list_del_init(&ce->guc_id.link);
2019 }
2020
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2022 {
2023 unsigned long flags;
2024
2025 spin_lock_irqsave(&guc->submission_state.lock, flags);
2026 __release_guc_id(guc, ce);
2027 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2028 }
2029
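/*
 * Steal a guc_id from the context at the head of guc_id_list. Contexts
 * on that list are unpinned with no outstanding guc_id references, so
 * the victim is simply marked unregistered and its guc_id invalidated;
 * it will pick up a new guc_id if it is ever submitted again.
 */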
static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2031 {
2032 struct intel_context *cn;
2033
2034 lockdep_assert_held(&guc->submission_state.lock);
2035 GEM_BUG_ON(intel_context_is_child(ce));
2036 GEM_BUG_ON(intel_context_is_parent(ce));
2037
2038 if (!list_empty(&guc->submission_state.guc_id_list)) {
2039 cn = list_first_entry(&guc->submission_state.guc_id_list,
2040 struct intel_context,
2041 guc_id.link);
2042
2043 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
2044 GEM_BUG_ON(context_guc_id_invalid(cn));
2045 GEM_BUG_ON(intel_context_is_child(cn));
2046 GEM_BUG_ON(intel_context_is_parent(cn));
2047
2048 list_del_init(&cn->guc_id.link);
2049 ce->guc_id.id = cn->guc_id.id;
2050
2051 spin_lock(&cn->guc_state.lock);
2052 clr_context_registered(cn);
2053 spin_unlock(&cn->guc_state.lock);
2054
2055 set_context_guc_id_invalid(cn);
2056
2057 #ifdef CONFIG_DRM_I915_SELFTEST
2058 guc->number_guc_id_stolen++;
2059 #endif
2060
2061 return 0;
2062 } else {
2063 return -EAGAIN;
2064 }
2065 }
2066
static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2068 {
2069 int ret;
2070
2071 lockdep_assert_held(&guc->submission_state.lock);
2072 GEM_BUG_ON(intel_context_is_child(ce));
2073
2074 ret = new_guc_id(guc, ce);
2075 if (unlikely(ret < 0)) {
2076 if (intel_context_is_parent(ce))
2077 return -ENOSPC;
2078
2079 ret = steal_guc_id(guc, ce);
2080 if (ret < 0)
2081 return ret;
2082 }
2083
2084 if (intel_context_is_parent(ce)) {
2085 struct intel_context *child;
2086 int i = 1;
2087
2088 for_each_child(ce, child)
2089 child->guc_id.id = ce->guc_id.id + i++;
2090 }
2091
2092 return 0;
2093 }
2094
2095 #define PIN_GUC_ID_TRIES 4
static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2097 {
2098 int ret = 0;
2099 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2100
2101 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2102
2103 try_again:
2104 spin_lock_irqsave(&guc->submission_state.lock, flags);
2105
2106 might_lock(&ce->guc_state.lock);
2107
2108 if (context_guc_id_invalid(ce)) {
2109 ret = assign_guc_id(guc, ce);
2110 if (ret)
2111 goto out_unlock;
ret = 1; /* Indicates newly assigned guc_id */
2113 }
2114 if (!list_empty(&ce->guc_id.link))
2115 list_del_init(&ce->guc_id.link);
2116 atomic_inc(&ce->guc_id.ref);
2117
2118 out_unlock:
2119 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2120
/*
 * -EAGAIN indicates no guc_ids are available, so retire any
 * outstanding requests to see if that frees up a guc_id. If the first
 * retire didn't help, insert a sleep of the timeslice duration before
 * attempting to retire more requests. Double the sleep period on each
 * subsequent pass before finally giving up. The sleep period is capped
 * at 100 ms with a floor of 1 ms.
 */
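/*
 * For example, with a 5 ms timeslice and PIN_GUC_ID_TRIES == 4 the
 * pattern is: retire and retry immediately, then retire with roughly
 * 5 ms and 10 ms sleeps before the remaining attempts.
 */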
2129 if (ret == -EAGAIN && --tries) {
2130 if (PIN_GUC_ID_TRIES - tries > 1) {
2131 unsigned int timeslice_shifted =
2132 ce->engine->props.timeslice_duration_ms <<
2133 (PIN_GUC_ID_TRIES - tries - 2);
2134 unsigned int max = min_t(unsigned int, 100,
2135 timeslice_shifted);
2136
2137 msleep(max_t(unsigned int, max, 1));
2138 }
2139 intel_gt_retire_requests(guc_to_gt(guc));
2140 goto try_again;
2141 }
2142
2143 return ret;
2144 }
2145
static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2147 {
2148 unsigned long flags;
2149
2150 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2151 GEM_BUG_ON(intel_context_is_child(ce));
2152
2153 if (unlikely(context_guc_id_invalid(ce) ||
2154 intel_context_is_parent(ce)))
2155 return;
2156
2157 spin_lock_irqsave(&guc->submission_state.lock, flags);
2158 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2159 !atomic_read(&ce->guc_id.ref))
2160 list_add_tail(&ce->guc_id.link,
2161 &guc->submission_state.guc_id_list);
2162 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2163 }
2164
static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2166 struct intel_context *ce,
2167 u32 guc_id,
2168 u32 offset,
2169 bool loop)
2170 {
2171 struct intel_context *child;
2172 u32 action[4 + MAX_ENGINE_INSTANCE];
2173 int len = 0;
2174
2175 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2176
2177 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2178 action[len++] = guc_id;
2179 action[len++] = ce->parallel.number_children + 1;
2180 action[len++] = offset;
2181 for_each_child(ce, child) {
2182 offset += sizeof(struct guc_lrc_desc_v69);
2183 action[len++] = offset;
2184 }
2185
2186 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2187 }
2188
static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2190 struct intel_context *ce,
2191 struct guc_ctxt_registration_info *info,
2192 bool loop)
2193 {
2194 struct intel_context *child;
2195 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2196 int len = 0;
2197 u32 next_id;
2198
2199 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2200
2201 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2202 action[len++] = info->flags;
2203 action[len++] = info->context_idx;
2204 action[len++] = info->engine_class;
2205 action[len++] = info->engine_submit_mask;
2206 action[len++] = info->wq_desc_lo;
2207 action[len++] = info->wq_desc_hi;
2208 action[len++] = info->wq_base_lo;
2209 action[len++] = info->wq_base_hi;
2210 action[len++] = info->wq_size;
2211 action[len++] = ce->parallel.number_children + 1;
2212 action[len++] = info->hwlrca_lo;
2213 action[len++] = info->hwlrca_hi;
2214
2215 next_id = info->context_idx + 1;
2216 for_each_child(ce, child) {
2217 GEM_BUG_ON(next_id++ != child->guc_id.id);
2218
2219 /*
2220 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2221 * only supports 32 bit currently.
2222 */
2223 action[len++] = lower_32_bits(child->lrc.lrca);
2224 action[len++] = upper_32_bits(child->lrc.lrca);
2225 }
2226
2227 GEM_BUG_ON(len > ARRAY_SIZE(action));
2228
2229 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2230 }
2231
static int __guc_action_register_context_v69(struct intel_guc *guc,
2233 u32 guc_id,
2234 u32 offset,
2235 bool loop)
2236 {
2237 u32 action[] = {
2238 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2239 guc_id,
2240 offset,
2241 };
2242
2243 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2244 0, loop);
2245 }
2246
static int __guc_action_register_context_v70(struct intel_guc *guc,
2248 struct guc_ctxt_registration_info *info,
2249 bool loop)
2250 {
2251 u32 action[] = {
2252 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2253 info->flags,
2254 info->context_idx,
2255 info->engine_class,
2256 info->engine_submit_mask,
2257 info->wq_desc_lo,
2258 info->wq_desc_hi,
2259 info->wq_base_lo,
2260 info->wq_base_hi,
2261 info->wq_size,
2262 info->hwlrca_lo,
2263 info->hwlrca_hi,
2264 };
2265
2266 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2267 0, loop);
2268 }
2269
2270 static void prepare_context_registration_info_v69(struct intel_context *ce);
2271 static void prepare_context_registration_info_v70(struct intel_context *ce,
2272 struct guc_ctxt_registration_info *info);
2273
2274 static int
register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
2276 {
2277 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2278 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2279
2280 prepare_context_registration_info_v69(ce);
2281
2282 if (intel_context_is_parent(ce))
2283 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2284 offset, loop);
2285 else
2286 return __guc_action_register_context_v69(guc, ce->guc_id.id,
2287 offset, loop);
2288 }
2289
2290 static int
register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
2292 {
2293 struct guc_ctxt_registration_info info;
2294
2295 prepare_context_registration_info_v70(ce, &info);
2296
2297 if (intel_context_is_parent(ce))
2298 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2299 else
2300 return __guc_action_register_context_v70(guc, &info, loop);
2301 }
2302
static int register_context(struct intel_context *ce, bool loop)
2304 {
2305 struct intel_guc *guc = ce_to_guc(ce);
2306 int ret;
2307
2308 GEM_BUG_ON(intel_context_is_child(ce));
2309 trace_intel_context_register(ce);
2310
2311 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
2312 ret = register_context_v70(guc, ce, loop);
2313 else
2314 ret = register_context_v69(guc, ce, loop);
2315
2316 if (likely(!ret)) {
2317 unsigned long flags;
2318
2319 spin_lock_irqsave(&ce->guc_state.lock, flags);
2320 set_context_registered(ce);
2321 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2322
2323 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
2324 guc_context_policy_init_v70(ce, loop);
2325 }
2326
2327 return ret;
2328 }
2329
static int __guc_action_deregister_context(struct intel_guc *guc,
2331 u32 guc_id)
2332 {
2333 u32 action[] = {
2334 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2335 guc_id,
2336 };
2337
2338 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2339 G2H_LEN_DW_DEREGISTER_CONTEXT,
2340 true);
2341 }
2342
static int deregister_context(struct intel_context *ce, u32 guc_id)
2344 {
2345 struct intel_guc *guc = ce_to_guc(ce);
2346
2347 GEM_BUG_ON(intel_context_is_child(ce));
2348 trace_intel_context_deregister(ce);
2349
2350 return __guc_action_deregister_context(guc, guc_id);
2351 }
2352
static inline void clear_children_join_go_memory(struct intel_context *ce)
2354 {
2355 struct parent_scratch *ps = __get_parent_scratch(ce);
2356 int i;
2357
2358 ps->go.semaphore = 0;
2359 for (i = 0; i < ce->parallel.number_children + 1; ++i)
2360 ps->join[i].semaphore = 0;
2361 }
2362
static inline u32 get_children_go_value(struct intel_context *ce)
2364 {
2365 return __get_parent_scratch(ce)->go.semaphore;
2366 }
2367
static inline u32 get_children_join_value(struct intel_context *ce,
2369 u8 child_index)
2370 {
2371 return __get_parent_scratch(ce)->join[child_index].semaphore;
2372 }
2373
2374 struct context_policy {
2375 u32 count;
2376 struct guc_update_context_policy h2g;
2377 };
2378
static u32 __guc_context_policy_action_size(struct context_policy *policy)
2380 {
2381 size_t bytes = sizeof(policy->h2g.header) +
2382 (sizeof(policy->h2g.klv[0]) * policy->count);
2383
2384 return bytes / sizeof(u32);
2385 }
2386
static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
2388 {
2389 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
2390 policy->h2g.header.ctx_id = guc_id;
2391 policy->count = 0;
2392 }
2393
2394 #define MAKE_CONTEXT_POLICY_ADD(func, id) \
2395 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
2396 { \
2397 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
2398 policy->h2g.klv[policy->count].kl = \
2399 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
2400 FIELD_PREP(GUC_KLV_0_LEN, 1); \
2401 policy->h2g.klv[policy->count].value = data; \
2402 policy->count++; \
2403 }
2404
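/*
 * Each invocation below generates a helper of the form
 * __guc_context_policy_add_<func>(struct context_policy *policy, u32 data)
 * that appends a single KLV entry (key GUC_CONTEXT_POLICIES_KLV_ID_<id>,
 * len 1, value data) to the H2G payload.
 */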
MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
2406 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
2407 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
2408 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
2409
2410 #undef MAKE_CONTEXT_POLICY_ADD
2411
2412 static int __guc_context_set_context_policies(struct intel_guc *guc,
2413 struct context_policy *policy,
2414 bool loop)
2415 {
2416 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2417 __guc_context_policy_action_size(policy),
2418 0, loop);
2419 }
2420
static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
2422 {
2423 struct intel_engine_cs *engine = ce->engine;
2424 struct intel_guc *guc = &engine->gt->uc.guc;
2425 struct context_policy policy;
2426 u32 execution_quantum;
2427 u32 preemption_timeout;
2428 unsigned long flags;
2429 int ret;
2430
2431 /* NB: For both of these, zero means disabled. */
2432 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2433 execution_quantum));
2434 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2435 preemption_timeout));
2436 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2437 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2438
2439 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2440
2441 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2442 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2443 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2444
2445 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2446 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2447
2448 ret = __guc_context_set_context_policies(guc, &policy, loop);
2449
2450 spin_lock_irqsave(&ce->guc_state.lock, flags);
2451 if (ret != 0)
2452 set_context_policy_required(ce);
2453 else
2454 clr_context_policy_required(ce);
2455 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2456
2457 return ret;
2458 }
2459
static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2461 struct guc_lrc_desc_v69 *desc)
2462 {
2463 desc->policy_flags = 0;
2464
2465 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2466 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2467
2468 /* NB: For both of these, zero means disabled. */
2469 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2470 desc->execution_quantum));
2471 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2472 desc->preemption_timeout));
2473 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2474 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2475 }
2476
static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2478 {
2479 /*
2480 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2481 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2482 */
2483 switch (prio) {
2484 default:
2485 MISSING_CASE(prio);
2486 fallthrough;
2487 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2488 return GEN12_CTX_PRIORITY_NORMAL;
2489 case GUC_CLIENT_PRIORITY_NORMAL:
2490 return GEN12_CTX_PRIORITY_LOW;
2491 case GUC_CLIENT_PRIORITY_HIGH:
2492 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2493 return GEN12_CTX_PRIORITY_HIGH;
2494 }
2495 }
2496
static void prepare_context_registration_info_v69(struct intel_context *ce)
2498 {
2499 struct intel_engine_cs *engine = ce->engine;
2500 struct intel_guc *guc = &engine->gt->uc.guc;
2501 u32 ctx_id = ce->guc_id.id;
2502 struct guc_lrc_desc_v69 *desc;
2503 struct intel_context *child;
2504
2505 GEM_BUG_ON(!engine->mask);
2506
/*
 * Ensure the LRC and CT vmas are in the same region, as the write
 * barrier is done based on the CT vma region.
 */
2511 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2512 i915_gem_object_is_lmem(ce->ring->vma->obj));
2513
2514 desc = __get_lrc_desc_v69(guc, ctx_id);
2515 desc->engine_class = engine_class_to_guc_class(engine->class);
2516 desc->engine_submit_mask = engine->logical_mask;
2517 desc->hw_context_desc = ce->lrc.lrca;
2518 desc->priority = ce->guc_state.prio;
2519 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2520 guc_context_policy_init_v69(engine, desc);
2521
2522 /*
2523 * If context is a parent, we need to register a process descriptor
2524 * describing a work queue and register all child contexts.
2525 */
2526 if (intel_context_is_parent(ce)) {
2527 struct guc_process_desc_v69 *pdesc;
2528
2529 ce->parallel.guc.wqi_tail = 0;
2530 ce->parallel.guc.wqi_head = 0;
2531
2532 desc->process_desc = i915_ggtt_offset(ce->state) +
2533 __get_parent_scratch_offset(ce);
2534 desc->wq_addr = i915_ggtt_offset(ce->state) +
2535 __get_wq_offset(ce);
2536 desc->wq_size = WQ_SIZE;
2537
2538 pdesc = __get_process_desc_v69(ce);
2539 memset(pdesc, 0, sizeof(*(pdesc)));
2540 pdesc->stage_id = ce->guc_id.id;
2541 pdesc->wq_base_addr = desc->wq_addr;
2542 pdesc->wq_size_bytes = desc->wq_size;
2543 pdesc->wq_status = WQ_STATUS_ACTIVE;
2544
2545 ce->parallel.guc.wq_head = &pdesc->head;
2546 ce->parallel.guc.wq_tail = &pdesc->tail;
2547 ce->parallel.guc.wq_status = &pdesc->wq_status;
2548
2549 for_each_child(ce, child) {
2550 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2551
2552 desc->engine_class =
2553 engine_class_to_guc_class(engine->class);
2554 desc->hw_context_desc = child->lrc.lrca;
2555 desc->priority = ce->guc_state.prio;
2556 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2557 guc_context_policy_init_v69(engine, desc);
2558 }
2559
2560 clear_children_join_go_memory(ce);
2561 }
2562 }
2563
static void prepare_context_registration_info_v70(struct intel_context *ce,
2565 struct guc_ctxt_registration_info *info)
2566 {
2567 struct intel_engine_cs *engine = ce->engine;
2568 struct intel_guc *guc = &engine->gt->uc.guc;
2569 u32 ctx_id = ce->guc_id.id;
2570
2571 GEM_BUG_ON(!engine->mask);
2572
/*
 * Ensure the LRC and CT vmas are in the same region, as the write
 * barrier is done based on the CT vma region.
 */
2577 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2578 i915_gem_object_is_lmem(ce->ring->vma->obj));
2579
2580 memset(info, 0, sizeof(*info));
2581 info->context_idx = ctx_id;
2582 info->engine_class = engine_class_to_guc_class(engine->class);
2583 info->engine_submit_mask = engine->logical_mask;
2584 /*
2585 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2586 * only supports 32 bit currently.
2587 */
2588 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2589 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2590 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2591 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2592 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2593
2594 /*
2595 * If context is a parent, we need to register a process descriptor
2596 * describing a work queue and register all child contexts.
2597 */
2598 if (intel_context_is_parent(ce)) {
2599 struct guc_sched_wq_desc *wq_desc;
2600 u64 wq_desc_offset, wq_base_offset;
2601
2602 ce->parallel.guc.wqi_tail = 0;
2603 ce->parallel.guc.wqi_head = 0;
2604
2605 wq_desc_offset = i915_ggtt_offset(ce->state) +
2606 __get_parent_scratch_offset(ce);
2607 wq_base_offset = i915_ggtt_offset(ce->state) +
2608 __get_wq_offset(ce);
2609 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2610 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2611 info->wq_base_lo = lower_32_bits(wq_base_offset);
2612 info->wq_base_hi = upper_32_bits(wq_base_offset);
2613 info->wq_size = WQ_SIZE;
2614
2615 wq_desc = __get_wq_desc_v70(ce);
2616 memset(wq_desc, 0, sizeof(*wq_desc));
2617 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2618
2619 ce->parallel.guc.wq_head = &wq_desc->head;
2620 ce->parallel.guc.wq_tail = &wq_desc->tail;
2621 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2622
2623 clear_children_join_go_memory(ce);
2624 }
2625 }
2626
static int try_context_registration(struct intel_context *ce, bool loop)
2628 {
2629 struct intel_engine_cs *engine = ce->engine;
2630 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2631 struct intel_guc *guc = &engine->gt->uc.guc;
2632 intel_wakeref_t wakeref;
2633 u32 ctx_id = ce->guc_id.id;
2634 bool context_registered;
2635 int ret = 0;
2636
2637 GEM_BUG_ON(!sched_state_is_init(ce));
2638
2639 context_registered = ctx_id_mapped(guc, ctx_id);
2640
2641 clr_ctx_id_mapping(guc, ctx_id);
2642 set_ctx_id_mapping(guc, ctx_id, ce);
2643
/*
 * The context_lookup xarray is used to determine if the hardware
 * context is currently registered. There are two cases in which it
 * could be registered: either the guc_id has been stolen from another
 * context, or the LRC descriptor address of this context has changed.
 * In either case the context needs to be deregistered with the GuC
 * before registering this context.
 */
2652 if (context_registered) {
2653 bool disabled;
2654 unsigned long flags;
2655
2656 trace_intel_context_steal_guc_id(ce);
2657 GEM_BUG_ON(!loop);
2658
2659 /* Seal race with Reset */
2660 spin_lock_irqsave(&ce->guc_state.lock, flags);
2661 disabled = submission_disabled(guc);
2662 if (likely(!disabled)) {
2663 set_context_wait_for_deregister_to_register(ce);
2664 intel_context_get(ce);
2665 }
2666 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2667 if (unlikely(disabled)) {
2668 clr_ctx_id_mapping(guc, ctx_id);
2669 return 0; /* Will get registered later */
2670 }
2671
2672 /*
2673 * If stealing the guc_id, this ce has the same guc_id as the
2674 * context whose guc_id was stolen.
2675 */
2676 with_intel_runtime_pm(runtime_pm, wakeref)
2677 ret = deregister_context(ce, ce->guc_id.id);
2678 if (unlikely(ret == -ENODEV))
2679 ret = 0; /* Will get registered later */
2680 } else {
2681 with_intel_runtime_pm(runtime_pm, wakeref)
2682 ret = register_context(ce, loop);
2683 if (unlikely(ret == -EBUSY)) {
2684 clr_ctx_id_mapping(guc, ctx_id);
2685 } else if (unlikely(ret == -ENODEV)) {
2686 clr_ctx_id_mapping(guc, ctx_id);
2687 ret = 0; /* Will get registered later */
2688 }
2689 }
2690
2691 return ret;
2692 }
2693
static int __guc_context_pre_pin(struct intel_context *ce,
2695 struct intel_engine_cs *engine,
2696 struct i915_gem_ww_ctx *ww,
2697 void **vaddr)
2698 {
2699 return lrc_pre_pin(ce, engine, ww, vaddr);
2700 }
2701
static int __guc_context_pin(struct intel_context *ce,
2703 struct intel_engine_cs *engine,
2704 void *vaddr)
2705 {
2706 if (i915_ggtt_offset(ce->state) !=
2707 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2708 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2709
/*
 * The GuC context gets pinned in guc_request_alloc. See that function
 * for an explanation of why.
 */
2714
2715 return lrc_pin(ce, engine, vaddr);
2716 }
2717
static int guc_context_pre_pin(struct intel_context *ce,
2719 struct i915_gem_ww_ctx *ww,
2720 void **vaddr)
2721 {
2722 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2723 }
2724
static int guc_context_pin(struct intel_context *ce, void *vaddr)
2726 {
2727 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2728
2729 if (likely(!ret && !intel_context_is_barrier(ce)))
2730 intel_engine_pm_get(ce->engine);
2731
2732 return ret;
2733 }
2734
static void guc_context_unpin(struct intel_context *ce)
2736 {
2737 struct intel_guc *guc = ce_to_guc(ce);
2738
2739 unpin_guc_id(guc, ce);
2740 lrc_unpin(ce);
2741
2742 if (likely(!intel_context_is_barrier(ce)))
2743 intel_engine_pm_put_async(ce->engine);
2744 }
2745
static void guc_context_post_unpin(struct intel_context *ce)
2747 {
2748 lrc_post_unpin(ce);
2749 }
2750
static void __guc_context_sched_enable(struct intel_guc *guc,
2752 struct intel_context *ce)
2753 {
2754 u32 action[] = {
2755 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2756 ce->guc_id.id,
2757 GUC_CONTEXT_ENABLE
2758 };
2759
2760 trace_intel_context_sched_enable(ce);
2761
2762 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2763 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2764 }
2765
static void __guc_context_sched_disable(struct intel_guc *guc,
2767 struct intel_context *ce,
2768 u16 guc_id)
2769 {
2770 u32 action[] = {
2771 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2772 guc_id, /* ce->guc_id.id not stable */
2773 GUC_CONTEXT_DISABLE
2774 };
2775
2776 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
2777
2778 GEM_BUG_ON(intel_context_is_child(ce));
2779 trace_intel_context_sched_disable(ce);
2780
2781 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2782 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2783 }
2784
static void guc_blocked_fence_complete(struct intel_context *ce)
2786 {
2787 lockdep_assert_held(&ce->guc_state.lock);
2788
2789 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2790 i915_sw_fence_complete(&ce->guc_state.blocked);
2791 }
2792
static void guc_blocked_fence_reinit(struct intel_context *ce)
2794 {
2795 lockdep_assert_held(&ce->guc_state.lock);
2796 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2797
2798 /*
2799 * This fence is always complete unless a pending schedule disable is
2800 * outstanding. We arm the fence here and complete it when we receive
2801 * the pending schedule disable complete message.
2802 */
2803 i915_sw_fence_fini(&ce->guc_state.blocked);
2804 i915_sw_fence_reinit(&ce->guc_state.blocked);
2805 i915_sw_fence_await(&ce->guc_state.blocked);
2806 i915_sw_fence_commit(&ce->guc_state.blocked);
2807 }
2808
static u16 prep_context_pending_disable(struct intel_context *ce)
2810 {
2811 lockdep_assert_held(&ce->guc_state.lock);
2812
2813 set_context_pending_disable(ce);
2814 clr_context_enabled(ce);
2815 guc_blocked_fence_reinit(ce);
2816 intel_context_get(ce);
2817
2818 return ce->guc_id.id;
2819 }
2820
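/*
 * Block submission on a context: bump the blocked count and, if the
 * context is currently enabled in the GuC, issue a schedule disable
 * H2G. The returned guc_state.blocked fence is armed via
 * prep_context_pending_disable() and only signals once the disable
 * completes, so callers (e.g. guc_context_cancel_request()) can wait on
 * it before touching context state.
 */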
static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2822 {
2823 struct intel_guc *guc = ce_to_guc(ce);
2824 unsigned long flags;
2825 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2826 intel_wakeref_t wakeref;
2827 u16 guc_id;
2828 bool enabled;
2829
2830 GEM_BUG_ON(intel_context_is_child(ce));
2831
2832 spin_lock_irqsave(&ce->guc_state.lock, flags);
2833
2834 incr_context_blocked(ce);
2835
2836 enabled = context_enabled(ce);
2837 if (unlikely(!enabled || submission_disabled(guc))) {
2838 if (enabled)
2839 clr_context_enabled(ce);
2840 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2841 return &ce->guc_state.blocked;
2842 }
2843
2844 /*
2845 * We add +2 here as the schedule disable complete CTB handler calls
2846 * intel_context_sched_disable_unpin (-2 to pin_count).
2847 */
2848 atomic_add(2, &ce->pin_count);
2849
2850 guc_id = prep_context_pending_disable(ce);
2851
2852 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2853
2854 with_intel_runtime_pm(runtime_pm, wakeref)
2855 __guc_context_sched_disable(guc, ce, guc_id);
2856
2857 return &ce->guc_state.blocked;
2858 }
2859
2860 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2861 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2862 #define SCHED_STATE_NO_UNBLOCK \
2863 (SCHED_STATE_MULTI_BLOCKED_MASK | \
2864 SCHED_STATE_PENDING_DISABLE | \
2865 SCHED_STATE_BANNED)
2866
static bool context_cant_unblock(struct intel_context *ce)
2868 {
2869 lockdep_assert_held(&ce->guc_state.lock);
2870
2871 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2872 context_guc_id_invalid(ce) ||
2873 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2874 !intel_context_is_pinned(ce);
2875 }
2876
static void guc_context_unblock(struct intel_context *ce)
2878 {
2879 struct intel_guc *guc = ce_to_guc(ce);
2880 unsigned long flags;
2881 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2882 intel_wakeref_t wakeref;
2883 bool enable;
2884
2885 GEM_BUG_ON(context_enabled(ce));
2886 GEM_BUG_ON(intel_context_is_child(ce));
2887
2888 spin_lock_irqsave(&ce->guc_state.lock, flags);
2889
2890 if (unlikely(submission_disabled(guc) ||
2891 context_cant_unblock(ce))) {
2892 enable = false;
2893 } else {
2894 enable = true;
2895 set_context_pending_enable(ce);
2896 set_context_enabled(ce);
2897 intel_context_get(ce);
2898 }
2899
2900 decr_context_blocked(ce);
2901
2902 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2903
2904 if (enable) {
2905 with_intel_runtime_pm(runtime_pm, wakeref)
2906 __guc_context_sched_enable(guc, ce);
2907 }
2908 }
2909
static void guc_context_cancel_request(struct intel_context *ce,
2911 struct i915_request *rq)
2912 {
2913 struct intel_context *block_context =
2914 request_to_scheduling_context(rq);
2915
2916 if (i915_sw_fence_signaled(&rq->submit)) {
2917 struct i915_sw_fence *fence;
2918
2919 intel_context_get(ce);
2920 fence = guc_context_block(block_context);
2921 i915_sw_fence_wait(fence);
2922 if (!i915_request_completed(rq)) {
2923 __i915_request_skip(rq);
2924 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2925 true);
2926 }
2927
2928 guc_context_unblock(block_context);
2929 intel_context_put(ce);
2930 }
2931 }
2932
static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
2934 u16 guc_id,
2935 u32 preemption_timeout)
2936 {
2937 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
2938 struct context_policy policy;
2939
2940 __guc_context_policy_start_klv(&policy, guc_id);
2941 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2942 __guc_context_set_context_policies(guc, &policy, true);
2943 } else {
2944 u32 action[] = {
2945 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
2946 guc_id,
2947 preemption_timeout
2948 };
2949
2950 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
2951 }
2952 }
2953
2954 static void
guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
2956 unsigned int preempt_timeout_ms)
2957 {
2958 struct intel_guc *guc = ce_to_guc(ce);
2959 struct intel_runtime_pm *runtime_pm =
2960 &ce->engine->gt->i915->runtime_pm;
2961 intel_wakeref_t wakeref;
2962 unsigned long flags;
2963
2964 GEM_BUG_ON(intel_context_is_child(ce));
2965
2966 guc_flush_submissions(guc);
2967
2968 spin_lock_irqsave(&ce->guc_state.lock, flags);
2969 set_context_banned(ce);
2970
2971 if (submission_disabled(guc) ||
2972 (!context_enabled(ce) && !context_pending_disable(ce))) {
2973 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2974
2975 guc_cancel_context_requests(ce);
2976 intel_engine_signal_breadcrumbs(ce->engine);
2977 } else if (!context_pending_disable(ce)) {
2978 u16 guc_id;
2979
2980 /*
2981 * We add +2 here as the schedule disable complete CTB handler
2982 * calls intel_context_sched_disable_unpin (-2 to pin_count).
2983 */
2984 atomic_add(2, &ce->pin_count);
2985
2986 guc_id = prep_context_pending_disable(ce);
2987 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2988
2989 /*
2990 * In addition to disabling scheduling, set the preemption
2991 * timeout to the minimum value (1 us) so the banned context
2992 * gets kicked off the HW ASAP.
2993 */
2994 with_intel_runtime_pm(runtime_pm, wakeref) {
2995 __guc_context_set_preemption_timeout(guc, guc_id,
2996 preempt_timeout_ms);
2997 __guc_context_sched_disable(guc, ce, guc_id);
2998 }
2999 } else {
3000 if (!context_guc_id_invalid(ce))
3001 with_intel_runtime_pm(runtime_pm, wakeref)
3002 __guc_context_set_preemption_timeout(guc,
3003 ce->guc_id.id,
3004 preempt_timeout_ms);
3005 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3006 }
3007 }
3008
static void guc_context_sched_disable(struct intel_context *ce)
3010 {
3011 struct intel_guc *guc = ce_to_guc(ce);
3012 unsigned long flags;
3013 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3014 intel_wakeref_t wakeref;
3015 u16 guc_id;
3016
3017 GEM_BUG_ON(intel_context_is_child(ce));
3018
3019 spin_lock_irqsave(&ce->guc_state.lock, flags);
3020
/*
 * We have to check if the context has been disabled by another thread,
 * check if submission has been disabled to seal a race with reset, and
 * finally check if any more requests have been committed to the
 * context, ensuring that a request doesn't slip through the
 * 'context_pending_disable' fence.
 */
3028 if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
3029 context_has_committed_requests(ce))) {
3030 clr_context_enabled(ce);
3031 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3032 goto unpin;
3033 }
3034 guc_id = prep_context_pending_disable(ce);
3035
3036 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3037
3038 with_intel_runtime_pm(runtime_pm, wakeref)
3039 __guc_context_sched_disable(guc, ce, guc_id);
3040
3041 return;
3042 unpin:
3043 intel_context_sched_disable_unpin(ce);
3044 }
3045
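/*
 * Mark the context destroyed and send the deregister H2G while holding
 * a GT PM reference. If submission has already been disabled by a reset
 * there is nothing to tell the GuC, so release the guc_id and free the
 * context immediately instead.
 */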
static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3047 {
3048 struct intel_guc *guc = ce_to_guc(ce);
3049 struct intel_gt *gt = guc_to_gt(guc);
3050 unsigned long flags;
3051 bool disabled;
3052
3053 GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
3054 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
3055 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
3056 GEM_BUG_ON(context_enabled(ce));
3057
3058 /* Seal race with Reset */
3059 spin_lock_irqsave(&ce->guc_state.lock, flags);
3060 disabled = submission_disabled(guc);
3061 if (likely(!disabled)) {
3062 __intel_gt_pm_get(gt);
3063 set_context_destroyed(ce);
3064 clr_context_registered(ce);
3065 }
3066 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3067 if (unlikely(disabled)) {
3068 release_guc_id(guc, ce);
3069 __guc_context_destroy(ce);
3070 return;
3071 }
3072
3073 deregister_context(ce, ce->guc_id.id);
3074 }
3075
static void __guc_context_destroy(struct intel_context *ce)
3077 {
3078 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
3079 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
3080 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
3081 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
3082 GEM_BUG_ON(ce->guc_state.number_committed_requests);
3083
3084 lrc_fini(ce);
3085 intel_context_fini(ce);
3086
3087 if (intel_engine_is_virtual(ce->engine)) {
3088 struct guc_virtual_engine *ve =
3089 container_of(ce, typeof(*ve), context);
3090
3091 if (ve->base.breadcrumbs)
3092 intel_breadcrumbs_put(ve->base.breadcrumbs);
3093
3094 kfree(ve);
3095 } else {
3096 intel_context_free(ce);
3097 }
3098 }
3099
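/*
 * Free everything on the destroyed list without talking to the GuC;
 * this is only legal while submission is disabled (see the GEM_BUG_ON
 * below). The normal path is deregister_destroyed_contexts(), which
 * sends a deregister H2G for each context instead.
 */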
static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3101 {
3102 struct intel_context *ce;
3103 unsigned long flags;
3104
3105 GEM_BUG_ON(!submission_disabled(guc) &&
3106 guc_submission_initialized(guc));
3107
3108 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3109 spin_lock_irqsave(&guc->submission_state.lock, flags);
3110 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3111 struct intel_context,
3112 destroyed_link);
3113 if (ce)
3114 list_del_init(&ce->destroyed_link);
3115 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3116
3117 if (!ce)
3118 break;
3119
3120 release_guc_id(guc, ce);
3121 __guc_context_destroy(ce);
3122 }
3123 }
3124
static void deregister_destroyed_contexts(struct intel_guc *guc)
3126 {
3127 struct intel_context *ce;
3128 unsigned long flags;
3129
3130 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3131 spin_lock_irqsave(&guc->submission_state.lock, flags);
3132 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3133 struct intel_context,
3134 destroyed_link);
3135 if (ce)
3136 list_del_init(&ce->destroyed_link);
3137 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3138
3139 if (!ce)
3140 break;
3141
3142 guc_lrc_desc_unpin(ce);
3143 }
3144 }
3145
static void destroyed_worker_func(struct work_struct *w)
3147 {
3148 struct intel_guc *guc = container_of(w, struct intel_guc,
3149 submission_state.destroyed_worker);
3150 struct intel_gt *gt = guc_to_gt(guc);
3151 int tmp;
3152
3153 with_intel_gt_pm(gt, tmp)
3154 deregister_destroyed_contexts(guc);
3155 }
3156
static void guc_context_destroy(struct kref *kref)
3158 {
3159 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3160 struct intel_guc *guc = ce_to_guc(ce);
3161 unsigned long flags;
3162 bool destroy;
3163
/*
 * If the guc_id is invalid this context has been stolen and we can free
 * it immediately. It can also be freed immediately if the context is
 * not registered with the GuC or the GuC is in the middle of a reset.
 */
3169 spin_lock_irqsave(&guc->submission_state.lock, flags);
3170 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3171 !ctx_id_mapped(guc, ce->guc_id.id);
3172 if (likely(!destroy)) {
3173 if (!list_empty(&ce->guc_id.link))
3174 list_del_init(&ce->guc_id.link);
3175 list_add_tail(&ce->destroyed_link,
3176 &guc->submission_state.destroyed_contexts);
3177 } else {
3178 __release_guc_id(guc, ce);
3179 }
3180 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3181 if (unlikely(destroy)) {
3182 __guc_context_destroy(ce);
3183 return;
3184 }
3185
/*
 * We use a worker to issue the H2G to deregister the context, as this
 * path can take the GT PM for the first time, which isn't allowed from
 * an atomic context.
 */
3191 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
3192 }
3193
static int guc_context_alloc(struct intel_context *ce)
3195 {
3196 return lrc_alloc(ce, ce->engine);
3197 }
3198
static void __guc_context_set_prio(struct intel_guc *guc,
3200 struct intel_context *ce)
3201 {
3202 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
3203 struct context_policy policy;
3204
3205 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3206 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3207 __guc_context_set_context_policies(guc, &policy, true);
3208 } else {
3209 u32 action[] = {
3210 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3211 ce->guc_id.id,
3212 ce->guc_state.prio,
3213 };
3214
3215 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3216 }
3217 }
3218
static void guc_context_set_prio(struct intel_guc *guc,
3220 struct intel_context *ce,
3221 u8 prio)
3222 {
3223 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
3224 prio > GUC_CLIENT_PRIORITY_NORMAL);
3225 lockdep_assert_held(&ce->guc_state.lock);
3226
3227 if (ce->guc_state.prio == prio || submission_disabled(guc) ||
3228 !context_registered(ce)) {
3229 ce->guc_state.prio = prio;
3230 return;
3231 }
3232
3233 ce->guc_state.prio = prio;
3234 __guc_context_set_prio(guc, ce);
3235
3236 trace_intel_context_set_prio(ce);
3237 }
3238
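/*
 * Map i915 request priorities onto the four GuC client priorities
 * (lower GuC value means higher priority): below I915_PRIORITY_NORMAL
 * -> NORMAL, exactly normal -> KMD_NORMAL, above normal but below
 * I915_PRIORITY_DISPLAY -> HIGH, display priority or above -> KMD_HIGH.
 */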
static inline u8 map_i915_prio_to_guc_prio(int prio)
3240 {
3241 if (prio == I915_PRIORITY_NORMAL)
3242 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
3243 else if (prio < I915_PRIORITY_NORMAL)
3244 return GUC_CLIENT_PRIORITY_NORMAL;
3245 else if (prio < I915_PRIORITY_DISPLAY)
3246 return GUC_CLIENT_PRIORITY_HIGH;
3247 else
3248 return GUC_CLIENT_PRIORITY_KMD_HIGH;
3249 }
3250
static inline void add_context_inflight_prio(struct intel_context *ce,
3252 u8 guc_prio)
3253 {
3254 lockdep_assert_held(&ce->guc_state.lock);
3255 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3256
3257 ++ce->guc_state.prio_count[guc_prio];
3258
3259 /* Overflow protection */
3260 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3261 }
3262
static inline void sub_context_inflight_prio(struct intel_context *ce,
3264 u8 guc_prio)
3265 {
3266 lockdep_assert_held(&ce->guc_state.lock);
3267 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3268
3269 /* Underflow protection */
3270 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3271
3272 --ce->guc_state.prio_count[guc_prio];
3273 }
3274
static inline void update_context_prio(struct intel_context *ce)
3276 {
3277 struct intel_guc *guc = &ce->engine->gt->uc.guc;
3278 int i;
3279
3280 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
3281 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
3282
3283 lockdep_assert_held(&ce->guc_state.lock);
3284
3285 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
3286 if (ce->guc_state.prio_count[i]) {
3287 guc_context_set_prio(guc, ce, i);
3288 break;
3289 }
3290 }
3291 }
3292
static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3294 {
3295 /* Lower value is higher priority */
3296 return new_guc_prio < old_guc_prio;
3297 }
3298
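/*
 * Track how many in-flight requests exist at each GuC priority via
 * guc_state.prio_count[]; update_context_prio() then programs the
 * context to the highest-priority (lowest-valued) bucket that is still
 * non-empty, so a context runs at the priority of its most urgent
 * request.
 */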
static void add_to_context(struct i915_request *rq)
3300 {
3301 struct intel_context *ce = request_to_scheduling_context(rq);
3302 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3303
3304 GEM_BUG_ON(intel_context_is_child(ce));
3305 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
3306
3307 spin_lock(&ce->guc_state.lock);
3308 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3309
3310 if (rq->guc_prio == GUC_PRIO_INIT) {
3311 rq->guc_prio = new_guc_prio;
3312 add_context_inflight_prio(ce, rq->guc_prio);
3313 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3314 sub_context_inflight_prio(ce, rq->guc_prio);
3315 rq->guc_prio = new_guc_prio;
3316 add_context_inflight_prio(ce, rq->guc_prio);
3317 }
3318 update_context_prio(ce);
3319
3320 spin_unlock(&ce->guc_state.lock);
3321 }
3322
3323 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3324 {
3325 lockdep_assert_held(&ce->guc_state.lock);
3326
3327 if (rq->guc_prio != GUC_PRIO_INIT &&
3328 rq->guc_prio != GUC_PRIO_FINI) {
3329 sub_context_inflight_prio(ce, rq->guc_prio);
3330 update_context_prio(ce);
3331 }
3332 rq->guc_prio = GUC_PRIO_FINI;
3333 }
3334
3335 static void remove_from_context(struct i915_request *rq)
3336 {
3337 struct intel_context *ce = request_to_scheduling_context(rq);
3338
3339 GEM_BUG_ON(intel_context_is_child(ce));
3340
3341 spin_lock_irq(&ce->guc_state.lock);
3342
3343 list_del_init(&rq->sched.link);
3344 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3345
3346 /* Prevent further __await_execution() registering a cb, then flush */
3347 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3348
3349 guc_prio_fini(rq, ce);
3350
3351 decr_context_committed_requests(ce);
3352
3353 spin_unlock_irq(&ce->guc_state.lock);
3354
3355 atomic_dec(&ce->guc_id.ref);
3356 i915_request_notify_execute_cb_imm(rq);
3357 }
3358
3359 static const struct intel_context_ops guc_context_ops = {
3360 .alloc = guc_context_alloc,
3361
3362 .pre_pin = guc_context_pre_pin,
3363 .pin = guc_context_pin,
3364 .unpin = guc_context_unpin,
3365 .post_unpin = guc_context_post_unpin,
3366
3367 .revoke = guc_context_revoke,
3368
3369 .cancel_request = guc_context_cancel_request,
3370
3371 .enter = intel_context_enter_engine,
3372 .exit = intel_context_exit_engine,
3373
3374 .sched_disable = guc_context_sched_disable,
3375
3376 .reset = lrc_reset,
3377 .destroy = guc_context_destroy,
3378
3379 .create_virtual = guc_create_virtual,
3380 .create_parallel = guc_create_parallel,
3381 };
3382
3383 static void submit_work_cb(struct irq_work *wrk)
3384 {
3385 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
3386
3387 might_lock(&rq->engine->sched_engine->lock);
3388 i915_sw_fence_complete(&rq->submit);
3389 }
3390
3391 static void __guc_signal_context_fence(struct intel_context *ce)
3392 {
3393 struct i915_request *rq, *rn;
3394
3395 lockdep_assert_held(&ce->guc_state.lock);
3396
3397 if (!list_empty(&ce->guc_state.fences))
3398 trace_intel_context_fence_release(ce);
3399
3400 /*
3401 * Use an IRQ to ensure locking order of sched_engine->lock ->
3402 * ce->guc_state.lock is preserved.
3403 */
3404 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
3405 guc_fence_link) {
3406 list_del(&rq->guc_fence_link);
3407 irq_work_queue(&rq->submit_work);
3408 }
3409
3410 INIT_LIST_HEAD(&ce->guc_state.fences);
3411 }
3412
3413 static void guc_signal_context_fence(struct intel_context *ce)
3414 {
3415 unsigned long flags;
3416
3417 GEM_BUG_ON(intel_context_is_child(ce));
3418
3419 spin_lock_irqsave(&ce->guc_state.lock, flags);
3420 clr_context_wait_for_deregister_to_register(ce);
3421 __guc_signal_context_fence(ce);
3422 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3423 }
3424
3425 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
3426 {
3427 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
3428 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
3429 !submission_disabled(ce_to_guc(ce));
3430 }
3431
3432 static void guc_context_init(struct intel_context *ce)
3433 {
3434 const struct i915_gem_context *ctx;
3435 int prio = I915_CONTEXT_DEFAULT_PRIORITY;
3436
3437 rcu_read_lock();
3438 ctx = rcu_dereference(ce->gem_context);
3439 if (ctx)
3440 prio = ctx->sched.priority;
3441 rcu_read_unlock();
3442
3443 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3444 set_bit(CONTEXT_GUC_INIT, &ce->flags);
3445 }
3446
3447 static int guc_request_alloc(struct i915_request *rq)
3448 {
3449 struct intel_context *ce = request_to_scheduling_context(rq);
3450 struct intel_guc *guc = ce_to_guc(ce);
3451 unsigned long flags;
3452 int ret;
3453
3454 GEM_BUG_ON(!intel_context_is_pinned(rq->context));
3455
3456 /*
3457 * Flush enough space to reduce the likelihood of waiting after
3458 * we start building the request - in which case we will just
3459 * have to repeat work.
3460 */
3461 rq->reserved_space += GUC_REQUEST_SIZE;
3462
3463 /*
3464 * Note that after this point, we have committed to using
3465 * this request as it is being used to both track the
3466 * state of engine initialisation and liveness of the
3467 * golden renderstate above. Think twice before you try
3468 * to cancel/unwind this request now.
3469 */
3470
3471 /* Unconditionally invalidate GPU caches and TLBs. */
3472 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
3473 if (ret)
3474 return ret;
3475
3476 rq->reserved_space -= GUC_REQUEST_SIZE;
3477
3478 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
3479 guc_context_init(ce);
3480
3481 /*
3482 * Call pin_guc_id here rather than in the pinning step as with
3483 * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing the
3484 * guc_id and creating horrible race conditions. This is especially bad
3485 * when guc_ids are being stolen due to over subscription. By the time
3486 * this function is reached, it is guaranteed that the guc_id will be
3487 * persistent until the generated request is retired, thus sealing these
3488 * race conditions. It is still safe to fail here if guc_ids are
3489 * exhausted and return -EAGAIN to the user, indicating that they can try
3490 * again in the future.
3491 *
3492 * There is no need for a lock here as the timeline mutex ensures at
3493 * most one context can be executing this code path at once. The
3494 * guc_id_ref is incremented once for every request in flight and
3495 * decremented on each retire. When it is zero, a lock around the
3496 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3497 */
3498 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3499 goto out;
3500
3501 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3502 if (unlikely(ret < 0))
3503 return ret;
3504 if (context_needs_register(ce, !!ret)) {
3505 ret = try_context_registration(ce, true);
3506 if (unlikely(ret)) { /* unwind */
3507 if (ret == -EPIPE) {
3508 disable_submission(guc);
3509 goto out; /* GPU will be reset */
3510 }
3511 atomic_dec(&ce->guc_id.ref);
3512 unpin_guc_id(guc, ce);
3513 return ret;
3514 }
3515 }
3516
3517 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3518
3519 out:
3520 /*
3521 * We block all requests on this context if a G2H is pending for a
3522 * schedule disable or context deregistration as the GuC will fail a
3523 * schedule enable or context registration if either G2H is pending
3524 * respectively. Once a G2H returns, the fence that is blocking these
3525 * requests is released (see guc_signal_context_fence).
3526 */
3527 spin_lock_irqsave(&ce->guc_state.lock, flags);
3528 if (context_wait_for_deregister_to_register(ce) ||
3529 context_pending_disable(ce)) {
3530 init_irq_work(&rq->submit_work, submit_work_cb);
3531 i915_sw_fence_await(&rq->submit);
3532
3533 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3534 }
3535 incr_context_committed_requests(ce);
3536 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3537
3538 return 0;
3539 }
3540
3541 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3542 struct i915_gem_ww_ctx *ww,
3543 void **vaddr)
3544 {
3545 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3546
3547 return __guc_context_pre_pin(ce, engine, ww, vaddr);
3548 }
3549
3550 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3551 {
3552 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3553 int ret = __guc_context_pin(ce, engine, vaddr);
3554 intel_engine_mask_t tmp, mask = ce->engine->mask;
3555
3556 if (likely(!ret))
3557 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3558 intel_engine_pm_get(engine);
3559
3560 return ret;
3561 }
3562
3563 static void guc_virtual_context_unpin(struct intel_context *ce)
3564 {
3565 intel_engine_mask_t tmp, mask = ce->engine->mask;
3566 struct intel_engine_cs *engine;
3567 struct intel_guc *guc = ce_to_guc(ce);
3568
3569 GEM_BUG_ON(context_enabled(ce));
3570 GEM_BUG_ON(intel_context_is_barrier(ce));
3571
3572 unpin_guc_id(guc, ce);
3573 lrc_unpin(ce);
3574
3575 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3576 intel_engine_pm_put_async(engine);
3577 }
3578
3579 static void guc_virtual_context_enter(struct intel_context *ce)
3580 {
3581 intel_engine_mask_t tmp, mask = ce->engine->mask;
3582 struct intel_engine_cs *engine;
3583
3584 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3585 intel_engine_pm_get(engine);
3586
3587 intel_timeline_enter(ce->timeline);
3588 }
3589
3590 static void guc_virtual_context_exit(struct intel_context *ce)
3591 {
3592 intel_engine_mask_t tmp, mask = ce->engine->mask;
3593 struct intel_engine_cs *engine;
3594
3595 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3596 intel_engine_pm_put(engine);
3597
3598 intel_timeline_exit(ce->timeline);
3599 }
3600
3601 static int guc_virtual_context_alloc(struct intel_context *ce)
3602 {
3603 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3604
3605 return lrc_alloc(ce, engine);
3606 }
3607
3608 static const struct intel_context_ops virtual_guc_context_ops = {
3609 .alloc = guc_virtual_context_alloc,
3610
3611 .pre_pin = guc_virtual_context_pre_pin,
3612 .pin = guc_virtual_context_pin,
3613 .unpin = guc_virtual_context_unpin,
3614 .post_unpin = guc_context_post_unpin,
3615
3616 .revoke = guc_context_revoke,
3617
3618 .cancel_request = guc_context_cancel_request,
3619
3620 .enter = guc_virtual_context_enter,
3621 .exit = guc_virtual_context_exit,
3622
3623 .sched_disable = guc_context_sched_disable,
3624
3625 .destroy = guc_context_destroy,
3626
3627 .get_sibling = guc_virtual_get_sibling,
3628 };
3629
3630 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3631 {
3632 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3633 struct intel_guc *guc = ce_to_guc(ce);
3634 int ret;
3635
3636 GEM_BUG_ON(!intel_context_is_parent(ce));
3637 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3638
3639 ret = pin_guc_id(guc, ce);
3640 if (unlikely(ret < 0))
3641 return ret;
3642
3643 return __guc_context_pin(ce, engine, vaddr);
3644 }
3645
3646 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3647 {
3648 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3649
3650 GEM_BUG_ON(!intel_context_is_child(ce));
3651 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3652
3653 __intel_context_pin(ce->parallel.parent);
3654 return __guc_context_pin(ce, engine, vaddr);
3655 }
3656
3657 static void guc_parent_context_unpin(struct intel_context *ce)
3658 {
3659 struct intel_guc *guc = ce_to_guc(ce);
3660
3661 GEM_BUG_ON(context_enabled(ce));
3662 GEM_BUG_ON(intel_context_is_barrier(ce));
3663 GEM_BUG_ON(!intel_context_is_parent(ce));
3664 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3665
3666 unpin_guc_id(guc, ce);
3667 lrc_unpin(ce);
3668 }
3669
3670 static void guc_child_context_unpin(struct intel_context *ce)
3671 {
3672 GEM_BUG_ON(context_enabled(ce));
3673 GEM_BUG_ON(intel_context_is_barrier(ce));
3674 GEM_BUG_ON(!intel_context_is_child(ce));
3675 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3676
3677 lrc_unpin(ce);
3678 }
3679
3680 static void guc_child_context_post_unpin(struct intel_context *ce)
3681 {
3682 GEM_BUG_ON(!intel_context_is_child(ce));
3683 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
3684 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3685
3686 lrc_post_unpin(ce);
3687 intel_context_unpin(ce->parallel.parent);
3688 }
3689
3690 static void guc_child_context_destroy(struct kref *kref)
3691 {
3692 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3693
3694 __guc_context_destroy(ce);
3695 }
3696
3697 static const struct intel_context_ops virtual_parent_context_ops = {
3698 .alloc = guc_virtual_context_alloc,
3699
3700 .pre_pin = guc_context_pre_pin,
3701 .pin = guc_parent_context_pin,
3702 .unpin = guc_parent_context_unpin,
3703 .post_unpin = guc_context_post_unpin,
3704
3705 .revoke = guc_context_revoke,
3706
3707 .cancel_request = guc_context_cancel_request,
3708
3709 .enter = guc_virtual_context_enter,
3710 .exit = guc_virtual_context_exit,
3711
3712 .sched_disable = guc_context_sched_disable,
3713
3714 .destroy = guc_context_destroy,
3715
3716 .get_sibling = guc_virtual_get_sibling,
3717 };
3718
3719 static const struct intel_context_ops virtual_child_context_ops = {
3720 .alloc = guc_virtual_context_alloc,
3721
3722 .pre_pin = guc_context_pre_pin,
3723 .pin = guc_child_context_pin,
3724 .unpin = guc_child_context_unpin,
3725 .post_unpin = guc_child_context_post_unpin,
3726
3727 .cancel_request = guc_context_cancel_request,
3728
3729 .enter = guc_virtual_context_enter,
3730 .exit = guc_virtual_context_exit,
3731
3732 .destroy = guc_child_context_destroy,
3733
3734 .get_sibling = guc_virtual_get_sibling,
3735 };
3736
3737 /*
3738 * The below override of the breadcrumbs is enabled when the user configures a
3739 * context for parallel submission (multi-lrc, parent-child).
3740 *
3741 * The overridden breadcrumbs implements an algorithm which allows the GuC to
3742 * safely preempt all the hw contexts configured for parallel submission
3743 * between each BB. The contract between the i915 and GuC is that if the parent
3744 * context can be preempted, all the children can be preempted, and the GuC will
3745 * always try to preempt the parent before the children. A handshake between the
3746 * parent / children breadcrumbs ensures the i915 holds up its end of the deal
3747 * creating a window to preempt between each set of BBs.
3748 */
3749 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3750 u64 offset, u32 len,
3751 const unsigned int flags);
3752 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3753 u64 offset, u32 len,
3754 const unsigned int flags);
3755 static u32 *
3756 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3757 u32 *cs);
3758 static u32 *
3759 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
3760 u32 *cs);
3761
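/*
 * Create a parent + children set of contexts for parallel submission. The
 * engines[] array is laid out width-major: slot i uses engines[i * num_siblings]
 * through engines[i * num_siblings + num_siblings - 1] as the siblings of one
 * virtual engine. Slot 0 becomes the parent, the remaining slots become
 * children bound to it, and all of them use the no-preempt-mid-batch BB start
 * and fini breadcrumb emitters declared above.
 */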
3762 static struct intel_context *
3763 guc_create_parallel(struct intel_engine_cs **engines,
3764 unsigned int num_siblings,
3765 unsigned int width)
3766 {
3767 struct intel_engine_cs **siblings = NULL;
3768 struct intel_context *parent = NULL, *ce, *err;
3769 int i, j;
3770
3771 siblings = kmalloc_array(num_siblings,
3772 sizeof(*siblings),
3773 GFP_KERNEL);
3774 if (!siblings)
3775 return ERR_PTR(-ENOMEM);
3776
3777 for (i = 0; i < width; ++i) {
3778 for (j = 0; j < num_siblings; ++j)
3779 siblings[j] = engines[i * num_siblings + j];
3780
3781 ce = intel_engine_create_virtual(siblings, num_siblings,
3782 FORCE_VIRTUAL);
3783 if (IS_ERR(ce)) {
3784 err = ERR_CAST(ce);
3785 goto unwind;
3786 }
3787
3788 if (i == 0) {
3789 parent = ce;
3790 parent->ops = &virtual_parent_context_ops;
3791 } else {
3792 ce->ops = &virtual_child_context_ops;
3793 intel_context_bind_parent_child(parent, ce);
3794 }
3795 }
3796
3797 parent->parallel.fence_context = dma_fence_context_alloc(1);
3798
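/*
 * Fini breadcrumb sizes (in dwords): the parent emits one 4-dword semaphore
 * wait per child, a 2-dword arbitration enable, a 4-dword "children go" write
 * and the common 6-dword seqno write + user interrupt tail, i.e.
 * 12 + 4 * number_children. Each child emits the 2-dword arbitration enable,
 * a 4-dword signal, a 4-dword wait and the same 6-dword tail, i.e. 16. The
 * GEM_BUG_ONs in the fini breadcrumb emitters sanity-check these totals.
 */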
3799 parent->engine->emit_bb_start =
3800 emit_bb_start_parent_no_preempt_mid_batch;
3801 parent->engine->emit_fini_breadcrumb =
3802 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3803 parent->engine->emit_fini_breadcrumb_dw =
3804 12 + 4 * parent->parallel.number_children;
3805 for_each_child(parent, ce) {
3806 ce->engine->emit_bb_start =
3807 emit_bb_start_child_no_preempt_mid_batch;
3808 ce->engine->emit_fini_breadcrumb =
3809 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3810 ce->engine->emit_fini_breadcrumb_dw = 16;
3811 }
3812
3813 kfree(siblings);
3814 return parent;
3815
3816 unwind:
3817 if (parent)
3818 intel_context_put(parent);
3819 kfree(siblings);
3820 return err;
3821 }
3822
3823 static bool
3824 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
3825 {
3826 struct intel_engine_cs *sibling;
3827 intel_engine_mask_t tmp, mask = b->engine_mask;
3828 bool result = false;
3829
3830 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3831 result |= intel_engine_irq_enable(sibling);
3832
3833 return result;
3834 }
3835
3836 static void
3837 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
3838 {
3839 struct intel_engine_cs *sibling;
3840 intel_engine_mask_t tmp, mask = b->engine_mask;
3841
3842 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3843 intel_engine_irq_disable(sibling);
3844 }
3845
3846 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
3847 {
3848 int i;
3849
3850 /*
3851 * In GuC submission mode we do not know which physical engine a request
3852 * will be scheduled on; this creates a problem because the breadcrumb
3853 * interrupt is per physical engine. To work around this we attach
3854 * requests and direct all breadcrumb interrupts to the first instance
3855 * of an engine per class. In addition all breadcrumb interrupts are
3856 * enabled / disabled across an engine class in unison.
3857 */
3858 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
3859 struct intel_engine_cs *sibling =
3860 engine->gt->engine_class[engine->class][i];
3861
3862 if (sibling) {
3863 if (engine->breadcrumbs != sibling->breadcrumbs) {
3864 intel_breadcrumbs_put(engine->breadcrumbs);
3865 engine->breadcrumbs =
3866 intel_breadcrumbs_get(sibling->breadcrumbs);
3867 }
3868 break;
3869 }
3870 }
3871
3872 if (engine->breadcrumbs) {
3873 engine->breadcrumbs->engine_mask |= engine->mask;
3874 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
3875 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
3876 }
3877 }
3878
3879 static void guc_bump_inflight_request_prio(struct i915_request *rq,
3880 int prio)
3881 {
3882 struct intel_context *ce = request_to_scheduling_context(rq);
3883 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
3884
3885 /* Short circuit function */
3886 if (prio < I915_PRIORITY_NORMAL ||
3887 rq->guc_prio == GUC_PRIO_FINI ||
3888 (rq->guc_prio != GUC_PRIO_INIT &&
3889 !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
3890 return;
3891
3892 spin_lock(&ce->guc_state.lock);
3893 if (rq->guc_prio != GUC_PRIO_FINI) {
3894 if (rq->guc_prio != GUC_PRIO_INIT)
3895 sub_context_inflight_prio(ce, rq->guc_prio);
3896 rq->guc_prio = new_guc_prio;
3897 add_context_inflight_prio(ce, rq->guc_prio);
3898 update_context_prio(ce);
3899 }
3900 spin_unlock(&ce->guc_state.lock);
3901 }
3902
3903 static void guc_retire_inflight_request_prio(struct i915_request *rq)
3904 {
3905 struct intel_context *ce = request_to_scheduling_context(rq);
3906
3907 spin_lock(&ce->guc_state.lock);
3908 guc_prio_fini(rq, ce);
3909 spin_unlock(&ce->guc_state.lock);
3910 }
3911
3912 static void sanitize_hwsp(struct intel_engine_cs *engine)
3913 {
3914 struct intel_timeline *tl;
3915
3916 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
3917 intel_timeline_reset_seqno(tl);
3918 }
3919
3920 static void guc_sanitize(struct intel_engine_cs *engine)
3921 {
3922 /*
3923 * Poison residual state on resume, in case the suspend didn't!
3924 *
3925 * We have to assume that across suspend/resume (or other loss
3926 * of control) that the contents of our pinned buffers has been
3927 * lost, replaced by garbage. Since this doesn't always happen,
3928 * let's poison such state so that we more quickly spot when
3929 * we falsely assume it has been preserved.
3930 */
3931 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3932 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
3933
3934 /*
3935 * The kernel_context HWSP is stored in the status_page. As above,
3936 * that may be lost on resume/initialisation, and so we need to
3937 * reset the value in the HWSP.
3938 */
3939 sanitize_hwsp(engine);
3940
3941 /* And scrub the dirty cachelines for the HWSP */
3942 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
3943
3944 intel_engine_reset_pinned_contexts(engine);
3945 }
3946
3947 static void setup_hwsp(struct intel_engine_cs *engine)
3948 {
3949 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3950
3951 ENGINE_WRITE_FW(engine,
3952 RING_HWS_PGA,
3953 i915_ggtt_offset(engine->status_page.vma));
3954 }
3955
3956 static void start_engine(struct intel_engine_cs *engine)
3957 {
3958 ENGINE_WRITE_FW(engine,
3959 RING_MODE_GEN7,
3960 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
3961
3962 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3963 ENGINE_POSTING_READ(engine, RING_MI_MODE);
3964 }
3965
3966 static int guc_resume(struct intel_engine_cs *engine)
3967 {
3968 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3969
3970 intel_mocs_init_engine(engine);
3971
3972 intel_breadcrumbs_reset(engine->breadcrumbs);
3973
3974 setup_hwsp(engine);
3975 start_engine(engine);
3976
3977 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
3978 xehp_enable_ccs_engines(engine);
3979
3980 return 0;
3981 }
3982
3983 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
3984 {
3985 return !sched_engine->tasklet.callback;
3986 }
3987
3988 static void guc_set_default_submission(struct intel_engine_cs *engine)
3989 {
3990 engine->submit_request = guc_submit_request;
3991 }
3992
3993 static inline void guc_kernel_context_pin(struct intel_guc *guc,
3994 struct intel_context *ce)
3995 {
3996 /*
3997 * Note: we purposefully do not check the returns below because
3998 * the registration can only fail if a reset is just starting.
3999 * This is called at the end of reset so presumably another reset
4000 * isn't happening and even if it did, this code would be run again.
4001 */
4002
4003 if (context_guc_id_invalid(ce))
4004 pin_guc_id(guc, ce);
4005
4006 try_context_registration(ce, true);
4007 }
4008
4009 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
4010 {
4011 struct intel_gt *gt = guc_to_gt(guc);
4012 struct intel_engine_cs *engine;
4013 enum intel_engine_id id;
4014
4015 /* make sure all descriptors are clean... */
4016 xa_destroy(&guc->context_lookup);
4017
4018 /*
4019 * A reset might have occurred while we had a pending stalled request,
4020 * so make sure we clean that up.
4021 */
4022 guc->stalled_request = NULL;
4023 guc->submission_stall_reason = STALL_NONE;
4024
4025 /*
4026 * Some contexts might have been pinned before we enabled GuC
4027 * submission, so we need to add them to the GuC bookkeeping.
4028 * Also, after a reset of the GuC we want to make sure that the
4029 * information shared with GuC is properly reset. The kernel LRCs are
4030 * not attached to the gem_context, so they need to be added separately.
4031 */
4032 for_each_engine(engine, gt, id) {
4033 struct intel_context *ce;
4034
4035 list_for_each_entry(ce, &engine->pinned_contexts_list,
4036 pinned_contexts_link)
4037 guc_kernel_context_pin(guc, ce);
4038 }
4039 }
4040
4041 static void guc_release(struct intel_engine_cs *engine)
4042 {
4043 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4044
4045 intel_engine_cleanup_common(engine);
4046 lrc_fini_wa_ctx(engine);
4047 }
4048
4049 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4050 {
4051 struct intel_engine_cs *e;
4052 intel_engine_mask_t tmp, mask = engine->mask;
4053
4054 for_each_engine_masked(e, engine->gt, mask, tmp)
4055 e->serial++;
4056 }
4057
4058 static void guc_default_vfuncs(struct intel_engine_cs *engine)
4059 {
4060 /* Default vfuncs which can be overridden by each engine. */
4061
4062 engine->resume = guc_resume;
4063
4064 engine->cops = &guc_context_ops;
4065 engine->request_alloc = guc_request_alloc;
4066 engine->add_active_request = add_to_context;
4067 engine->remove_active_request = remove_from_context;
4068
4069 engine->sched_engine->schedule = i915_schedule;
4070
4071 engine->reset.prepare = guc_engine_reset_prepare;
4072 engine->reset.rewind = guc_rewind_nop;
4073 engine->reset.cancel = guc_reset_nop;
4074 engine->reset.finish = guc_reset_nop;
4075
4076 engine->emit_flush = gen8_emit_flush_xcs;
4077 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4078 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4079 if (GRAPHICS_VER(engine->i915) >= 12) {
4080 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4081 engine->emit_flush = gen12_emit_flush_xcs;
4082 }
4083 engine->set_default_submission = guc_set_default_submission;
4084 engine->busyness = guc_engine_busyness;
4085
4086 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4087 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4088 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4089
4090 /* Wa_14014475959:dg2 */
4091 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
4092 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
4093
4094 /*
4095 * TODO: GuC supports timeslicing and semaphores as well, but they're
4096 * handled by the firmware so some minor tweaks are required before
4097 * enabling.
4098 *
4099 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4100 */
4101
4102 engine->emit_bb_start = gen8_emit_bb_start;
4103 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
4104 engine->emit_bb_start = gen125_emit_bb_start;
4105 }
4106
4107 static void rcs_submission_override(struct intel_engine_cs *engine)
4108 {
4109 switch (GRAPHICS_VER(engine->i915)) {
4110 case 12:
4111 engine->emit_flush = gen12_emit_flush_rcs;
4112 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4113 break;
4114 case 11:
4115 engine->emit_flush = gen11_emit_flush_rcs;
4116 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4117 break;
4118 default:
4119 engine->emit_flush = gen8_emit_flush_rcs;
4120 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4121 break;
4122 }
4123 }
4124
4125 static inline void guc_default_irqs(struct intel_engine_cs *engine)
4126 {
4127 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
4128 intel_engine_set_irq_handler(engine, cs_irq_handler);
4129 }
4130
4131 static void guc_sched_engine_destroy(struct kref *kref)
4132 {
4133 struct i915_sched_engine *sched_engine =
4134 container_of(kref, typeof(*sched_engine), ref);
4135 struct intel_guc *guc = sched_engine->private_data;
4136
4137 guc->sched_engine = NULL;
4138 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4139 kfree(sched_engine);
4140 }
4141
4142 int intel_guc_submission_setup(struct intel_engine_cs *engine)
4143 {
4144 struct drm_i915_private *i915 = engine->i915;
4145 struct intel_guc *guc = &engine->gt->uc.guc;
4146
4147 /*
4148 * The setup relies on several assumptions (e.g. irqs always enabled)
4149 * that are only valid on gen11+
4150 */
4151 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
4152
4153 if (!guc->sched_engine) {
4154 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
4155 if (!guc->sched_engine)
4156 return -ENOMEM;
4157
4158 guc->sched_engine->schedule = i915_schedule;
4159 guc->sched_engine->disabled = guc_sched_engine_disabled;
4160 guc->sched_engine->private_data = guc;
4161 guc->sched_engine->destroy = guc_sched_engine_destroy;
4162 guc->sched_engine->bump_inflight_request_prio =
4163 guc_bump_inflight_request_prio;
4164 guc->sched_engine->retire_inflight_request_prio =
4165 guc_retire_inflight_request_prio;
4166 tasklet_setup(&guc->sched_engine->tasklet,
4167 guc_submission_tasklet);
4168 }
4169 i915_sched_engine_put(engine->sched_engine);
4170 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4171
4172 guc_default_vfuncs(engine);
4173 guc_default_irqs(engine);
4174 guc_init_breadcrumbs(engine);
4175
4176 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
4177 rcs_submission_override(engine);
4178
4179 lrc_init_wa_ctx(engine);
4180
4181 /* Finally, take ownership and responsibility for cleanup! */
4182 engine->sanitize = guc_sanitize;
4183 engine->release = guc_release;
4184
4185 return 0;
4186 }
4187
4188 void intel_guc_submission_enable(struct intel_guc *guc)
4189 {
4190 struct intel_gt *gt = guc_to_gt(guc);
4191
4192 /* Enable and route to GuC */
4193 if (GRAPHICS_VER(gt->i915) >= 12)
4194 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES,
4195 GUC_SEM_INTR_ROUTE_TO_GUC |
4196 GUC_SEM_INTR_ENABLE_ALL);
4197
4198 guc_init_lrc_mapping(guc);
4199 guc_init_engine_stats(guc);
4200 }
4201
4202 void intel_guc_submission_disable(struct intel_guc *guc)
4203 {
4204 struct intel_gt *gt = guc_to_gt(guc);
4205
4206 /* Note: By the time we're here, GuC may have already been reset */
4207
4208 /* Disable and route to host */
4209 if (GRAPHICS_VER(gt->i915) >= 12)
4210 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0);
4211 }
4212
4213 static bool __guc_submission_supported(struct intel_guc *guc)
4214 {
4215 /* GuC submission is unavailable for pre-Gen11 */
4216 return intel_guc_is_supported(guc) &&
4217 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
4218 }
4219
4220 static bool __guc_submission_selected(struct intel_guc *guc)
4221 {
4222 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4223
4224 if (!intel_guc_submission_is_supported(guc))
4225 return false;
4226
4227 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
4228 }
4229
4230 void intel_guc_submission_init_early(struct intel_guc *guc)
4231 {
4232 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
4233
4234 spin_lock_init(&guc->submission_state.lock);
4235 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4236 ida_init(&guc->submission_state.guc_ids);
4237 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4238 INIT_WORK(&guc->submission_state.destroyed_worker,
4239 destroyed_worker_func);
4240 INIT_WORK(&guc->submission_state.reset_fail_worker,
4241 reset_fail_worker_func);
4242
4243 spin_lock_init(&guc->timestamp.lock);
4244 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4245
4246 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
4247 guc->submission_supported = __guc_submission_supported(guc);
4248 guc->submission_selected = __guc_submission_selected(guc);
4249 }
4250
4251 static inline struct intel_context *
4252 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4253 {
4254 struct intel_context *ce;
4255
4256 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4257 drm_err(&guc_to_gt(guc)->i915->drm,
4258 "Invalid ctx_id %u\n", ctx_id);
4259 return NULL;
4260 }
4261
4262 ce = __get_context(guc, ctx_id);
4263 if (unlikely(!ce)) {
4264 drm_err(&guc_to_gt(guc)->i915->drm,
4265 "Context is NULL, ctx_id %u\n", ctx_id);
4266 return NULL;
4267 }
4268
4269 if (unlikely(intel_context_is_child(ce))) {
4270 drm_err(&guc_to_gt(guc)->i915->drm,
4271 "Context is child, ctx_id %u\n", ctx_id);
4272 return NULL;
4273 }
4274
4275 return ce;
4276 }
4277
4278 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4279 const u32 *msg,
4280 u32 len)
4281 {
4282 struct intel_context *ce;
4283 u32 ctx_id;
4284
4285 if (unlikely(len < 1)) {
4286 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4287 return -EPROTO;
4288 }
4289 ctx_id = msg[0];
4290
4291 ce = g2h_context_lookup(guc, ctx_id);
4292 if (unlikely(!ce))
4293 return -EPROTO;
4294
4295 trace_intel_context_deregister_done(ce);
4296
4297 #ifdef CONFIG_DRM_I915_SELFTEST
4298 if (unlikely(ce->drop_deregister)) {
4299 ce->drop_deregister = false;
4300 return 0;
4301 }
4302 #endif
4303
4304 if (context_wait_for_deregister_to_register(ce)) {
4305 struct intel_runtime_pm *runtime_pm =
4306 &ce->engine->gt->i915->runtime_pm;
4307 intel_wakeref_t wakeref;
4308
4309 /*
4310 * The previous owner of this guc_id has been deregistered, so it is
4311 * now safe to register this context.
4312 */
4313 with_intel_runtime_pm(runtime_pm, wakeref)
4314 register_context(ce, true);
4315 guc_signal_context_fence(ce);
4316 intel_context_put(ce);
4317 } else if (context_destroyed(ce)) {
4318 /* Context has been destroyed */
4319 intel_gt_pm_put_async(guc_to_gt(guc));
4320 release_guc_id(guc, ce);
4321 __guc_context_destroy(ce);
4322 }
4323
4324 decr_outstanding_submission_g2h(guc);
4325
4326 return 0;
4327 }
4328
4329 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
4330 const u32 *msg,
4331 u32 len)
4332 {
4333 struct intel_context *ce;
4334 unsigned long flags;
4335 u32 ctx_id;
4336
4337 if (unlikely(len < 2)) {
4338 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4339 return -EPROTO;
4340 }
4341 ctx_id = msg[0];
4342
4343 ce = g2h_context_lookup(guc, ctx_id);
4344 if (unlikely(!ce))
4345 return -EPROTO;
4346
4347 if (unlikely(context_destroyed(ce) ||
4348 (!context_pending_enable(ce) &&
4349 !context_pending_disable(ce)))) {
4350 drm_err(&guc_to_gt(guc)->i915->drm,
4351 "Bad context sched_state 0x%x, ctx_id %u\n",
4352 ce->guc_state.sched_state, ctx_id);
4353 return -EPROTO;
4354 }
4355
4356 trace_intel_context_sched_done(ce);
4357
4358 if (context_pending_enable(ce)) {
4359 #ifdef CONFIG_DRM_I915_SELFTEST
4360 if (unlikely(ce->drop_schedule_enable)) {
4361 ce->drop_schedule_enable = false;
4362 return 0;
4363 }
4364 #endif
4365
4366 spin_lock_irqsave(&ce->guc_state.lock, flags);
4367 clr_context_pending_enable(ce);
4368 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4369 } else if (context_pending_disable(ce)) {
4370 bool banned;
4371
4372 #ifdef CONFIG_DRM_I915_SELFTEST
4373 if (unlikely(ce->drop_schedule_disable)) {
4374 ce->drop_schedule_disable = false;
4375 return 0;
4376 }
4377 #endif
4378
4379 /*
4380 * Unpin must be done before __guc_signal_context_fence,
4381 * otherwise a race exists between the requests getting
4382 * submitted + retired before this unpin completes resulting in
4383 * the pin_count going to zero and the context still being
4384 * enabled.
4385 */
4386 intel_context_sched_disable_unpin(ce);
4387
4388 spin_lock_irqsave(&ce->guc_state.lock, flags);
4389 banned = context_banned(ce);
4390 clr_context_banned(ce);
4391 clr_context_pending_disable(ce);
4392 __guc_signal_context_fence(ce);
4393 guc_blocked_fence_complete(ce);
4394 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4395
4396 if (banned) {
4397 guc_cancel_context_requests(ce);
4398 intel_engine_signal_breadcrumbs(ce->engine);
4399 }
4400 }
4401
4402 decr_outstanding_submission_g2h(guc);
4403 intel_context_put(ce);
4404
4405 return 0;
4406 }
4407
4408 static void capture_error_state(struct intel_guc *guc,
4409 struct intel_context *ce)
4410 {
4411 struct intel_gt *gt = guc_to_gt(guc);
4412 struct drm_i915_private *i915 = gt->i915;
4413 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
4414 intel_wakeref_t wakeref;
4415
4416 intel_engine_set_hung_context(engine, ce);
4417 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
4418 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
4419 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
4420 }
4421
4422 static void guc_context_replay(struct intel_context *ce)
4423 {
4424 struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
4425
4426 __guc_reset_context(ce, ce->engine->mask);
4427 tasklet_hi_schedule(&sched_engine->tasklet);
4428 }
4429
4430 static void guc_handle_context_reset(struct intel_guc *guc,
4431 struct intel_context *ce)
4432 {
4433 trace_intel_context_reset(ce);
4434
4435 if (likely(intel_context_is_schedulable(ce))) {
4436 capture_error_state(guc, ce);
4437 guc_context_replay(ce);
4438 } else {
4439 drm_info(&guc_to_gt(guc)->i915->drm,
4440 "Ignoring context reset notification of exiting context 0x%04X on %s",
4441 ce->guc_id.id, ce->engine->name);
4442 }
4443 }
4444
4445 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
4446 const u32 *msg, u32 len)
4447 {
4448 struct intel_context *ce;
4449 unsigned long flags;
4450 int ctx_id;
4451
4452 if (unlikely(len != 1)) {
4453 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4454 return -EPROTO;
4455 }
4456
4457 ctx_id = msg[0];
4458
4459 /*
4460 * The context lookup uses the xarray but lookups only require an RCU lock,
4461 * not the full spinlock. So take the lock explicitly and keep it until the
4462 * context has been reference counted to ensure it can't be destroyed
4463 * asynchronously until the reset is done.
4464 */
4465 xa_lock_irqsave(&guc->context_lookup, flags);
4466 ce = g2h_context_lookup(guc, ctx_id);
4467 if (ce)
4468 intel_context_get(ce);
4469 xa_unlock_irqrestore(&guc->context_lookup, flags);
4470
4471 if (unlikely(!ce))
4472 return -EPROTO;
4473
4474 guc_handle_context_reset(guc, ce);
4475 intel_context_put(ce);
4476
4477 return 0;
4478 }
4479
4480 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
4481 const u32 *msg, u32 len)
4482 {
4483 u32 status;
4484
4485 if (unlikely(len != 1)) {
4486 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4487 return -EPROTO;
4488 }
4489
4490 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
4491 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4492 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
4493
4494 intel_guc_capture_process(guc);
4495
4496 return 0;
4497 }
4498
4499 struct intel_engine_cs *
4500 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4501 {
4502 struct intel_gt *gt = guc_to_gt(guc);
4503 u8 engine_class = guc_class_to_engine_class(guc_class);
4504
4505 /* Class index is checked in class converter */
4506 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4507
4508 return gt->engine_class[engine_class][instance];
4509 }
4510
4511 static void reset_fail_worker_func(struct work_struct *w)
4512 {
4513 struct intel_guc *guc = container_of(w, struct intel_guc,
4514 submission_state.reset_fail_worker);
4515 struct intel_gt *gt = guc_to_gt(guc);
4516 intel_engine_mask_t reset_fail_mask;
4517 unsigned long flags;
4518
4519 spin_lock_irqsave(&guc->submission_state.lock, flags);
4520 reset_fail_mask = guc->submission_state.reset_fail_mask;
4521 guc->submission_state.reset_fail_mask = 0;
4522 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4523
4524 if (likely(reset_fail_mask))
4525 intel_gt_handle_error(gt, reset_fail_mask,
4526 I915_ERROR_CAPTURE,
4527 "GuC failed to reset engine mask=0x%x\n",
4528 reset_fail_mask);
4529 }
4530
4531 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
4532 const u32 *msg, u32 len)
4533 {
4534 struct intel_engine_cs *engine;
4535 struct intel_gt *gt = guc_to_gt(guc);
4536 u8 guc_class, instance;
4537 u32 reason;
4538 unsigned long flags;
4539
4540 if (unlikely(len != 3)) {
4541 drm_err(>->i915->drm, "Invalid length %u", len);
4542 return -EPROTO;
4543 }
4544
4545 guc_class = msg[0];
4546 instance = msg[1];
4547 reason = msg[2];
4548
4549 engine = intel_guc_lookup_engine(guc, guc_class, instance);
4550 if (unlikely(!engine)) {
4551 drm_err(>->i915->drm,
4552 "Invalid engine %d:%d", guc_class, instance);
4553 return -EPROTO;
4554 }
4555
4556 /*
4557 * This is an unexpected failure of a hardware feature. So, log a real
4558 * error message, not just the informational message that comes with the reset.
4559 */
4560 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
4561 guc_class, instance, engine->name, reason);
4562
4563 spin_lock_irqsave(&guc->submission_state.lock, flags);
4564 guc->submission_state.reset_fail_mask |= engine->mask;
4565 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4566
4567 /*
4568 * A GT reset flushes this worker queue (G2H handler) so we must use
4569 * another worker to trigger a GT reset.
4570 */
4571 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
4572
4573 return 0;
4574 }
4575
4576 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
4577 {
4578 struct intel_guc *guc = &engine->gt->uc.guc;
4579 struct intel_context *ce;
4580 struct i915_request *rq;
4581 unsigned long index;
4582 unsigned long flags;
4583
4584 /* Reset called during driver load? GuC not yet initialised! */
4585 if (unlikely(!guc_submission_initialized(guc)))
4586 return;
4587
4588 xa_lock_irqsave(&guc->context_lookup, flags);
4589 xa_for_each(&guc->context_lookup, index, ce) {
4590 if (!kref_get_unless_zero(&ce->ref))
4591 continue;
4592
4593 xa_unlock(&guc->context_lookup);
4594
4595 if (!intel_context_is_pinned(ce))
4596 goto next;
4597
4598 if (intel_engine_is_virtual(ce->engine)) {
4599 if (!(ce->engine->mask & engine->mask))
4600 goto next;
4601 } else {
4602 if (ce->engine != engine)
4603 goto next;
4604 }
4605
4606 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
4607 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
4608 continue;
4609
4610 intel_engine_set_hung_context(engine, ce);
4611
4612 /* Can only cope with one hang at a time... */
4613 intel_context_put(ce);
4614 xa_lock(&guc->context_lookup);
4615 goto done;
4616 }
4617 next:
4618 intel_context_put(ce);
4619 xa_lock(&guc->context_lookup);
4620 }
4621 done:
4622 xa_unlock_irqrestore(&guc->context_lookup, flags);
4623 }
4624
4625 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
4626 struct i915_request *hung_rq,
4627 struct drm_printer *m)
4628 {
4629 struct intel_guc *guc = &engine->gt->uc.guc;
4630 struct intel_context *ce;
4631 unsigned long index;
4632 unsigned long flags;
4633
4634 /* Reset called during driver load? GuC not yet initialised! */
4635 if (unlikely(!guc_submission_initialized(guc)))
4636 return;
4637
4638 xa_lock_irqsave(&guc->context_lookup, flags);
4639 xa_for_each(&guc->context_lookup, index, ce) {
4640 if (!kref_get_unless_zero(&ce->ref))
4641 continue;
4642
4643 xa_unlock(&guc->context_lookup);
4644
4645 if (!intel_context_is_pinned(ce))
4646 goto next;
4647
4648 if (intel_engine_is_virtual(ce->engine)) {
4649 if (!(ce->engine->mask & engine->mask))
4650 goto next;
4651 } else {
4652 if (ce->engine != engine)
4653 goto next;
4654 }
4655
4656 spin_lock(&ce->guc_state.lock);
4657 intel_engine_dump_active_requests(&ce->guc_state.requests,
4658 hung_rq, m);
4659 spin_unlock(&ce->guc_state.lock);
4660
4661 next:
4662 intel_context_put(ce);
4663 xa_lock(&guc->context_lookup);
4664 }
4665 xa_unlock_irqrestore(&guc->context_lookup, flags);
4666 }
4667
4668 void intel_guc_submission_print_info(struct intel_guc *guc,
4669 struct drm_printer *p)
4670 {
4671 struct i915_sched_engine *sched_engine = guc->sched_engine;
4672 struct rb_node *rb;
4673 unsigned long flags;
4674
4675 if (!sched_engine)
4676 return;
4677
4678 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
4679 atomic_read(&guc->outstanding_submission_g2h));
4680 drm_printf(p, "GuC tasklet count: %u\n\n",
4681 atomic_read(&sched_engine->tasklet.count));
4682
4683 spin_lock_irqsave(&sched_engine->lock, flags);
4684 drm_printf(p, "Requests in GuC submit tasklet:\n");
4685 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
4686 struct i915_priolist *pl = to_priolist(rb);
4687 struct i915_request *rq;
4688
4689 priolist_for_each_request(rq, pl)
4690 drm_printf(p, "guc_id=%u, seqno=%llu\n",
4691 rq->context->guc_id.id,
4692 rq->fence.seqno);
4693 }
4694 spin_unlock_irqrestore(&sched_engine->lock, flags);
4695 drm_printf(p, "\n");
4696 }
4697
4698 static inline void guc_log_context_priority(struct drm_printer *p,
4699 struct intel_context *ce)
4700 {
4701 int i;
4702
4703 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
4704 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
4705 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
4706 i < GUC_CLIENT_PRIORITY_NUM; ++i) {
4707 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
4708 i, ce->guc_state.prio_count[i]);
4709 }
4710 drm_printf(p, "\n");
4711 }
4712
4713 static inline void guc_log_context(struct drm_printer *p,
4714 struct intel_context *ce)
4715 {
4716 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
4717 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
4718 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
4719 ce->ring->head,
4720 ce->lrc_reg_state[CTX_RING_HEAD]);
4721 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
4722 ce->ring->tail,
4723 ce->lrc_reg_state[CTX_RING_TAIL]);
4724 drm_printf(p, "\t\tContext Pin Count: %u\n",
4725 atomic_read(&ce->pin_count));
4726 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
4727 atomic_read(&ce->guc_id.ref));
4728 drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
4729 ce->guc_state.sched_state);
4730 }
4731
4732 void intel_guc_submission_print_context_info(struct intel_guc *guc,
4733 struct drm_printer *p)
4734 {
4735 struct intel_context *ce;
4736 unsigned long index;
4737 unsigned long flags;
4738
4739 xa_lock_irqsave(&guc->context_lookup, flags);
4740 xa_for_each(&guc->context_lookup, index, ce) {
4741 GEM_BUG_ON(intel_context_is_child(ce));
4742
4743 guc_log_context(p, ce);
4744 guc_log_context_priority(p, ce);
4745
4746 if (intel_context_is_parent(ce)) {
4747 struct intel_context *child;
4748
4749 drm_printf(p, "\t\tNumber children: %u\n",
4750 ce->parallel.number_children);
4751
4752 if (ce->parallel.guc.wq_status) {
4753 drm_printf(p, "\t\tWQI Head: %u\n",
4754 READ_ONCE(*ce->parallel.guc.wq_head));
4755 drm_printf(p, "\t\tWQI Tail: %u\n",
4756 READ_ONCE(*ce->parallel.guc.wq_tail));
4757 drm_printf(p, "\t\tWQI Status: %u\n\n",
4758 READ_ONCE(*ce->parallel.guc.wq_status));
4759 }
4760
4761 if (ce->engine->emit_bb_start ==
4762 emit_bb_start_parent_no_preempt_mid_batch) {
4763 u8 i;
4764
4765 drm_printf(p, "\t\tChildren Go: %u\n\n",
4766 get_children_go_value(ce));
4767 for (i = 0; i < ce->parallel.number_children; ++i)
4768 drm_printf(p, "\t\tChildren Join: %u\n",
4769 get_children_join_value(ce, i));
4770 }
4771
4772 for_each_child(ce, child)
4773 guc_log_context(p, child);
4774 }
4775 }
4776 xa_unlock_irqrestore(&guc->context_lookup, flags);
4777 }
4778
4779 static inline u32 get_children_go_addr(struct intel_context *ce)
4780 {
4781 GEM_BUG_ON(!intel_context_is_parent(ce));
4782
4783 return i915_ggtt_offset(ce->state) +
4784 __get_parent_scratch_offset(ce) +
4785 offsetof(struct parent_scratch, go.semaphore);
4786 }
4787
4788 static inline u32 get_children_join_addr(struct intel_context *ce,
4789 u8 child_index)
4790 {
4791 GEM_BUG_ON(!intel_context_is_parent(ce));
4792
4793 return i915_ggtt_offset(ce->state) +
4794 __get_parent_scratch_offset(ce) +
4795 offsetof(struct parent_scratch, join[child_index].semaphore);
4796 }
4797
4798 #define PARENT_GO_BB 1
4799 #define PARENT_GO_FINI_BREADCRUMB 0
4800 #define CHILD_GO_BB 1
4801 #define CHILD_GO_FINI_BREADCRUMB 0
4802 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
4803 u64 offset, u32 len,
4804 const unsigned int flags)
4805 {
4806 struct intel_context *ce = rq->context;
4807 u32 *cs;
4808 u8 i;
4809
4810 GEM_BUG_ON(!intel_context_is_parent(ce));
4811
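/*
 * Ring space: one 4-dword semaphore wait per child, plus 2 dwords to disable
 * arbitration, 4 for the "children go" write and 4 for the batch buffer
 * start, i.e. 10 + 4 * number_children.
 */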
4812 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
4813 if (IS_ERR(cs))
4814 return PTR_ERR(cs);
4815
4816 /* Wait on children */
4817 for (i = 0; i < ce->parallel.number_children; ++i) {
4818 *cs++ = (MI_SEMAPHORE_WAIT |
4819 MI_SEMAPHORE_GLOBAL_GTT |
4820 MI_SEMAPHORE_POLL |
4821 MI_SEMAPHORE_SAD_EQ_SDD);
4822 *cs++ = PARENT_GO_BB;
4823 *cs++ = get_children_join_addr(ce, i);
4824 *cs++ = 0;
4825 }
4826
4827 /* Turn off preemption */
4828 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4829 *cs++ = MI_NOOP;
4830
4831 /* Tell children go */
4832 cs = gen8_emit_ggtt_write(cs,
4833 CHILD_GO_BB,
4834 get_children_go_addr(ce),
4835 0);
4836
4837 /* Jump to batch */
4838 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4839 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4840 *cs++ = lower_32_bits(offset);
4841 *cs++ = upper_32_bits(offset);
4842 *cs++ = MI_NOOP;
4843
4844 intel_ring_advance(rq, cs);
4845
4846 return 0;
4847 }
4848
4849 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
4850 u64 offset, u32 len,
4851 const unsigned int flags)
4852 {
4853 struct intel_context *ce = rq->context;
4854 struct intel_context *parent = intel_context_to_parent(ce);
4855 u32 *cs;
4856
4857 GEM_BUG_ON(!intel_context_is_child(ce));
4858
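/*
 * Ring space: 4 dwords to signal the parent, 4 for the semaphore wait, 1 to
 * disable arbitration and 3 for the batch buffer start, i.e. 12 in total.
 */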
4859 cs = intel_ring_begin(rq, 12);
4860 if (IS_ERR(cs))
4861 return PTR_ERR(cs);
4862
4863 /* Signal parent */
4864 cs = gen8_emit_ggtt_write(cs,
4865 PARENT_GO_BB,
4866 get_children_join_addr(parent,
4867 ce->parallel.child_index),
4868 0);
4869
4870 /* Wait on parent for go */
4871 *cs++ = (MI_SEMAPHORE_WAIT |
4872 MI_SEMAPHORE_GLOBAL_GTT |
4873 MI_SEMAPHORE_POLL |
4874 MI_SEMAPHORE_SAD_EQ_SDD);
4875 *cs++ = CHILD_GO_BB;
4876 *cs++ = get_children_go_addr(parent);
4877 *cs++ = 0;
4878
4879 /* Turn off preemption */
4880 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4881
4882 /* Jump to batch */
4883 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4884 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4885 *cs++ = lower_32_bits(offset);
4886 *cs++ = upper_32_bits(offset);
4887
4888 intel_ring_advance(rq, cs);
4889
4890 return 0;
4891 }
4892
4893 static u32 *
4894 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4895 u32 *cs)
4896 {
4897 struct intel_context *ce = rq->context;
4898 u8 i;
4899
4900 GEM_BUG_ON(!intel_context_is_parent(ce));
4901
4902 /* Wait on children */
4903 for (i = 0; i < ce->parallel.number_children; ++i) {
4904 *cs++ = (MI_SEMAPHORE_WAIT |
4905 MI_SEMAPHORE_GLOBAL_GTT |
4906 MI_SEMAPHORE_POLL |
4907 MI_SEMAPHORE_SAD_EQ_SDD);
4908 *cs++ = PARENT_GO_FINI_BREADCRUMB;
4909 *cs++ = get_children_join_addr(ce, i);
4910 *cs++ = 0;
4911 }
4912
4913 /* Turn on preemption */
4914 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4915 *cs++ = MI_NOOP;
4916
4917 /* Tell children go */
4918 cs = gen8_emit_ggtt_write(cs,
4919 CHILD_GO_FINI_BREADCRUMB,
4920 get_children_go_addr(ce),
4921 0);
4922
4923 return cs;
4924 }
4925
4926 /*
4927 * If this is true, a submission of multi-lrc requests had an error and the
4928 * requests need to be skipped. The front end (execbuf IOCTL) should've called
4929 * i915_request_skip which squashes the BB but we still need to emit the fini
4930 * breadcrumb seqno write. At this point we don't know how many of the
4931 * requests in the multi-lrc submission were generated so we can't do the
4932 * handshake between the parent and children (e.g. if 4 requests should be
4933 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
4934 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
4935 * has occurred on any of the requests in the submission / relationship.
4936 */
4937 static inline bool skip_handshake(struct i915_request *rq)
4938 {
4939 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
4940 }
4941
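/*
 * NON_SKIP_LEN is the number of dwords emitted after the (possibly skipped)
 * handshake: 4 for the fini breadcrumb seqno write plus MI_USER_INTERRUPT and
 * a trailing MI_NOOP. The GEM_BUG_ONs in the emitters below verify the math.
 */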
4942 #define NON_SKIP_LEN 6
4943 static u32 *
4944 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4945 u32 *cs)
4946 {
4947 struct intel_context *ce = rq->context;
4948 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
4949 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
4950
4951 GEM_BUG_ON(!intel_context_is_parent(ce));
4952
4953 if (unlikely(skip_handshake(rq))) {
4954 /*
4955 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
4956 * the NON_SKIP_LEN comes from the length of the emits below.
4957 */
4958 memset(cs, 0, sizeof(u32) *
4959 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
4960 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
4961 } else {
4962 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
4963 }
4964
4965 /* Emit fini breadcrumb */
4966 before_fini_breadcrumb_user_interrupt_cs = cs;
4967 cs = gen8_emit_ggtt_write(cs,
4968 rq->fence.seqno,
4969 i915_request_active_timeline(rq)->hwsp_offset,
4970 0);
4971
4972 /* User interrupt */
4973 *cs++ = MI_USER_INTERRUPT;
4974 *cs++ = MI_NOOP;
4975
4976 /* Ensure our math for skip + emit is correct */
4977 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
4978 cs);
4979 GEM_BUG_ON(start_fini_breadcrumb_cs +
4980 ce->engine->emit_fini_breadcrumb_dw != cs);
4981
4982 rq->tail = intel_ring_offset(rq, cs);
4983
4984 return cs;
4985 }
4986
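/*
 * Child side of the fini breadcrumb handshake: turn arbitration (preemption)
 * back on, write PARENT_GO_FINI_BREADCRUMB into this child's join slot to
 * signal the parent, then spin on the shared "go" semaphore until the parent
 * writes CHILD_GO_FINI_BREADCRUMB.
 */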
static u32 *
__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						  u32 *cs)
{
	struct intel_context *ce = rq->context;
	struct intel_context *parent = intel_context_to_parent(ce);

	GEM_BUG_ON(!intel_context_is_child(ce));

	/* Turn on preemption */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Signal parent */
	cs = gen8_emit_ggtt_write(cs,
				  PARENT_GO_FINI_BREADCRUMB,
				  get_children_join_addr(parent,
							 ce->parallel.child_index),
				  0);

	/* Wait on parent for the go signal */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_EQ_SDD);
	*cs++ = CHILD_GO_FINI_BREADCRUMB;
	*cs++ = get_children_go_addr(parent);
	*cs++ = 0;

	return cs;
}

static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						u32 *cs)
{
	struct intel_context *ce = rq->context;
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch;
		 * the NON_SKIP_LEN comes from the length of the emits below.
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
	} else {
		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	before_fini_breadcrumb_user_interrupt_cs = cs;
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	/* Ensure our math for skip + emit is correct */
	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
		   cs);
	GEM_BUG_ON(start_fini_breadcrumb_cs +
		   ce->engine->emit_fini_breadcrumb_dw != cs);

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}

#undef NON_SKIP_LEN

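/*
 * Create a GuC virtual engine spanning the @count physical engines in
 * @siblings. With GuC submission the firmware selects which physical
 * instance runs each request, so this mostly validates the siblings
 * (same class, no duplicates), aggregates their masks, and inherits the
 * class-specific emitters and scheduling properties from the first sibling.
 */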
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags)
{
	struct guc_virtual_engine *ve;
	struct intel_guc *guc;
	unsigned int n;
	int err;

	ve = kzalloc(sizeof(*ve), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	guc = &siblings[0]->gt->uc.guc;

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);

	ve->base.cops = &virtual_guc_context_ops;
	ve->base.request_alloc = guc_request_alloc;
	ve->base.bump_serial = virtual_guc_bump_serial;

	ve->base.submit_request = guc_submit_request;

	ve->base.flags = I915_ENGINE_IS_VIRTUAL;

	intel_context_init(&ve->context, &ve->base);

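	/*
	 * Walk the siblings: reject duplicates and mixed engine classes,
	 * accumulate the physical and logical masks, and copy the
	 * class-specific vfuncs and properties from the first sibling.
	 */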
	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			DRM_DEBUG("duplicate %s entry in load balancer\n",
				  sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		ve->base.mask |= sibling->mask;
		ve->base.logical_mask |= sibling->logical_mask;

		if (n != 0 && ve->base.class != sibling->class) {
			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
				  sibling->class, ve->base.class);
			err = -EINVAL;
			goto err_put;
		} else if (n == 0) {
			ve->base.class = sibling->class;
			ve->base.uabi_class = sibling->uabi_class;
			snprintf(ve->base.name, sizeof(ve->base.name),
				 "v%dx%d", ve->base.class, count);
			ve->base.context_size = sibling->context_size;

			ve->base.add_active_request =
				sibling->add_active_request;
			ve->base.remove_active_request =
				sibling->remove_active_request;
			ve->base.emit_bb_start = sibling->emit_bb_start;
			ve->base.emit_flush = sibling->emit_flush;
			ve->base.emit_init_breadcrumb =
				sibling->emit_init_breadcrumb;
			ve->base.emit_fini_breadcrumb =
				sibling->emit_fini_breadcrumb;
			ve->base.emit_fini_breadcrumb_dw =
				sibling->emit_fini_breadcrumb_dw;
			ve->base.breadcrumbs =
				intel_breadcrumbs_get(sibling->breadcrumbs);

			ve->base.flags |= sibling->flags;

			ve->base.props.timeslice_duration_ms =
				sibling->props.timeslice_duration_ms;
			ve->base.props.preempt_timeout_ms =
				sibling->props.preempt_timeout_ms;
		}
	}

	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}

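/**
 * intel_guc_virtual_engine_has_heartbeat - check the heartbeats of a
 * virtual engine's physical engines
 * @ve: virtual engine
 *
 * Return: true if any physical engine in @ve's mask has a non-zero
 * heartbeat interval configured, false otherwise.
 */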
bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, mask = ve->mask;

	for_each_engine_masked(engine, ve->gt, mask, tmp)
		if (READ_ONCE(engine->props.heartbeat_interval_ms))
			return true;

	return false;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
#include "selftest_guc_multi_lrc.c"
#include "selftest_guc_hangcheck.c"
#endif