1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2014 Intel Corporation
4 */
5
6 #include <linux/circ_buf.h>
7
8 #include "gem/i915_gem_context.h"
9 #include "gem/i915_gem_lmem.h"
10 #include "gt/gen8_engine_cs.h"
11 #include "gt/intel_breadcrumbs.h"
12 #include "gt/intel_context.h"
13 #include "gt/intel_engine_heartbeat.h"
14 #include "gt/intel_engine_pm.h"
15 #include "gt/intel_engine_regs.h"
16 #include "gt/intel_gpu_commands.h"
17 #include "gt/intel_gt.h"
18 #include "gt/intel_gt_clock_utils.h"
19 #include "gt/intel_gt_irq.h"
20 #include "gt/intel_gt_pm.h"
21 #include "gt/intel_gt_regs.h"
22 #include "gt/intel_gt_requests.h"
23 #include "gt/intel_lrc.h"
24 #include "gt/intel_lrc_reg.h"
25 #include "gt/intel_mocs.h"
26 #include "gt/intel_ring.h"
27
28 #include "intel_guc_ads.h"
29 #include "intel_guc_capture.h"
30 #include "intel_guc_print.h"
31 #include "intel_guc_submission.h"
32
33 #include "i915_drv.h"
34 #include "i915_reg.h"
35 #include "i915_trace.h"
36
37 /**
38 * DOC: GuC-based command submission
39 *
40 * The Scratch registers:
41 * There are 16 MMIO-based registers starting at 0xC180. The kernel driver writes
42 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
43 * triggers an interrupt on the GuC via another register write (0xC4C8).
44 * Firmware writes a success/fail code back to the action register after it
45 * processes the request. The kernel driver polls waiting for this update and
46 * then proceeds.
47 *
48 * Command Transport buffers (CTBs):
49 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
50 * - G2H) are a message interface between the i915 and GuC.
51 *
52 * Context registration:
53 * Before a context can be submitted it must be registered with the GuC via a
54 * H2G. A unique guc_id is associated with each context. The context is either
55 * registered at request creation time (normal operation) or at submission time
56 * (abnormal operation, e.g. after a reset).
57 *
58 * Context submission:
59 * The i915 updates the LRC tail value in memory. The i915 must enable the
60 * scheduling of the context within the GuC for the GuC to actually consider it.
61 * Therefore, the first time a disabled context is submitted we use a schedule
62 * enable H2G, while follow up submissions are done via the context submit H2G,
63 * which informs the GuC that a previously enabled context has new work
64 * available.
65 *
66 * Context unpin:
67 * To unpin a context a H2G is used to disable scheduling. When the
68 * corresponding G2H returns indicating the scheduling disable operation has
69 * completed it is safe to unpin the context. While a disable is in flight it
70 * isn't safe to resubmit the context so a fence is used to stall all future
71 * requests of that context until the G2H is returned. Because this interaction
72 * with the GuC takes a non-zero amount of time we delay the disabling of
73 * scheduling after the pin count goes to zero by a configurable period of time
74 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
75 * time to resubmit something on the context before doing this costly operation.
76 * This delay is only done if the context isn't closed and the guc_id usage is
77 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
78 *
79 * Context deregistration:
80 * Before a context can be destroyed or if we steal its guc_id we must
81 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
82 * safe to submit anything to this guc_id until the deregister completes so a
83 * fence is used to stall all requests associated with this guc_id until the
84 * corresponding G2H returns indicating the guc_id has been deregistered.
85 *
86 * submission_state.guc_ids:
87 * Unique number associated with private GuC context data passed in during
88 * context registration / submission / deregistration. 64k available. Simple ida
89 * is used for allocation.
90 *
91 * Stealing guc_ids:
92 * If no guc_ids are available they can be stolen from another context at
93 * request creation time if that context is unpinned. If a guc_id can't be found
94 * we punt this problem to the user as we believe this is near impossible to hit
95 * during normal use cases.
96 *
97 * Locking:
98 * In the GuC submission code we have 3 basic spin locks which protect
99 * everything. Details about each below.
100 *
101 * sched_engine->lock
102 * This is the submission lock for all contexts that share an i915 schedule
103 * engine (sched_engine), thus only one of the contexts which share a
104 * sched_engine can be submitting at a time. Currently only one sched_engine is
105 * used for all of GuC submission but that could change in the future.
106 *
107 * guc->submission_state.lock
108 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
109 * list.
110 *
111 * ce->guc_state.lock
112 * Protects everything under ce->guc_state. Ensures that a context is in the
113 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
114 * on a disabled context (bad idea), we don't issue a schedule enable when a
115 * schedule disable is in flight, etc... Also protects list of inflight requests
116 * on the context and the priority management state. Lock is individual to each
117 * context.
118 *
119 * Lock ordering rules:
120 * sched_engine->lock -> ce->guc_state.lock
121 * guc->submission_state.lock -> ce->guc_state.lock
122 *
123 * Reset races:
124 * When a full GT reset is triggered it is assumed that some G2H responses to
125 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
126 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
127 * contexts, release guc_ids, etc...). When this occurs we can scrub the
128 * context state and clean up appropriately; however, this is quite racy.
129 * To avoid races, the reset code must disable submission before scrubbing for
130 * the missing G2H, while the submission code must check for submission being
131 * disabled and skip sending H2Gs and updating context states when it is. Both
132 * sides must also make sure to hold the relevant locks.
133 */
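
/*
 * A minimal sketch of the lock ordering rules above, assuming only that both
 * locks are the spinlocks used elsewhere in this file. The helper name and
 * body are illustrative and not part of the driver.
 */
__maybe_unused
static void guc_lock_ordering_sketch(struct intel_guc *guc, struct intel_context *ce)
{
unsigned long flags;

/* Outer lock first: guc->submission_state.lock ... */
spin_lock_irqsave(&guc->submission_state.lock, flags);
/* ... then the per-context ce->guc_state.lock. */
spin_lock(&ce->guc_state.lock);

/* guc_id and sched_state updates would go here. */

spin_unlock(&ce->guc_state.lock);
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}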
134
135 /* GuC Virtual Engine */
136 struct guc_virtual_engine {
137 struct intel_engine_cs base;
138 struct intel_context context;
139 };
140
141 static struct intel_context *
142 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
143 unsigned long flags);
144
145 static struct intel_context *
146 guc_create_parallel(struct intel_engine_cs **engines,
147 unsigned int num_siblings,
148 unsigned int width);
149
150 #define GUC_REQUEST_SIZE 64 /* bytes */
151
152 /*
153 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
154 * per the GuC submission interface. A different allocation algorithm is used
155 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
156 * partition the guc_id space. We believe the number of multi-lrc contexts in
157 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
158 * multi-lrc.
159 */
160 #define NUMBER_MULTI_LRC_GUC_ID(guc) \
161 ((guc)->submission_state.num_guc_ids / 16)
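
/* With the full 64k guc_id space this reserves 4096 ids for multi-lrc. */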
162
163 /*
164 * Below is a set of functions which control the GuC scheduling state which
165 * require a lock.
166 */
167 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
168 #define SCHED_STATE_DESTROYED BIT(1)
169 #define SCHED_STATE_PENDING_DISABLE BIT(2)
170 #define SCHED_STATE_BANNED BIT(3)
171 #define SCHED_STATE_ENABLED BIT(4)
172 #define SCHED_STATE_PENDING_ENABLE BIT(5)
173 #define SCHED_STATE_REGISTERED BIT(6)
174 #define SCHED_STATE_POLICY_REQUIRED BIT(7)
175 #define SCHED_STATE_CLOSED BIT(8)
176 #define SCHED_STATE_BLOCKED_SHIFT 9
177 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
178 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
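
/*
 * SCHED_STATE_BLOCKED is a 12-bit nesting counter stored above the single-bit
 * flags: incr/decr_context_blocked() add/subtract its LSB under the lock, and
 * context_blocked() returns the current nesting depth.
 */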
179
180 static inline void init_sched_state(struct intel_context *ce)
181 {
182 lockdep_assert_held(&ce->guc_state.lock);
183 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
184 }
185
186 /*
187 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
188 * A context close can race with the submission path, so SCHED_STATE_CLOSED
189 * can be set immediately before we try to register.
190 */
191 #define SCHED_STATE_VALID_INIT \
192 (SCHED_STATE_BLOCKED_MASK | \
193 SCHED_STATE_CLOSED | \
194 SCHED_STATE_REGISTERED)
195
196 __maybe_unused
197 static bool sched_state_is_init(struct intel_context *ce)
198 {
199 return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
200 }
201
202 static inline bool
203 context_wait_for_deregister_to_register(struct intel_context *ce)
204 {
205 return ce->guc_state.sched_state &
206 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
207 }
208
209 static inline void
210 set_context_wait_for_deregister_to_register(struct intel_context *ce)
211 {
212 lockdep_assert_held(&ce->guc_state.lock);
213 ce->guc_state.sched_state |=
214 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
215 }
216
217 static inline void
218 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
219 {
220 lockdep_assert_held(&ce->guc_state.lock);
221 ce->guc_state.sched_state &=
222 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
223 }
224
225 static inline bool
226 context_destroyed(struct intel_context *ce)
227 {
228 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
229 }
230
231 static inline void
232 set_context_destroyed(struct intel_context *ce)
233 {
234 lockdep_assert_held(&ce->guc_state.lock);
235 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
236 }
237
238 static inline bool context_pending_disable(struct intel_context *ce)
239 {
240 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
241 }
242
243 static inline void set_context_pending_disable(struct intel_context *ce)
244 {
245 lockdep_assert_held(&ce->guc_state.lock);
246 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
247 }
248
249 static inline void clr_context_pending_disable(struct intel_context *ce)
250 {
251 lockdep_assert_held(&ce->guc_state.lock);
252 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
253 }
254
255 static inline bool context_banned(struct intel_context *ce)
256 {
257 return ce->guc_state.sched_state & SCHED_STATE_BANNED;
258 }
259
260 static inline void set_context_banned(struct intel_context *ce)
261 {
262 lockdep_assert_held(&ce->guc_state.lock);
263 ce->guc_state.sched_state |= SCHED_STATE_BANNED;
264 }
265
266 static inline void clr_context_banned(struct intel_context *ce)
267 {
268 lockdep_assert_held(&ce->guc_state.lock);
269 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
270 }
271
272 static inline bool context_enabled(struct intel_context *ce)
273 {
274 return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
275 }
276
277 static inline void set_context_enabled(struct intel_context *ce)
278 {
279 lockdep_assert_held(&ce->guc_state.lock);
280 ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
281 }
282
283 static inline void clr_context_enabled(struct intel_context *ce)
284 {
285 lockdep_assert_held(&ce->guc_state.lock);
286 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
287 }
288
289 static inline bool context_pending_enable(struct intel_context *ce)
290 {
291 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
292 }
293
294 static inline void set_context_pending_enable(struct intel_context *ce)
295 {
296 lockdep_assert_held(&ce->guc_state.lock);
297 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
298 }
299
300 static inline void clr_context_pending_enable(struct intel_context *ce)
301 {
302 lockdep_assert_held(&ce->guc_state.lock);
303 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
304 }
305
306 static inline bool context_registered(struct intel_context *ce)
307 {
308 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
309 }
310
311 static inline void set_context_registered(struct intel_context *ce)
312 {
313 lockdep_assert_held(&ce->guc_state.lock);
314 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
315 }
316
317 static inline void clr_context_registered(struct intel_context *ce)
318 {
319 lockdep_assert_held(&ce->guc_state.lock);
320 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
321 }
322
323 static inline bool context_policy_required(struct intel_context *ce)
324 {
325 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
326 }
327
328 static inline void set_context_policy_required(struct intel_context *ce)
329 {
330 lockdep_assert_held(&ce->guc_state.lock);
331 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
332 }
333
334 static inline void clr_context_policy_required(struct intel_context *ce)
335 {
336 lockdep_assert_held(&ce->guc_state.lock);
337 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
338 }
339
340 static inline bool context_close_done(struct intel_context *ce)
341 {
342 return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
343 }
344
345 static inline void set_context_close_done(struct intel_context *ce)
346 {
347 lockdep_assert_held(&ce->guc_state.lock);
348 ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
349 }
350
351 static inline u32 context_blocked(struct intel_context *ce)
352 {
353 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
354 SCHED_STATE_BLOCKED_SHIFT;
355 }
356
357 static inline void incr_context_blocked(struct intel_context *ce)
358 {
359 lockdep_assert_held(&ce->guc_state.lock);
360
361 ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
362
363 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
364 }
365
366 static inline void decr_context_blocked(struct intel_context *ce)
367 {
368 lockdep_assert_held(&ce->guc_state.lock);
369
370 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
371
372 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
373 }
374
375 static struct intel_context *
376 request_to_scheduling_context(struct i915_request *rq)
377 {
378 return intel_context_to_parent(rq->context);
379 }
380
381 static inline bool context_guc_id_invalid(struct intel_context *ce)
382 {
383 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
384 }
385
386 static inline void set_context_guc_id_invalid(struct intel_context *ce)
387 {
388 ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
389 }
390
391 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
392 {
393 return &ce->engine->gt->uc.guc;
394 }
395
396 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
397 {
398 return rb_entry(rb, struct i915_priolist, node);
399 }
400
401 /*
402 * When using multi-lrc submission a scratch memory area is reserved in the
403 * parent's context state for the process descriptor, work queue, and handshake
404 * between the parent + children contexts to insert safe preemption points
405 * between each of the BBs. Currently the scratch area is sized to a page.
406 *
407 * The layout of this scratch area is below:
408 * 0 guc_process_desc
409 * + sizeof(struct guc_process_desc) child go
410 * + CACHELINE_BYTES child join[0]
411 * ...
412 * + CACHELINE_BYTES child join[n - 1]
413 * ... unused
414 * PARENT_SCRATCH_SIZE / 2 work queue start
415 * ... work queue
416 * PARENT_SCRATCH_SIZE - 1 work queue end
417 */
418 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
419 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
420
421 struct sync_semaphore {
422 u32 semaphore;
423 u8 unused[CACHELINE_BYTES - sizeof(u32)];
424 };
425
426 struct parent_scratch {
427 union guc_descs {
428 struct guc_sched_wq_desc wq_desc;
429 struct guc_process_desc_v69 pdesc;
430 } descs;
431
432 struct sync_semaphore go;
433 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
434
435 u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
436 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
437
438 u32 wq[WQ_SIZE / sizeof(u32)];
439 };
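
/*
 * struct parent_scratch mirrors the layout described above: the descriptor
 * (v69 pdesc or v70 wq_desc), the go/join semaphores (one cacheline each),
 * padding, and the work queue filling the second half of the page.
 */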
440
441 static u32 __get_parent_scratch_offset(struct intel_context *ce)
442 {
443 GEM_BUG_ON(!ce->parallel.guc.parent_page);
444
445 return ce->parallel.guc.parent_page * PAGE_SIZE;
446 }
447
448 static u32 __get_wq_offset(struct intel_context *ce)
449 {
450 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
451
452 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
453 }
454
455 static struct parent_scratch *
456 __get_parent_scratch(struct intel_context *ce)
457 {
458 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
459 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
460
461 /*
462 * Need to subtract LRC_STATE_OFFSET here as the
463 * parallel.guc.parent_page is the offset into ce->state while
464 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
465 */
466 return (struct parent_scratch *)
467 (ce->lrc_reg_state +
468 ((__get_parent_scratch_offset(ce) -
469 LRC_STATE_OFFSET) / sizeof(u32)));
470 }
471
472 static struct guc_process_desc_v69 *
473 __get_process_desc_v69(struct intel_context *ce)
474 {
475 struct parent_scratch *ps = __get_parent_scratch(ce);
476
477 return &ps->descs.pdesc;
478 }
479
480 static struct guc_sched_wq_desc *
481 __get_wq_desc_v70(struct intel_context *ce)
482 {
483 struct parent_scratch *ps = __get_parent_scratch(ce);
484
485 return &ps->descs.wq_desc;
486 }
487
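/*
 * Returns a pointer to wqi_size bytes of space in the parent's work queue,
 * or NULL if the queue is still full after refreshing the cached head (the
 * caller then backs off with -EBUSY and retries later).
 */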
488 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
489 {
490 /*
491 * Check for space in work queue. Caching a value of the head pointer in the
492 * intel_context structure in order to reduce the number of accesses to shared
493 * GPU memory which may be across a PCIe bus.
494 */
495 #define AVAILABLE_SPACE \
496 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
497 if (wqi_size > AVAILABLE_SPACE) {
498 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
499
500 if (wqi_size > AVAILABLE_SPACE)
501 return NULL;
502 }
503 #undef AVAILABLE_SPACE
504
505 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
506 }
507
508 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
509 {
510 struct intel_context *ce = xa_load(&guc->context_lookup, id);
511
512 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
513
514 return ce;
515 }
516
517 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
518 {
519 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
520
521 if (!base)
522 return NULL;
523
524 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
525
526 return &base[index];
527 }
528
529 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
530 {
531 u32 size;
532 int ret;
533
534 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
535 GUC_MAX_CONTEXT_ID);
536 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
537 (void **)&guc->lrc_desc_pool_vaddr_v69);
538 if (ret)
539 return ret;
540
541 return 0;
542 }
543
544 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
545 {
546 if (!guc->lrc_desc_pool_vaddr_v69)
547 return;
548
549 guc->lrc_desc_pool_vaddr_v69 = NULL;
550 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
551 }
552
553 static inline bool guc_submission_initialized(struct intel_guc *guc)
554 {
555 return guc->submission_initialized;
556 }
557
558 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
559 {
560 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
561
562 if (desc)
563 memset(desc, 0, sizeof(*desc));
564 }
565
566 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
567 {
568 return __get_context(guc, id);
569 }
570
571 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
572 struct intel_context *ce)
573 {
574 unsigned long flags;
575
576 /*
577 * xarray API doesn't have an xa_store_irqsave wrapper, so call the
578 * lower level functions directly.
579 */
580 xa_lock_irqsave(&guc->context_lookup, flags);
581 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
582 xa_unlock_irqrestore(&guc->context_lookup, flags);
583 }
584
585 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
586 {
587 unsigned long flags;
588
589 if (unlikely(!guc_submission_initialized(guc)))
590 return;
591
592 _reset_lrc_desc_v69(guc, id);
593
594 /*
595 * xarray API doesn't have an xa_erase_irqsave wrapper, so call
596 * the lower level functions directly.
597 */
598 xa_lock_irqsave(&guc->context_lookup, flags);
599 __xa_erase(&guc->context_lookup, id);
600 xa_unlock_irqrestore(&guc->context_lookup, flags);
601 }
602
603 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
604 {
605 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
606 wake_up_all(&guc->ct.wq);
607 }
608
609 static int guc_submission_send_busy_loop(struct intel_guc *guc,
610 const u32 *action,
611 u32 len,
612 u32 g2h_len_dw,
613 bool loop)
614 {
615 /*
616 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
617 * so we don't handle the case where we don't get a reply because we
618 * aborted the send due to the channel being busy.
619 */
620 GEM_BUG_ON(g2h_len_dw && !loop);
621
622 if (g2h_len_dw)
623 atomic_inc(&guc->outstanding_submission_g2h);
624
625 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
626 }
627
628 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
629 atomic_t *wait_var,
630 bool interruptible,
631 long timeout)
632 {
633 const int state = interruptible ?
634 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
635 DEFINE_WAIT(wait);
636
637 might_sleep();
638 GEM_BUG_ON(timeout < 0);
639
640 if (!atomic_read(wait_var))
641 return 0;
642
643 if (!timeout)
644 return -ETIME;
645
646 for (;;) {
647 prepare_to_wait(&guc->ct.wq, &wait, state);
648
649 if (!atomic_read(wait_var))
650 break;
651
652 if (signal_pending_state(state, current)) {
653 timeout = -EINTR;
654 break;
655 }
656
657 if (!timeout) {
658 timeout = -ETIME;
659 break;
660 }
661
662 timeout = io_schedule_timeout(timeout);
663 }
664 finish_wait(&guc->ct.wq, &wait);
665
666 return (timeout < 0) ? timeout : 0;
667 }
668
669 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
670 {
671 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
672 return 0;
673
674 return intel_guc_wait_for_pending_msg(guc,
675 &guc->outstanding_submission_g2h,
676 true, timeout);
677 }
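
/*
 * A minimal caller-side sketch: wait up to a second, in jiffies, for all
 * outstanding G2H responses before continuing. The helper name and timeout
 * value are illustrative only.
 */
__maybe_unused
static int guc_quiesce_sketch(struct intel_guc *guc)
{
/* intel_guc_wait_for_idle() expects a jiffies-based timeout. */
return intel_guc_wait_for_idle(guc, msecs_to_jiffies(1000));
}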
678
679 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
680 static int try_context_registration(struct intel_context *ce, bool loop);
681
682 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
683 {
684 int err = 0;
685 struct intel_context *ce = request_to_scheduling_context(rq);
686 u32 action[3];
687 int len = 0;
688 u32 g2h_len_dw = 0;
689 bool enabled;
690
691 lockdep_assert_held(&rq->engine->sched_engine->lock);
692
693 /*
694 * Corner case where requests were sitting in the priority list or a
695 * request was resubmitted after the context was banned.
696 */
697 if (unlikely(!intel_context_is_schedulable(ce))) {
698 i915_request_put(i915_request_mark_eio(rq));
699 intel_engine_signal_breadcrumbs(ce->engine);
700 return 0;
701 }
702
703 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
704 GEM_BUG_ON(context_guc_id_invalid(ce));
705
706 if (context_policy_required(ce)) {
707 err = guc_context_policy_init_v70(ce, false);
708 if (err)
709 return err;
710 }
711
712 spin_lock(&ce->guc_state.lock);
713
714 /*
715 * The request / context will be run on the hardware when scheduling
716 * gets enabled in the unblock. For multi-lrc we still submit the
717 * context to move the LRC tails.
718 */
719 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
720 goto out;
721
722 enabled = context_enabled(ce) || context_blocked(ce);
723
724 if (!enabled) {
725 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
726 action[len++] = ce->guc_id.id;
727 action[len++] = GUC_CONTEXT_ENABLE;
728 set_context_pending_enable(ce);
729 intel_context_get(ce);
730 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
731 } else {
732 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
733 action[len++] = ce->guc_id.id;
734 }
735
736 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
737 if (!enabled && !err) {
738 trace_intel_context_sched_enable(ce);
739 atomic_inc(&guc->outstanding_submission_g2h);
740 set_context_enabled(ce);
741
742 /*
743 * Without multi-lrc KMD does the submission step (moving the
744 * lrc tail) so enabling scheduling is sufficient to submit the
745 * context. This isn't the case in multi-lrc submission as the
746 * GuC needs to move the tails, hence the need for another H2G
747 * to submit a multi-lrc context after enabling scheduling.
748 */
749 if (intel_context_is_parent(ce)) {
750 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
751 err = intel_guc_send_nb(guc, action, len - 1, 0);
752 }
753 } else if (!enabled) {
754 clr_context_pending_enable(ce);
755 intel_context_put(ce);
756 }
757 if (likely(!err))
758 trace_i915_request_guc_submit(rq);
759
760 out:
761 spin_unlock(&ce->guc_state.lock);
762 return err;
763 }
764
765 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
766 {
767 int ret = __guc_add_request(guc, rq);
768
769 if (unlikely(ret == -EBUSY)) {
770 guc->stalled_request = rq;
771 guc->submission_stall_reason = STALL_ADD_REQUEST;
772 }
773
774 return ret;
775 }
776
777 static inline void guc_set_lrc_tail(struct i915_request *rq)
778 {
779 rq->context->lrc_reg_state[CTX_RING_TAIL] =
780 intel_ring_set_tail(rq->ring, rq->tail);
781 }
782
783 static inline int rq_prio(const struct i915_request *rq)
784 {
785 return rq->sched.attr.priority;
786 }
787
788 static bool is_multi_lrc_rq(struct i915_request *rq)
789 {
790 return intel_context_is_parallel(rq->context);
791 }
792
793 static bool can_merge_rq(struct i915_request *rq,
794 struct i915_request *last)
795 {
796 return request_to_scheduling_context(rq) ==
797 request_to_scheduling_context(last);
798 }
799
800 static u32 wq_space_until_wrap(struct intel_context *ce)
801 {
802 return (WQ_SIZE - ce->parallel.guc.wqi_tail);
803 }
804
805 static void write_wqi(struct intel_context *ce, u32 wqi_size)
806 {
807 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
808
809 /*
810 * Ensure WQI are visible before updating tail
811 */
812 intel_guc_write_barrier(ce_to_guc(ce));
813
814 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
815 (WQ_SIZE - 1);
816 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
817 }
818
819 static int guc_wq_noop_append(struct intel_context *ce)
820 {
821 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
822 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
823
824 if (!wqi)
825 return -EBUSY;
826
827 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
828
829 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
830 FIELD_PREP(WQ_LEN_MASK, len_dw);
831 ce->parallel.guc.wqi_tail = 0;
832
833 return 0;
834 }
835
836 static int __guc_wq_item_append(struct i915_request *rq)
837 {
838 struct intel_context *ce = request_to_scheduling_context(rq);
839 struct intel_context *child;
840 unsigned int wqi_size = (ce->parallel.number_children + 4) *
841 sizeof(u32);
842 u32 *wqi;
843 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
844 int ret;
845
846 /* Ensure context is in the correct state before updating the work queue */
847 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
848 GEM_BUG_ON(context_guc_id_invalid(ce));
849 GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
850 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
851
852 /* Insert NOOP if this work queue item will wrap the tail pointer. */
853 if (wqi_size > wq_space_until_wrap(ce)) {
854 ret = guc_wq_noop_append(ce);
855 if (ret)
856 return ret;
857 }
858
859 wqi = get_wq_pointer(ce, wqi_size);
860 if (!wqi)
861 return -EBUSY;
862
863 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
864
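/*
 * WQI layout written below: a header dword (type | length), the parent's
 * LRCA, the guc_id plus the parent ring tail in qwords, an unused fence_id,
 * and one ring tail (in qwords) per child context.
 */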
865 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
866 FIELD_PREP(WQ_LEN_MASK, len_dw);
867 *wqi++ = ce->lrc.lrca;
868 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
869 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
870 *wqi++ = 0; /* fence_id */
871 for_each_child(ce, child)
872 *wqi++ = child->ring->tail / sizeof(u64);
873
874 write_wqi(ce, wqi_size);
875
876 return 0;
877 }
878
879 static int guc_wq_item_append(struct intel_guc *guc,
880 struct i915_request *rq)
881 {
882 struct intel_context *ce = request_to_scheduling_context(rq);
883 int ret;
884
885 if (unlikely(!intel_context_is_schedulable(ce)))
886 return 0;
887
888 ret = __guc_wq_item_append(rq);
889 if (unlikely(ret == -EBUSY)) {
890 guc->stalled_request = rq;
891 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
892 }
893
894 return ret;
895 }
896
897 static bool multi_lrc_submit(struct i915_request *rq)
898 {
899 struct intel_context *ce = request_to_scheduling_context(rq);
900
901 intel_ring_set_tail(rq->ring, rq->tail);
902
903 /*
904 * We expect the front end (execbuf IOCTL) to set this flag on the last
905 * request generated from a multi-BB submission. This indicates to the
906 * backend (GuC interface) that we should submit this context thus
907 * submitting all the requests generated in parallel.
908 */
909 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
910 !intel_context_is_schedulable(ce);
911 }
912
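/*
 * Dequeue requests for a single scheduling context and push them to the GuC.
 * If a previous attempt stalled with -EBUSY, resume from the recorded stall
 * point (register context, move LRC tail or add request) instead of
 * re-walking the priority tree. Returns whether requests were submitted so
 * the tasklet knows to run another pass.
 */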
913 static int guc_dequeue_one_context(struct intel_guc *guc)
914 {
915 struct i915_sched_engine * const sched_engine = guc->sched_engine;
916 struct i915_request *last = NULL;
917 bool submit = false;
918 struct rb_node *rb;
919 int ret;
920
921 lockdep_assert_held(&sched_engine->lock);
922
923 if (guc->stalled_request) {
924 submit = true;
925 last = guc->stalled_request;
926
927 switch (guc->submission_stall_reason) {
928 case STALL_REGISTER_CONTEXT:
929 goto register_context;
930 case STALL_MOVE_LRC_TAIL:
931 goto move_lrc_tail;
932 case STALL_ADD_REQUEST:
933 goto add_request;
934 default:
935 MISSING_CASE(guc->submission_stall_reason);
936 }
937 }
938
939 while ((rb = rb_first_cached(&sched_engine->queue))) {
940 struct i915_priolist *p = to_priolist(rb);
941 struct i915_request *rq, *rn;
942
943 priolist_for_each_request_consume(rq, rn, p) {
944 if (last && !can_merge_rq(rq, last))
945 goto register_context;
946
947 list_del_init(&rq->sched.link);
948
949 __i915_request_submit(rq);
950
951 trace_i915_request_in(rq, 0);
952 last = rq;
953
954 if (is_multi_lrc_rq(rq)) {
955 /*
956 * We need to coalesce all multi-lrc requests in
957 * a relationship into a single H2G. We are
958 * guaranteed that all of these requests will be
959 * submitted sequentially.
960 */
961 if (multi_lrc_submit(rq)) {
962 submit = true;
963 goto register_context;
964 }
965 } else {
966 submit = true;
967 }
968 }
969
970 rb_erase_cached(&p->node, &sched_engine->queue);
971 i915_priolist_free(p);
972 }
973
974 register_context:
975 if (submit) {
976 struct intel_context *ce = request_to_scheduling_context(last);
977
978 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
979 intel_context_is_schedulable(ce))) {
980 ret = try_context_registration(ce, false);
981 if (unlikely(ret == -EPIPE)) {
982 goto deadlk;
983 } else if (ret == -EBUSY) {
984 guc->stalled_request = last;
985 guc->submission_stall_reason =
986 STALL_REGISTER_CONTEXT;
987 goto schedule_tasklet;
988 } else if (ret != 0) {
989 GEM_WARN_ON(ret); /* Unexpected */
990 goto deadlk;
991 }
992 }
993
994 move_lrc_tail:
995 if (is_multi_lrc_rq(last)) {
996 ret = guc_wq_item_append(guc, last);
997 if (ret == -EBUSY) {
998 goto schedule_tasklet;
999 } else if (ret != 0) {
1000 GEM_WARN_ON(ret); /* Unexpected */
1001 goto deadlk;
1002 }
1003 } else {
1004 guc_set_lrc_tail(last);
1005 }
1006
1007 add_request:
1008 ret = guc_add_request(guc, last);
1009 if (unlikely(ret == -EPIPE)) {
1010 goto deadlk;
1011 } else if (ret == -EBUSY) {
1012 goto schedule_tasklet;
1013 } else if (ret != 0) {
1014 GEM_WARN_ON(ret); /* Unexpected */
1015 goto deadlk;
1016 }
1017 }
1018
1019 guc->stalled_request = NULL;
1020 guc->submission_stall_reason = STALL_NONE;
1021 return submit;
1022
1023 deadlk:
1024 sched_engine->tasklet.callback = NULL;
1025 tasklet_disable_nosync(&sched_engine->tasklet);
1026 return false;
1027
1028 schedule_tasklet:
1029 tasklet_schedule(&sched_engine->tasklet);
1030 return false;
1031 }
1032
1033 static void guc_submission_tasklet(struct tasklet_struct *t)
1034 {
1035 struct i915_sched_engine *sched_engine =
1036 from_tasklet(sched_engine, t, tasklet);
1037 unsigned long flags;
1038 bool loop;
1039
1040 spin_lock_irqsave(&sched_engine->lock, flags);
1041
1042 do {
1043 loop = guc_dequeue_one_context(sched_engine->private_data);
1044 } while (loop);
1045
1046 i915_sched_engine_reset_on_empty(sched_engine);
1047
1048 spin_unlock_irqrestore(&sched_engine->lock, flags);
1049 }
1050
1051 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
1052 {
1053 if (iir & GT_RENDER_USER_INTERRUPT)
1054 intel_engine_signal_breadcrumbs(engine);
1055 }
1056
1057 static void __guc_context_destroy(struct intel_context *ce);
1058 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1059 static void guc_signal_context_fence(struct intel_context *ce);
1060 static void guc_cancel_context_requests(struct intel_context *ce);
1061 static void guc_blocked_fence_complete(struct intel_context *ce);
1062
1063 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1064 {
1065 struct intel_context *ce;
1066 unsigned long index, flags;
1067 bool pending_disable, pending_enable, deregister, destroyed, banned;
1068
1069 xa_lock_irqsave(&guc->context_lookup, flags);
1070 xa_for_each(&guc->context_lookup, index, ce) {
1071 /*
1072 * Corner case where the ref count on the object is zero but a
1073 * deregister G2H was lost. In this case we don't touch the ref
1074 * count and finish the destroy of the context.
1075 */
1076 bool do_put = kref_get_unless_zero(&ce->ref);
1077
1078 xa_unlock(&guc->context_lookup);
1079
1080 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
1081 (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
1082 /* successful cancel so jump straight to close it */
1083 intel_context_sched_disable_unpin(ce);
1084 }
1085
1086 spin_lock(&ce->guc_state.lock);
1087
1088 /*
1089 * Once we are at this point submission_disabled() is guaranteed
1090 * to be visible to all callers who set the below flags (see above
1091 * flush and flushes in reset_prepare). If submission_disabled()
1092 * is set, the caller shouldn't set these flags.
1093 */
1094
1095 destroyed = context_destroyed(ce);
1096 pending_enable = context_pending_enable(ce);
1097 pending_disable = context_pending_disable(ce);
1098 deregister = context_wait_for_deregister_to_register(ce);
1099 banned = context_banned(ce);
1100 init_sched_state(ce);
1101
1102 spin_unlock(&ce->guc_state.lock);
1103
1104 if (pending_enable || destroyed || deregister) {
1105 decr_outstanding_submission_g2h(guc);
1106 if (deregister)
1107 guc_signal_context_fence(ce);
1108 if (destroyed) {
1109 intel_gt_pm_put_async(guc_to_gt(guc));
1110 release_guc_id(guc, ce);
1111 __guc_context_destroy(ce);
1112 }
1113 if (pending_enable || deregister)
1114 intel_context_put(ce);
1115 }
1116
1117 /* Not mutually exclusive with the above if statement. */
1118 if (pending_disable) {
1119 guc_signal_context_fence(ce);
1120 if (banned) {
1121 guc_cancel_context_requests(ce);
1122 intel_engine_signal_breadcrumbs(ce->engine);
1123 }
1124 intel_context_sched_disable_unpin(ce);
1125 decr_outstanding_submission_g2h(guc);
1126
1127 spin_lock(&ce->guc_state.lock);
1128 guc_blocked_fence_complete(ce);
1129 spin_unlock(&ce->guc_state.lock);
1130
1131 intel_context_put(ce);
1132 }
1133
1134 if (do_put)
1135 intel_context_put(ce);
1136 xa_lock(&guc->context_lookup);
1137 }
1138 xa_unlock_irqrestore(&guc->context_lookup, flags);
1139 }
1140
1141 /*
1142 * GuC stores busyness stats for each engine at context in/out boundaries. A
1143 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1144 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
1145 * GuC.
1146 *
1147 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1148 * is valid (!= ~0) and start is non-zero, the engine is considered to be
1149 * active. For an active engine total busyness = total + (now - start), where
1150 * 'now' is the time at which the busyness is sampled. For inactive engine,
1151 * total busyness = total.
1152 *
1153 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1154 *
1155 * The start and total values provided by GuC are 32 bits and wrap around in a
1156 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1157 * increasing ns values, there is a need for this implementation to account for
1158 * overflows and extend the GuC provided values to 64 bits before returning
1159 * busyness to the user. In order to do that, a worker runs periodically with a
1160 * period of 1/8th the time it takes for the timestamp to wrap (i.e. once in
1161 * 27 seconds for a gt clock frequency of 19.2 MHz).
1162 */
1163
1164 #define WRAP_TIME_CLKS U32_MAX
1165 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
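
/*
 * A minimal sketch of the busyness calculation described above, assuming
 * already-extended 64-bit values in gt clocks. Hypothetical helper, not used
 * by the driver.
 */
__maybe_unused
static u64 busyness_clks_sketch(u64 total, u64 start, u64 now, bool active)
{
/* An active engine gets the in-progress slice since 'start' added on. */
return active ? total + (now - start) : total;
}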
1166
1167 static void
1168 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1169 {
1170 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1171 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1172
1173 if (new_start == lower_32_bits(*prev_start))
1174 return;
1175
1176 /*
1177 * When gt is unparked, we update the gt timestamp and start the ping
1178 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1179 * is unparked, all switched in contexts will have a start time that is
1180 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1181 *
1182 * If neither gt_stamp nor new_start has rolled over, then the
1183 * gt_stamp_hi does not need to be adjusted, however if one of them has
1184 * rolled over, we need to adjust gt_stamp_hi accordingly.
1185 *
1186 * The below conditions address the cases of new_start rollover and
1187 * gt_stamp_last rollover respectively.
1188 */
1189 if (new_start < gt_stamp_last &&
1190 (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
1191 gt_stamp_hi++;
1192
1193 if (new_start > gt_stamp_last &&
1194 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
1195 gt_stamp_hi--;
1196
1197 *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
1198 }
1199
1200 #define record_read(map_, field_) \
1201 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
1202
1203 /*
1204 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1205 * we run into a race where the value read is inconsistent. Sometimes the
1206 * inconsistency is in reading the upper MSB bytes of the last_in value when
1207 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1208 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1209 * determine validity of these values. Instead we read the values multiple times
1210 * until they are consistent. In test runs, 3 attempts result in consistent
1211 * values. The upper bound is set to 6 attempts and may need to be tuned as per
1212 * any new occurrences.
1213 */
1214 static void __get_engine_usage_record(struct intel_engine_cs *engine,
1215 u32 *last_in, u32 *id, u32 *total)
1216 {
1217 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
1218 int i = 0;
1219
1220 do {
1221 *last_in = record_read(&rec_map, last_switch_in_stamp);
1222 *id = record_read(&rec_map, current_context_index);
1223 *total = record_read(&rec_map, total_runtime);
1224
1225 if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
1226 record_read(&rec_map, current_context_index) == *id &&
1227 record_read(&rec_map, total_runtime) == *total)
1228 break;
1229 } while (++i < 6);
1230 }
1231
1232 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1233 {
1234 struct intel_engine_guc_stats *stats = &engine->stats.guc;
1235 struct intel_guc *guc = &engine->gt->uc.guc;
1236 u32 last_switch, ctx_id, total;
1237
1238 lockdep_assert_held(&guc->timestamp.lock);
1239
1240 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1241
1242 stats->running = ctx_id != ~0U && last_switch;
1243 if (stats->running)
1244 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1245
1246 /*
1247 * Instead of adjusting the total for overflow, just add the
1248 * difference from the previous sample to stats->total_gt_clks.
1249 */
1250 if (total && total != ~0U) {
1251 stats->total_gt_clks += (u32)(total - stats->prev_total);
1252 stats->prev_total = total;
1253 }
1254 }
1255
1256 static u32 gpm_timestamp_shift(struct intel_gt *gt)
1257 {
1258 intel_wakeref_t wakeref;
1259 u32 reg, shift;
1260
1261 with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1262 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1263
1264 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
1265 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
1266
1267 return 3 - shift;
1268 }
1269
1270 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
1271 {
1272 struct intel_gt *gt = guc_to_gt(guc);
1273 u32 gt_stamp_lo, gt_stamp_hi;
1274 u64 gpm_ts;
1275
1276 lockdep_assert_held(&guc->timestamp.lock);
1277
1278 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1279 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
1280 MISC_STATUS1) >> guc->timestamp.shift;
1281 gt_stamp_lo = lower_32_bits(gpm_ts);
1282 *now = ktime_get();
1283
1284 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
1285 gt_stamp_hi++;
1286
1287 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
1288 }
1289
1290 /*
1291 * Unlike the execlist mode of submission, total and active times are in terms of
1292 * gt clocks. The *now parameter is retained to return the cpu time at which the
1293 * busyness was sampled.
1294 */
1295 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1296 {
1297 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1298 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1299 struct intel_gt *gt = engine->gt;
1300 struct intel_guc *guc = &gt->uc.guc;
1301 u64 total, gt_stamp_saved;
1302 unsigned long flags;
1303 u32 reset_count;
1304 bool in_reset;
1305
1306 spin_lock_irqsave(&guc->timestamp.lock, flags);
1307
1308 /*
1309 * If a reset happened, we risk reading partially updated engine
1310 * busyness from GuC, so we just use the driver stored copy of busyness.
1311 * Synchronize with gt reset using reset_count and the
1312 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1313 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1314 * usable by checking the flag afterwards.
1315 */
1316 reset_count = i915_reset_count(gpu_error);
1317 in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
1318
1319 *now = ktime_get();
1320
1321 /*
1322 * The active busyness depends on start_gt_clk and gt_stamp.
1323 * gt_stamp is updated by i915 only when gt is awake and the
1324 * start_gt_clk is derived from GuC state. To get a consistent
1325 * view of activity, we query the GuC state only if gt is awake.
1326 */
1327 if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
1328 stats_saved = *stats;
1329 gt_stamp_saved = guc->timestamp.gt_stamp;
1330 /*
1331 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1332 * start_gt_clk' calculation below for active engines.
1333 */
1334 guc_update_engine_gt_clks(engine);
1335 guc_update_pm_timestamp(guc, now);
1336 intel_gt_pm_put_async(gt);
1337 if (i915_reset_count(gpu_error) != reset_count) {
1338 *stats = stats_saved;
1339 guc->timestamp.gt_stamp = gt_stamp_saved;
1340 }
1341 }
1342
1343 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
1344 if (stats->running) {
1345 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1346
1347 total += intel_gt_clock_interval_to_ns(gt, clk);
1348 }
1349
1350 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1351
1352 return ns_to_ktime(total);
1353 }
1354
1355 static void guc_enable_busyness_worker(struct intel_guc *guc)
1356 {
1357 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay);
1358 }
1359
1360 static void guc_cancel_busyness_worker(struct intel_guc *guc)
1361 {
1362 cancel_delayed_work_sync(&guc->timestamp.work);
1363 }
1364
1365 static void __reset_guc_busyness_stats(struct intel_guc *guc)
1366 {
1367 struct intel_gt *gt = guc_to_gt(guc);
1368 struct intel_engine_cs *engine;
1369 enum intel_engine_id id;
1370 unsigned long flags;
1371 ktime_t unused;
1372
1373 guc_cancel_busyness_worker(guc);
1374
1375 spin_lock_irqsave(&guc->timestamp.lock, flags);
1376
1377 guc_update_pm_timestamp(guc, &unused);
1378 for_each_engine(engine, gt, id) {
1379 guc_update_engine_gt_clks(engine);
1380 engine->stats.guc.prev_total = 0;
1381 }
1382
1383 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1384 }
1385
1386 static void __update_guc_busyness_stats(struct intel_guc *guc)
1387 {
1388 struct intel_gt *gt = guc_to_gt(guc);
1389 struct intel_engine_cs *engine;
1390 enum intel_engine_id id;
1391 unsigned long flags;
1392 ktime_t unused;
1393
1394 guc->timestamp.last_stat_jiffies = jiffies;
1395
1396 spin_lock_irqsave(&guc->timestamp.lock, flags);
1397
1398 guc_update_pm_timestamp(guc, &unused);
1399 for_each_engine(engine, gt, id)
1400 guc_update_engine_gt_clks(engine);
1401
1402 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1403 }
1404
1405 static void __guc_context_update_stats(struct intel_context *ce)
1406 {
1407 struct intel_guc *guc = ce_to_guc(ce);
1408 unsigned long flags;
1409
1410 spin_lock_irqsave(&guc->timestamp.lock, flags);
1411 lrc_update_runtime(ce);
1412 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1413 }
1414
1415 static void guc_context_update_stats(struct intel_context *ce)
1416 {
1417 if (!intel_context_pin_if_active(ce))
1418 return;
1419
1420 __guc_context_update_stats(ce);
1421 intel_context_unpin(ce);
1422 }
1423
1424 static void guc_timestamp_ping(struct work_struct *wrk)
1425 {
1426 struct intel_guc *guc = container_of(wrk, typeof(*guc),
1427 timestamp.work.work);
1428 struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1429 struct intel_gt *gt = guc_to_gt(guc);
1430 struct intel_context *ce;
1431 intel_wakeref_t wakeref;
1432 unsigned long index;
1433 int srcu, ret;
1434
1435 /*
1436 * Ideally the busyness worker should take a gt pm wakeref because the
1437 * worker only needs to be active while gt is awake. However, the
1438 * gt_park path cancels the worker synchronously and this complicates
1439 * the flow if the worker is also running at the same time. The cancel
1440 * waits for the worker and when the worker releases the wakeref, that
1441 * would call gt_park and would lead to a deadlock.
1442 *
1443 * The resolution is to take the global pm wakeref if runtime pm is
1444 * already active. If not, we don't need to update the busyness stats as
1445 * the stats would already be updated when the gt was parked.
1446 *
1447 * Note:
1448 * - We do not requeue the worker if we cannot take a reference to runtime
1449 * pm since intel_guc_busyness_unpark would requeue the worker in the
1450 * resume path.
1451 *
1452 * - If the gt was parked longer than time taken for GT timestamp to roll
1453 * over, we ignore those rollovers since we don't care about tracking
1454 * the exact GT time. We only care about roll overs when the gt is
1455 * active and running workloads.
1456 *
1457 * - There is a window of time between gt_park and runtime suspend,
1458 * where the worker may run. This is acceptable since the worker will
1459 * not find any new data to update busyness.
1460 */
1461 wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
1462 if (!wakeref)
1463 return;
1464
1465 /*
1466 * Synchronize with gt reset to make sure the worker does not
1467 * corrupt the engine/guc stats. NB: can't actually block waiting
1468 * for a reset to complete as the reset requires flushing out
1469 * this worker thread if started. So waiting would deadlock.
1470 */
1471 ret = intel_gt_reset_trylock(gt, &srcu);
1472 if (ret)
1473 goto err_trylock;
1474
1475 __update_guc_busyness_stats(guc);
1476
1477 /* adjust context stats for overflow */
1478 xa_for_each(&guc->context_lookup, index, ce)
1479 guc_context_update_stats(ce);
1480
1481 intel_gt_reset_unlock(gt, srcu);
1482
1483 guc_enable_busyness_worker(guc);
1484
1485 err_trylock:
1486 intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
1487 }
1488
1489 static int guc_action_enable_usage_stats(struct intel_guc *guc)
1490 {
1491 u32 offset = intel_guc_engine_usage_offset(guc);
1492 u32 action[] = {
1493 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1494 offset,
1495 0,
1496 };
1497
1498 return intel_guc_send(guc, action, ARRAY_SIZE(action));
1499 }
1500
1501 static int guc_init_engine_stats(struct intel_guc *guc)
1502 {
1503 struct intel_gt *gt = guc_to_gt(guc);
1504 intel_wakeref_t wakeref;
1505 int ret;
1506
1507 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
1508 ret = guc_action_enable_usage_stats(guc);
1509
1510 if (ret)
1511 guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
1512 else
1513 guc_enable_busyness_worker(guc);
1514
1515 return ret;
1516 }
1517
1518 static void guc_fini_engine_stats(struct intel_guc *guc)
1519 {
1520 guc_cancel_busyness_worker(guc);
1521 }
1522
1523 void intel_guc_busyness_park(struct intel_gt *gt)
1524 {
1525 struct intel_guc *guc = &gt->uc.guc;
1526
1527 if (!guc_submission_initialized(guc))
1528 return;
1529
1530 /*
1531 * There is a race with suspend flow where the worker runs after suspend
1532 * and causes an unclaimed register access warning. Cancel the worker
1533 * synchronously here.
1534 */
1535 guc_cancel_busyness_worker(guc);
1536
1537 /*
1538 * Before parking, we should sample engine busyness stats if we need to.
1539 * We can skip it if we are less than half a ping from the last time we
1540 * sampled the busyness stats.
1541 */
1542 if (guc->timestamp.last_stat_jiffies &&
1543 !time_after(jiffies, guc->timestamp.last_stat_jiffies +
1544 (guc->timestamp.ping_delay / 2)))
1545 return;
1546
1547 __update_guc_busyness_stats(guc);
1548 }
1549
1550 void intel_guc_busyness_unpark(struct intel_gt *gt)
1551 {
1552 struct intel_guc *guc = &gt->uc.guc;
1553 unsigned long flags;
1554 ktime_t unused;
1555
1556 if (!guc_submission_initialized(guc))
1557 return;
1558
1559 spin_lock_irqsave(&guc->timestamp.lock, flags);
1560 guc_update_pm_timestamp(guc, &unused);
1561 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1562 guc_enable_busyness_worker(guc);
1563 }
1564
1565 static inline bool
1566 submission_disabled(struct intel_guc *guc)
1567 {
1568 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1569
1570 return unlikely(!sched_engine ||
1571 !__tasklet_is_enabled(&sched_engine->tasklet) ||
1572 intel_gt_is_wedged(guc_to_gt(guc)));
1573 }
1574
1575 static void disable_submission(struct intel_guc *guc)
1576 {
1577 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1578
1579 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1580 GEM_BUG_ON(!guc->ct.enabled);
1581 __tasklet_disable_sync_once(&sched_engine->tasklet);
1582 sched_engine->tasklet.callback = NULL;
1583 }
1584 }
1585
1586 static void enable_submission(struct intel_guc *guc)
1587 {
1588 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1589 unsigned long flags;
1590
1591 spin_lock_irqsave(&guc->sched_engine->lock, flags);
1592 sched_engine->tasklet.callback = guc_submission_tasklet;
1593 wmb(); /* Make sure callback visible */
1594 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1595 __tasklet_enable(&sched_engine->tasklet)) {
1596 GEM_BUG_ON(!guc->ct.enabled);
1597
1598 /* And kick in case we missed a new request submission. */
1599 tasklet_hi_schedule(&sched_engine->tasklet);
1600 }
1601 spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1602 }
1603
1604 static void guc_flush_submissions(struct intel_guc *guc)
1605 {
1606 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1607 unsigned long flags;
1608
1609 spin_lock_irqsave(&sched_engine->lock, flags);
1610 spin_unlock_irqrestore(&sched_engine->lock, flags);
1611 }
1612
1613 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1614
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1616 {
1617 if (unlikely(!guc_submission_initialized(guc))) {
1618 /* Reset called during driver load? GuC not yet initialised! */
1619 return;
1620 }
1621
1622 intel_gt_park_heartbeats(guc_to_gt(guc));
1623 disable_submission(guc);
1624 guc->interrupts.disable(guc);
1625 __reset_guc_busyness_stats(guc);
1626
1627 /* Flush IRQ handler */
1628 spin_lock_irq(guc_to_gt(guc)->irq_lock);
1629 spin_unlock_irq(guc_to_gt(guc)->irq_lock);
1630
1631 guc_flush_submissions(guc);
1632 guc_flush_destroyed_contexts(guc);
1633 flush_work(&guc->ct.requests.worker);
1634
1635 scrub_guc_desc_for_outstanding_g2h(guc);
1636 }
1637
1638 static struct intel_engine_cs *
guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1640 {
1641 struct intel_engine_cs *engine;
1642 intel_engine_mask_t tmp, mask = ve->mask;
1643 unsigned int num_siblings = 0;
1644
1645 for_each_engine_masked(engine, ve->gt, mask, tmp)
1646 if (num_siblings++ == sibling)
1647 return engine;
1648
1649 return NULL;
1650 }
1651
1652 static inline struct intel_engine_cs *
__context_to_physical_engine(struct intel_context *ce)
1654 {
1655 struct intel_engine_cs *engine = ce->engine;
1656
1657 if (intel_engine_is_virtual(engine))
1658 engine = guc_virtual_get_sibling(engine, 0);
1659
1660 return engine;
1661 }
1662
static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1664 {
1665 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1666
1667 if (!intel_context_is_schedulable(ce))
1668 return;
1669
1670 GEM_BUG_ON(!intel_context_is_pinned(ce));
1671
1672 /*
1673 * We want a simple context + ring to execute the breadcrumb update.
1674 * We cannot rely on the context being intact across the GPU hang,
1675 * so clear it and rebuild just what we need for the breadcrumb.
1676 * All pending requests for this context will be zapped, and any
1677 * future request will be after userspace has had the opportunity
1678 * to recreate its own state.
1679 */
1680 if (scrub)
1681 lrc_init_regs(ce, engine, true);
1682
1683 /* Rerun the request; its payload has been neutered (if guilty). */
1684 lrc_update_regs(ce, engine, head);
1685 }
1686
static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1688 {
1689 /*
1690 * Wa_22011802037: In addition to stopping the cs, we need
1691 * to wait for any pending mi force wakeups
1692 */
1693 if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
1694 (GRAPHICS_VER(engine->i915) >= 11 &&
1695 GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) {
1696 intel_engine_stop_cs(engine);
1697 intel_engine_wait_for_pending_mi_fw(engine);
1698 }
1699 }
1700
static void guc_reset_nop(struct intel_engine_cs *engine)
1702 {
1703 }
1704
static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
1706 {
1707 }
1708
1709 static void
__unwind_incomplete_requests(struct intel_context *ce)
1711 {
1712 struct i915_request *rq, *rn;
1713 struct list_head *pl;
1714 int prio = I915_PRIORITY_INVALID;
1715 struct i915_sched_engine * const sched_engine =
1716 ce->engine->sched_engine;
1717 unsigned long flags;
1718
1719 spin_lock_irqsave(&sched_engine->lock, flags);
1720 spin_lock(&ce->guc_state.lock);
1721 list_for_each_entry_safe_reverse(rq, rn,
1722 &ce->guc_state.requests,
1723 sched.link) {
1724 if (i915_request_completed(rq))
1725 continue;
1726
1727 list_del_init(&rq->sched.link);
1728 __i915_request_unsubmit(rq);
1729
1730 /* Push the request back into the queue for later resubmission. */
1731 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1732 if (rq_prio(rq) != prio) {
1733 prio = rq_prio(rq);
1734 pl = i915_sched_lookup_priolist(sched_engine, prio);
1735 }
1736 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1737
1738 list_add(&rq->sched.link, pl);
1739 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1740 }
1741 spin_unlock(&ce->guc_state.lock);
1742 spin_unlock_irqrestore(&sched_engine->lock, flags);
1743 }
1744
static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
1746 {
1747 bool guilty;
1748 struct i915_request *rq;
1749 unsigned long flags;
1750 u32 head;
1751 int i, number_children = ce->parallel.number_children;
1752 struct intel_context *parent = ce;
1753
1754 GEM_BUG_ON(intel_context_is_child(ce));
1755
1756 intel_context_get(ce);
1757
1758 /*
1759 * GuC will implicitly mark the context as non-schedulable when it sends
1760 * the reset notification. Make sure our state reflects this change. The
1761 * context will be marked enabled on resubmission.
1762 */
1763 spin_lock_irqsave(&ce->guc_state.lock, flags);
1764 clr_context_enabled(ce);
1765 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1766
	/*
	 * For each context in the relationship, find the hanging request and
	 * reset each context / request as needed.
	 */
1771 for (i = 0; i < number_children + 1; ++i) {
1772 if (!intel_context_is_pinned(ce))
1773 goto next_context;
1774
1775 guilty = false;
1776 rq = intel_context_get_active_request(ce);
1777 if (!rq) {
1778 head = ce->ring->tail;
1779 goto out_replay;
1780 }
1781
1782 if (i915_request_started(rq))
1783 guilty = stalled & ce->engine->mask;
1784
1785 GEM_BUG_ON(i915_active_is_idle(&ce->active));
1786 head = intel_ring_wrap(ce->ring, rq->head);
1787
1788 __i915_request_reset(rq, guilty);
1789 i915_request_put(rq);
1790 out_replay:
1791 guc_reset_state(ce, head, guilty);
1792 next_context:
1793 if (i != number_children)
1794 ce = list_next_entry(ce, parallel.child_link);
1795 }
1796
1797 __unwind_incomplete_requests(parent);
1798 intel_context_put(parent);
1799 }
1800
void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
1802 {
1803 struct intel_context *ce;
1804 unsigned long index;
1805 unsigned long flags;
1806
1807 if (unlikely(!guc_submission_initialized(guc))) {
1808 /* Reset called during driver load? GuC not yet initialised! */
1809 return;
1810 }
1811
1812 xa_lock_irqsave(&guc->context_lookup, flags);
1813 xa_for_each(&guc->context_lookup, index, ce) {
1814 if (!kref_get_unless_zero(&ce->ref))
1815 continue;
1816
1817 xa_unlock(&guc->context_lookup);
1818
1819 if (intel_context_is_pinned(ce) &&
1820 !intel_context_is_child(ce))
1821 __guc_reset_context(ce, stalled);
1822
1823 intel_context_put(ce);
1824
1825 xa_lock(&guc->context_lookup);
1826 }
1827 xa_unlock_irqrestore(&guc->context_lookup, flags);
1828
1829 /* GuC is blown away, drop all references to contexts */
1830 xa_destroy(&guc->context_lookup);
1831 }
1832
static void guc_cancel_context_requests(struct intel_context *ce)
1834 {
1835 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1836 struct i915_request *rq;
1837 unsigned long flags;
1838
1839 /* Mark all executing requests as skipped. */
1840 spin_lock_irqsave(&sched_engine->lock, flags);
1841 spin_lock(&ce->guc_state.lock);
1842 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1843 i915_request_put(i915_request_mark_eio(rq));
1844 spin_unlock(&ce->guc_state.lock);
1845 spin_unlock_irqrestore(&sched_engine->lock, flags);
1846 }
1847
1848 static void
guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1850 {
1851 struct i915_request *rq, *rn;
1852 struct rb_node *rb;
1853 unsigned long flags;
1854
1855 /* Can be called during boot if GuC fails to load */
1856 if (!sched_engine)
1857 return;
1858
1859 /*
1860 * Before we call engine->cancel_requests(), we should have exclusive
1861 * access to the submission state. This is arranged for us by the
1862 * caller disabling the interrupt generation, the tasklet and other
1863 * threads that may then access the same state, giving us a free hand
1864 * to reset state. However, we still need to let lockdep be aware that
1865 * we know this state may be accessed in hardirq context, so we
1866 * disable the irq around this manipulation and we want to keep
1867 * the spinlock focused on its duties and not accidentally conflate
1868 * coverage to the submission's irq state. (Similarly, although we
1869 * shouldn't need to disable irq around the manipulation of the
1870 * submission's irq state, we also wish to remind ourselves that
1871 * it is irq state.)
1872 */
1873 spin_lock_irqsave(&sched_engine->lock, flags);
1874
1875 /* Flush the queued requests to the timeline list (for retiring). */
1876 while ((rb = rb_first_cached(&sched_engine->queue))) {
1877 struct i915_priolist *p = to_priolist(rb);
1878
1879 priolist_for_each_request_consume(rq, rn, p) {
1880 list_del_init(&rq->sched.link);
1881
1882 __i915_request_submit(rq);
1883
1884 i915_request_put(i915_request_mark_eio(rq));
1885 }
1886
1887 rb_erase_cached(&p->node, &sched_engine->queue);
1888 i915_priolist_free(p);
1889 }
1890
1891 /* Remaining _unready_ requests will be nop'ed when submitted */
1892
1893 sched_engine->queue_priority_hint = INT_MIN;
1894 sched_engine->queue = RB_ROOT_CACHED;
1895
1896 spin_unlock_irqrestore(&sched_engine->lock, flags);
1897 }
1898
void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1900 {
1901 struct intel_context *ce;
1902 unsigned long index;
1903 unsigned long flags;
1904
1905 xa_lock_irqsave(&guc->context_lookup, flags);
1906 xa_for_each(&guc->context_lookup, index, ce) {
1907 if (!kref_get_unless_zero(&ce->ref))
1908 continue;
1909
1910 xa_unlock(&guc->context_lookup);
1911
1912 if (intel_context_is_pinned(ce) &&
1913 !intel_context_is_child(ce))
1914 guc_cancel_context_requests(ce);
1915
1916 intel_context_put(ce);
1917
1918 xa_lock(&guc->context_lookup);
1919 }
1920 xa_unlock_irqrestore(&guc->context_lookup, flags);
1921
1922 guc_cancel_sched_engine_requests(guc->sched_engine);
1923
1924 /* GuC is blown away, drop all references to contexts */
1925 xa_destroy(&guc->context_lookup);
1926 }
1927
void intel_guc_submission_reset_finish(struct intel_guc *guc)
1929 {
1930 /* Reset called during driver load or during wedge? */
1931 if (unlikely(!guc_submission_initialized(guc) ||
1932 intel_gt_is_wedged(guc_to_gt(guc)))) {
1933 return;
1934 }
1935
1936 /*
1937 * Technically possible for either of these values to be non-zero here,
1938 * but very unlikely + harmless. Regardless let's add a warn so we can
1939 * see in CI if this happens frequently / a precursor to taking down the
1940 * machine.
1941 */
1942 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1943 atomic_set(&guc->outstanding_submission_g2h, 0);
1944
1945 intel_guc_global_policies_update(guc);
1946 enable_submission(guc);
1947 intel_gt_unpark_heartbeats(guc_to_gt(guc));
1948 }
1949
1950 static void destroyed_worker_func(struct work_struct *w);
1951 static void reset_fail_worker_func(struct work_struct *w);
1952
1953 /*
1954 * Set up the memory resources to be shared with the GuC (via the GGTT)
1955 * at firmware loading time.
1956 */
int intel_guc_submission_init(struct intel_guc *guc)
1958 {
1959 struct intel_gt *gt = guc_to_gt(guc);
1960 int ret;
1961
1962 if (guc->submission_initialized)
1963 return 0;
1964
1965 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) {
1966 ret = guc_lrc_desc_pool_create_v69(guc);
1967 if (ret)
1968 return ret;
1969 }
1970
1971 guc->submission_state.guc_ids_bitmap =
1972 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
1973 if (!guc->submission_state.guc_ids_bitmap) {
1974 ret = -ENOMEM;
1975 goto destroy_pool;
1976 }
1977
1978 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
1979 guc->timestamp.shift = gpm_timestamp_shift(gt);
1980 guc->submission_initialized = true;
1981
1982 return 0;
1983
1984 destroy_pool:
1985 guc_lrc_desc_pool_destroy_v69(guc);
1986
1987 return ret;
1988 }
1989
void intel_guc_submission_fini(struct intel_guc *guc)
1991 {
1992 if (!guc->submission_initialized)
1993 return;
1994
1995 guc_flush_destroyed_contexts(guc);
1996 guc_lrc_desc_pool_destroy_v69(guc);
1997 i915_sched_engine_put(guc->sched_engine);
1998 bitmap_free(guc->submission_state.guc_ids_bitmap);
1999 guc->submission_initialized = false;
2000 }
2001
static inline void queue_request(struct i915_sched_engine *sched_engine,
2003 struct i915_request *rq,
2004 int prio)
2005 {
2006 GEM_BUG_ON(!list_empty(&rq->sched.link));
2007 list_add_tail(&rq->sched.link,
2008 i915_sched_lookup_priolist(sched_engine, prio));
2009 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2010 tasklet_hi_schedule(&sched_engine->tasklet);
2011 }
2012
static int guc_bypass_tasklet_submit(struct intel_guc *guc,
2014 struct i915_request *rq)
2015 {
2016 int ret = 0;
2017
2018 __i915_request_submit(rq);
2019
2020 trace_i915_request_in(rq, 0);
2021
2022 if (is_multi_lrc_rq(rq)) {
2023 if (multi_lrc_submit(rq)) {
2024 ret = guc_wq_item_append(guc, rq);
2025 if (!ret)
2026 ret = guc_add_request(guc, rq);
2027 }
2028 } else {
2029 guc_set_lrc_tail(rq);
2030 ret = guc_add_request(guc, rq);
2031 }
2032
2033 if (unlikely(ret == -EPIPE))
2034 disable_submission(guc);
2035
2036 return ret;
2037 }
2038
static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
2040 {
2041 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2042 struct intel_context *ce = request_to_scheduling_context(rq);
2043
2044 return submission_disabled(guc) || guc->stalled_request ||
2045 !i915_sched_engine_is_empty(sched_engine) ||
2046 !ctx_id_mapped(guc, ce->guc_id.id);
2047 }
2048
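/*
 * Submission entry point. A sketch of the decision below: if the tasklet path
 * is required (submission disabled, a stalled request pending, requests
 * already queued, or the context's guc_id not yet mapped), the request is
 * queued on the priolist for the tasklet; otherwise it is handed to the GuC
 * directly, falling back to the tasklet on -EBUSY.
 */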
static void guc_submit_request(struct i915_request *rq)
2050 {
2051 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
2052 struct intel_guc *guc = &rq->engine->gt->uc.guc;
2053 unsigned long flags;
2054
2055 /* Will be called from irq-context when using foreign fences. */
2056 spin_lock_irqsave(&sched_engine->lock, flags);
2057
2058 if (need_tasklet(guc, rq))
2059 queue_request(sched_engine, rq, rq_prio(rq));
2060 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
2061 tasklet_hi_schedule(&sched_engine->tasklet);
2062
2063 spin_unlock_irqrestore(&sched_engine->lock, flags);
2064 }
2065
static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
2067 {
2068 int ret;
2069
2070 GEM_BUG_ON(intel_context_is_child(ce));
2071
2072 if (intel_context_is_parent(ce))
2073 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
2074 NUMBER_MULTI_LRC_GUC_ID(guc),
2075 order_base_2(ce->parallel.number_children
2076 + 1));
2077 else
2078 ret = ida_simple_get(&guc->submission_state.guc_ids,
2079 NUMBER_MULTI_LRC_GUC_ID(guc),
2080 guc->submission_state.num_guc_ids,
2081 GFP_KERNEL | __GFP_RETRY_MAYFAIL |
2082 __GFP_NOWARN);
2083 if (unlikely(ret < 0))
2084 return ret;
2085
2086 if (!intel_context_is_parent(ce))
2087 ++guc->submission_state.guc_ids_in_use;
2088
2089 ce->guc_id.id = ret;
2090 return 0;
2091 }
2092
static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2094 {
2095 GEM_BUG_ON(intel_context_is_child(ce));
2096
2097 if (!context_guc_id_invalid(ce)) {
2098 if (intel_context_is_parent(ce)) {
2099 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2100 ce->guc_id.id,
2101 order_base_2(ce->parallel.number_children
2102 + 1));
2103 } else {
2104 --guc->submission_state.guc_ids_in_use;
2105 ida_simple_remove(&guc->submission_state.guc_ids,
2106 ce->guc_id.id);
2107 }
2108 clr_ctx_id_mapping(guc, ce->guc_id.id);
2109 set_context_guc_id_invalid(ce);
2110 }
2111 if (!list_empty(&ce->guc_id.link))
2112 list_del_init(&ce->guc_id.link);
2113 }
2114
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2116 {
2117 unsigned long flags;
2118
2119 spin_lock_irqsave(&guc->submission_state.lock, flags);
2120 __release_guc_id(guc, ce);
2121 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2122 }
2123
static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2125 {
2126 struct intel_context *cn;
2127
2128 lockdep_assert_held(&guc->submission_state.lock);
2129 GEM_BUG_ON(intel_context_is_child(ce));
2130 GEM_BUG_ON(intel_context_is_parent(ce));
2131
2132 if (!list_empty(&guc->submission_state.guc_id_list)) {
2133 cn = list_first_entry(&guc->submission_state.guc_id_list,
2134 struct intel_context,
2135 guc_id.link);
2136
2137 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
2138 GEM_BUG_ON(context_guc_id_invalid(cn));
2139 GEM_BUG_ON(intel_context_is_child(cn));
2140 GEM_BUG_ON(intel_context_is_parent(cn));
2141
2142 list_del_init(&cn->guc_id.link);
2143 ce->guc_id.id = cn->guc_id.id;
2144
2145 spin_lock(&cn->guc_state.lock);
2146 clr_context_registered(cn);
2147 spin_unlock(&cn->guc_state.lock);
2148
2149 set_context_guc_id_invalid(cn);
2150
2151 #ifdef CONFIG_DRM_I915_SELFTEST
2152 guc->number_guc_id_stolen++;
2153 #endif
2154
2155 return 0;
2156 } else {
2157 return -EAGAIN;
2158 }
2159 }
2160
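/*
 * Multi-LRC note (illustrative): a parent context takes a power-of-two block
 * of consecutive guc_ids from the multi-LRC bitmap (see new_guc_id()) and its
 * children are assigned parent_id + 1, + 2, ... below. For example, a parent
 * with three children occupies a block of four ids; a parent with two children
 * still occupies four, since bitmap_find_free_region() only hands out
 * power-of-two regions.
 */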
static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2162 {
2163 int ret;
2164
2165 lockdep_assert_held(&guc->submission_state.lock);
2166 GEM_BUG_ON(intel_context_is_child(ce));
2167
2168 ret = new_guc_id(guc, ce);
2169 if (unlikely(ret < 0)) {
2170 if (intel_context_is_parent(ce))
2171 return -ENOSPC;
2172
2173 ret = steal_guc_id(guc, ce);
2174 if (ret < 0)
2175 return ret;
2176 }
2177
2178 if (intel_context_is_parent(ce)) {
2179 struct intel_context *child;
2180 int i = 1;
2181
2182 for_each_child(ce, child)
2183 child->guc_id.id = ce->guc_id.id + i++;
2184 }
2185
2186 return 0;
2187 }
2188
2189 #define PIN_GUC_ID_TRIES 4
static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2191 {
2192 int ret = 0;
2193 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2194
2195 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2196
2197 try_again:
2198 spin_lock_irqsave(&guc->submission_state.lock, flags);
2199
2200 might_lock(&ce->guc_state.lock);
2201
2202 if (context_guc_id_invalid(ce)) {
2203 ret = assign_guc_id(guc, ce);
2204 if (ret)
2205 goto out_unlock;
		ret = 1; /* Indicates newly assigned guc_id */
2207 }
2208 if (!list_empty(&ce->guc_id.link))
2209 list_del_init(&ce->guc_id.link);
2210 atomic_inc(&ce->guc_id.ref);
2211
2212 out_unlock:
2213 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2214
	/*
	 * -EAGAIN indicates no guc_ids are available; let's retire any
	 * outstanding requests to see if that frees up a guc_id. If the first
	 * retire didn't help, insert a sleep of the timeslice duration before
	 * attempting to retire more requests. Double the sleep period each
	 * subsequent pass before finally giving up. The sleep period has a
	 * max of 100ms and a minimum of 1ms.
	 */
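	/*
	 * For example, with a 5 ms timeslice the retry sequence below works
	 * out to: retire with no sleep, then sleep ~5 ms and retire, then
	 * ~10 ms and retire, after which the -EAGAIN is returned to the
	 * caller.
	 */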
2223 if (ret == -EAGAIN && --tries) {
2224 if (PIN_GUC_ID_TRIES - tries > 1) {
2225 unsigned int timeslice_shifted =
2226 ce->engine->props.timeslice_duration_ms <<
2227 (PIN_GUC_ID_TRIES - tries - 2);
2228 unsigned int max = min_t(unsigned int, 100,
2229 timeslice_shifted);
2230
2231 msleep(max_t(unsigned int, max, 1));
2232 }
2233 intel_gt_retire_requests(guc_to_gt(guc));
2234 goto try_again;
2235 }
2236
2237 return ret;
2238 }
2239
static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2241 {
2242 unsigned long flags;
2243
2244 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2245 GEM_BUG_ON(intel_context_is_child(ce));
2246
2247 if (unlikely(context_guc_id_invalid(ce) ||
2248 intel_context_is_parent(ce)))
2249 return;
2250
2251 spin_lock_irqsave(&guc->submission_state.lock, flags);
2252 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2253 !atomic_read(&ce->guc_id.ref))
2254 list_add_tail(&ce->guc_id.link,
2255 &guc->submission_state.guc_id_list);
2256 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2257 }
2258
static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2260 struct intel_context *ce,
2261 u32 guc_id,
2262 u32 offset,
2263 bool loop)
2264 {
2265 struct intel_context *child;
2266 u32 action[4 + MAX_ENGINE_INSTANCE];
2267 int len = 0;
2268
2269 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2270
2271 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2272 action[len++] = guc_id;
2273 action[len++] = ce->parallel.number_children + 1;
2274 action[len++] = offset;
2275 for_each_child(ce, child) {
2276 offset += sizeof(struct guc_lrc_desc_v69);
2277 action[len++] = offset;
2278 }
2279
2280 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2281 }
2282
static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2284 struct intel_context *ce,
2285 struct guc_ctxt_registration_info *info,
2286 bool loop)
2287 {
2288 struct intel_context *child;
2289 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2290 int len = 0;
2291 u32 next_id;
2292
2293 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2294
2295 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2296 action[len++] = info->flags;
2297 action[len++] = info->context_idx;
2298 action[len++] = info->engine_class;
2299 action[len++] = info->engine_submit_mask;
2300 action[len++] = info->wq_desc_lo;
2301 action[len++] = info->wq_desc_hi;
2302 action[len++] = info->wq_base_lo;
2303 action[len++] = info->wq_base_hi;
2304 action[len++] = info->wq_size;
2305 action[len++] = ce->parallel.number_children + 1;
2306 action[len++] = info->hwlrca_lo;
2307 action[len++] = info->hwlrca_hi;
2308
2309 next_id = info->context_idx + 1;
2310 for_each_child(ce, child) {
2311 GEM_BUG_ON(next_id++ != child->guc_id.id);
2312
2313 /*
2314 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2315 * only supports 32 bit currently.
2316 */
2317 action[len++] = lower_32_bits(child->lrc.lrca);
2318 action[len++] = upper_32_bits(child->lrc.lrca);
2319 }
2320
2321 GEM_BUG_ON(len > ARRAY_SIZE(action));
2322
2323 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2324 }
2325
static int __guc_action_register_context_v69(struct intel_guc *guc,
2327 u32 guc_id,
2328 u32 offset,
2329 bool loop)
2330 {
2331 u32 action[] = {
2332 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2333 guc_id,
2334 offset,
2335 };
2336
2337 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2338 0, loop);
2339 }
2340
static int __guc_action_register_context_v70(struct intel_guc *guc,
2342 struct guc_ctxt_registration_info *info,
2343 bool loop)
2344 {
2345 u32 action[] = {
2346 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2347 info->flags,
2348 info->context_idx,
2349 info->engine_class,
2350 info->engine_submit_mask,
2351 info->wq_desc_lo,
2352 info->wq_desc_hi,
2353 info->wq_base_lo,
2354 info->wq_base_hi,
2355 info->wq_size,
2356 info->hwlrca_lo,
2357 info->hwlrca_hi,
2358 };
2359
2360 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2361 0, loop);
2362 }
2363
2364 static void prepare_context_registration_info_v69(struct intel_context *ce);
2365 static void prepare_context_registration_info_v70(struct intel_context *ce,
2366 struct guc_ctxt_registration_info *info);
2367
2368 static int
register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
2370 {
2371 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2372 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2373
2374 prepare_context_registration_info_v69(ce);
2375
2376 if (intel_context_is_parent(ce))
2377 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2378 offset, loop);
2379 else
2380 return __guc_action_register_context_v69(guc, ce->guc_id.id,
2381 offset, loop);
2382 }
2383
2384 static int
register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
2386 {
2387 struct guc_ctxt_registration_info info;
2388
2389 prepare_context_registration_info_v70(ce, &info);
2390
2391 if (intel_context_is_parent(ce))
2392 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2393 else
2394 return __guc_action_register_context_v70(guc, &info, loop);
2395 }
2396
static int register_context(struct intel_context *ce, bool loop)
2398 {
2399 struct intel_guc *guc = ce_to_guc(ce);
2400 int ret;
2401
2402 GEM_BUG_ON(intel_context_is_child(ce));
2403 trace_intel_context_register(ce);
2404
2405 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
2406 ret = register_context_v70(guc, ce, loop);
2407 else
2408 ret = register_context_v69(guc, ce, loop);
2409
2410 if (likely(!ret)) {
2411 unsigned long flags;
2412
2413 spin_lock_irqsave(&ce->guc_state.lock, flags);
2414 set_context_registered(ce);
2415 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2416
2417 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0))
2418 guc_context_policy_init_v70(ce, loop);
2419 }
2420
2421 return ret;
2422 }
2423
static int __guc_action_deregister_context(struct intel_guc *guc,
2425 u32 guc_id)
2426 {
2427 u32 action[] = {
2428 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2429 guc_id,
2430 };
2431
2432 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2433 G2H_LEN_DW_DEREGISTER_CONTEXT,
2434 true);
2435 }
2436
static int deregister_context(struct intel_context *ce, u32 guc_id)
2438 {
2439 struct intel_guc *guc = ce_to_guc(ce);
2440
2441 GEM_BUG_ON(intel_context_is_child(ce));
2442 trace_intel_context_deregister(ce);
2443
2444 return __guc_action_deregister_context(guc, guc_id);
2445 }
2446
static inline void clear_children_join_go_memory(struct intel_context *ce)
2448 {
2449 struct parent_scratch *ps = __get_parent_scratch(ce);
2450 int i;
2451
2452 ps->go.semaphore = 0;
2453 for (i = 0; i < ce->parallel.number_children + 1; ++i)
2454 ps->join[i].semaphore = 0;
2455 }
2456
static inline u32 get_children_go_value(struct intel_context *ce)
2458 {
2459 return __get_parent_scratch(ce)->go.semaphore;
2460 }
2461
static inline u32 get_children_join_value(struct intel_context *ce,
2463 u8 child_index)
2464 {
2465 return __get_parent_scratch(ce)->join[child_index].semaphore;
2466 }
2467
2468 struct context_policy {
2469 u32 count;
2470 struct guc_update_context_policy h2g;
2471 };
2472
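/*
 * Each KLV entry in the H2G payload is a key/length word plus a value word
 * (see MAKE_CONTEXT_POLICY_ADD() below), so e.g. a policy update carrying
 * priority, execution quantum and preemption timeout is the header plus six
 * u32s.
 */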
static u32 __guc_context_policy_action_size(struct context_policy *policy)
2474 {
2475 size_t bytes = sizeof(policy->h2g.header) +
2476 (sizeof(policy->h2g.klv[0]) * policy->count);
2477
2478 return bytes / sizeof(u32);
2479 }
2480
static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
2482 {
2483 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
2484 policy->h2g.header.ctx_id = guc_id;
2485 policy->count = 0;
2486 }
2487
2488 #define MAKE_CONTEXT_POLICY_ADD(func, id) \
2489 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
2490 { \
2491 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
2492 policy->h2g.klv[policy->count].kl = \
2493 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
2494 FIELD_PREP(GUC_KLV_0_LEN, 1); \
2495 policy->h2g.klv[policy->count].value = data; \
2496 policy->count++; \
2497 }
2498
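/*
 * For instance, the priority entry below expands (roughly, omitting the
 * GEM_BUG_ON) to:
 *
 *	static void __guc_context_policy_add_priority(struct context_policy *policy, u32 data)
 *	{
 *		policy->h2g.klv[policy->count].kl =
 *			FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY) |
 *			FIELD_PREP(GUC_KLV_0_LEN, 1);
 *		policy->h2g.klv[policy->count].value = data;
 *		policy->count++;
 *	}
 */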
MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
2500 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
2501 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
2502 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
2503
2504 #undef MAKE_CONTEXT_POLICY_ADD
2505
2506 static int __guc_context_set_context_policies(struct intel_guc *guc,
2507 struct context_policy *policy,
2508 bool loop)
2509 {
2510 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2511 __guc_context_policy_action_size(policy),
2512 0, loop);
2513 }
2514
static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
2516 {
2517 struct intel_engine_cs *engine = ce->engine;
2518 struct intel_guc *guc = &engine->gt->uc.guc;
2519 struct context_policy policy;
2520 u32 execution_quantum;
2521 u32 preemption_timeout;
2522 unsigned long flags;
2523 int ret;
2524
2525 /* NB: For both of these, zero means disabled. */
2526 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2527 execution_quantum));
2528 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2529 preemption_timeout));
2530 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2531 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
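	/*
	 * e.g. a 5 ms timeslice and a 640 ms preemption timeout become the
	 * values 5000 and 640000 here.
	 */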
2532
2533 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2534
2535 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2536 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2537 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2538
2539 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2540 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2541
2542 ret = __guc_context_set_context_policies(guc, &policy, loop);
2543
2544 spin_lock_irqsave(&ce->guc_state.lock, flags);
2545 if (ret != 0)
2546 set_context_policy_required(ce);
2547 else
2548 clr_context_policy_required(ce);
2549 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2550
2551 return ret;
2552 }
2553
static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2555 struct guc_lrc_desc_v69 *desc)
2556 {
2557 desc->policy_flags = 0;
2558
2559 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2560 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2561
2562 /* NB: For both of these, zero means disabled. */
2563 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
2564 desc->execution_quantum));
2565 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
2566 desc->preemption_timeout));
2567 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2568 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2569 }
2570
static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2572 {
2573 /*
2574 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2575 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2576 */
2577 switch (prio) {
2578 default:
2579 MISSING_CASE(prio);
2580 fallthrough;
2581 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2582 return GEN12_CTX_PRIORITY_NORMAL;
2583 case GUC_CLIENT_PRIORITY_NORMAL:
2584 return GEN12_CTX_PRIORITY_LOW;
2585 case GUC_CLIENT_PRIORITY_HIGH:
2586 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2587 return GEN12_CTX_PRIORITY_HIGH;
2588 }
2589 }
2590
static void prepare_context_registration_info_v69(struct intel_context *ce)
2592 {
2593 struct intel_engine_cs *engine = ce->engine;
2594 struct intel_guc *guc = &engine->gt->uc.guc;
2595 u32 ctx_id = ce->guc_id.id;
2596 struct guc_lrc_desc_v69 *desc;
2597 struct intel_context *child;
2598
2599 GEM_BUG_ON(!engine->mask);
2600
	/*
	 * Ensure the LRC and CT vmas are in the same region, as the write
	 * barrier is done based on the CT vma region.
	 */
2605 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2606 i915_gem_object_is_lmem(ce->ring->vma->obj));
2607
2608 desc = __get_lrc_desc_v69(guc, ctx_id);
2609 GEM_BUG_ON(!desc);
2610 desc->engine_class = engine_class_to_guc_class(engine->class);
2611 desc->engine_submit_mask = engine->logical_mask;
2612 desc->hw_context_desc = ce->lrc.lrca;
2613 desc->priority = ce->guc_state.prio;
2614 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2615 guc_context_policy_init_v69(engine, desc);
2616
2617 /*
2618 * If context is a parent, we need to register a process descriptor
2619 * describing a work queue and register all child contexts.
2620 */
2621 if (intel_context_is_parent(ce)) {
2622 struct guc_process_desc_v69 *pdesc;
2623
2624 ce->parallel.guc.wqi_tail = 0;
2625 ce->parallel.guc.wqi_head = 0;
2626
2627 desc->process_desc = i915_ggtt_offset(ce->state) +
2628 __get_parent_scratch_offset(ce);
2629 desc->wq_addr = i915_ggtt_offset(ce->state) +
2630 __get_wq_offset(ce);
2631 desc->wq_size = WQ_SIZE;
2632
2633 pdesc = __get_process_desc_v69(ce);
2634 memset(pdesc, 0, sizeof(*(pdesc)));
2635 pdesc->stage_id = ce->guc_id.id;
2636 pdesc->wq_base_addr = desc->wq_addr;
2637 pdesc->wq_size_bytes = desc->wq_size;
2638 pdesc->wq_status = WQ_STATUS_ACTIVE;
2639
2640 ce->parallel.guc.wq_head = &pdesc->head;
2641 ce->parallel.guc.wq_tail = &pdesc->tail;
2642 ce->parallel.guc.wq_status = &pdesc->wq_status;
2643
2644 for_each_child(ce, child) {
2645 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2646
2647 desc->engine_class =
2648 engine_class_to_guc_class(engine->class);
2649 desc->hw_context_desc = child->lrc.lrca;
2650 desc->priority = ce->guc_state.prio;
2651 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2652 guc_context_policy_init_v69(engine, desc);
2653 }
2654
2655 clear_children_join_go_memory(ce);
2656 }
2657 }
2658
static void prepare_context_registration_info_v70(struct intel_context *ce,
2660 struct guc_ctxt_registration_info *info)
2661 {
2662 struct intel_engine_cs *engine = ce->engine;
2663 struct intel_guc *guc = &engine->gt->uc.guc;
2664 u32 ctx_id = ce->guc_id.id;
2665
2666 GEM_BUG_ON(!engine->mask);
2667
	/*
	 * Ensure the LRC and CT vmas are in the same region, as the write
	 * barrier is done based on the CT vma region.
	 */
2672 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2673 i915_gem_object_is_lmem(ce->ring->vma->obj));
2674
2675 memset(info, 0, sizeof(*info));
2676 info->context_idx = ctx_id;
2677 info->engine_class = engine_class_to_guc_class(engine->class);
2678 info->engine_submit_mask = engine->logical_mask;
2679 /*
2680 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2681 * only supports 32 bit currently.
2682 */
2683 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2684 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2685 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2686 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2687 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2688
2689 /*
2690 * If context is a parent, we need to register a process descriptor
2691 * describing a work queue and register all child contexts.
2692 */
2693 if (intel_context_is_parent(ce)) {
2694 struct guc_sched_wq_desc *wq_desc;
2695 u64 wq_desc_offset, wq_base_offset;
2696
2697 ce->parallel.guc.wqi_tail = 0;
2698 ce->parallel.guc.wqi_head = 0;
2699
2700 wq_desc_offset = i915_ggtt_offset(ce->state) +
2701 __get_parent_scratch_offset(ce);
2702 wq_base_offset = i915_ggtt_offset(ce->state) +
2703 __get_wq_offset(ce);
2704 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2705 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2706 info->wq_base_lo = lower_32_bits(wq_base_offset);
2707 info->wq_base_hi = upper_32_bits(wq_base_offset);
2708 info->wq_size = WQ_SIZE;
2709
2710 wq_desc = __get_wq_desc_v70(ce);
2711 memset(wq_desc, 0, sizeof(*wq_desc));
2712 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2713
2714 ce->parallel.guc.wq_head = &wq_desc->head;
2715 ce->parallel.guc.wq_tail = &wq_desc->tail;
2716 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2717
2718 clear_children_join_go_memory(ce);
2719 }
2720 }
2721
static int try_context_registration(struct intel_context *ce, bool loop)
2723 {
2724 struct intel_engine_cs *engine = ce->engine;
2725 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2726 struct intel_guc *guc = &engine->gt->uc.guc;
2727 intel_wakeref_t wakeref;
2728 u32 ctx_id = ce->guc_id.id;
2729 bool context_registered;
2730 int ret = 0;
2731
2732 GEM_BUG_ON(!sched_state_is_init(ce));
2733
2734 context_registered = ctx_id_mapped(guc, ctx_id);
2735
2736 clr_ctx_id_mapping(guc, ctx_id);
2737 set_ctx_id_mapping(guc, ctx_id, ce);
2738
2739 /*
2740 * The context_lookup xarray is used to determine if the hardware
2741 * context is currently registered. There are two cases in which it
	 * could be registered: either the guc_id has been stolen from another
2743 * context or the lrc descriptor address of this context has changed. In
2744 * either case the context needs to be deregistered with the GuC before
2745 * registering this context.
2746 */
2747 if (context_registered) {
2748 bool disabled;
2749 unsigned long flags;
2750
2751 trace_intel_context_steal_guc_id(ce);
2752 GEM_BUG_ON(!loop);
2753
2754 /* Seal race with Reset */
2755 spin_lock_irqsave(&ce->guc_state.lock, flags);
2756 disabled = submission_disabled(guc);
2757 if (likely(!disabled)) {
2758 set_context_wait_for_deregister_to_register(ce);
2759 intel_context_get(ce);
2760 }
2761 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2762 if (unlikely(disabled)) {
2763 clr_ctx_id_mapping(guc, ctx_id);
2764 return 0; /* Will get registered later */
2765 }
2766
2767 /*
2768 * If stealing the guc_id, this ce has the same guc_id as the
2769 * context whose guc_id was stolen.
2770 */
2771 with_intel_runtime_pm(runtime_pm, wakeref)
2772 ret = deregister_context(ce, ce->guc_id.id);
2773 if (unlikely(ret == -ENODEV))
2774 ret = 0; /* Will get registered later */
2775 } else {
2776 with_intel_runtime_pm(runtime_pm, wakeref)
2777 ret = register_context(ce, loop);
2778 if (unlikely(ret == -EBUSY)) {
2779 clr_ctx_id_mapping(guc, ctx_id);
2780 } else if (unlikely(ret == -ENODEV)) {
2781 clr_ctx_id_mapping(guc, ctx_id);
2782 ret = 0; /* Will get registered later */
2783 }
2784 }
2785
2786 return ret;
2787 }
2788
static int __guc_context_pre_pin(struct intel_context *ce,
2790 struct intel_engine_cs *engine,
2791 struct i915_gem_ww_ctx *ww,
2792 void **vaddr)
2793 {
2794 return lrc_pre_pin(ce, engine, ww, vaddr);
2795 }
2796
static int __guc_context_pin(struct intel_context *ce,
2798 struct intel_engine_cs *engine,
2799 void *vaddr)
2800 {
2801 if (i915_ggtt_offset(ce->state) !=
2802 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2803 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2804
	/*
	 * The GuC context gets pinned in guc_request_alloc(). See that
	 * function for an explanation of why.
	 */
2809
2810 return lrc_pin(ce, engine, vaddr);
2811 }
2812
static int guc_context_pre_pin(struct intel_context *ce,
2814 struct i915_gem_ww_ctx *ww,
2815 void **vaddr)
2816 {
2817 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2818 }
2819
static int guc_context_pin(struct intel_context *ce, void *vaddr)
2821 {
2822 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2823
2824 if (likely(!ret && !intel_context_is_barrier(ce)))
2825 intel_engine_pm_get(ce->engine);
2826
2827 return ret;
2828 }
2829
static void guc_context_unpin(struct intel_context *ce)
2831 {
2832 struct intel_guc *guc = ce_to_guc(ce);
2833
2834 __guc_context_update_stats(ce);
2835 unpin_guc_id(guc, ce);
2836 lrc_unpin(ce);
2837
2838 if (likely(!intel_context_is_barrier(ce)))
2839 intel_engine_pm_put_async(ce->engine);
2840 }
2841
static void guc_context_post_unpin(struct intel_context *ce)
2843 {
2844 lrc_post_unpin(ce);
2845 }
2846
static void __guc_context_sched_enable(struct intel_guc *guc,
2848 struct intel_context *ce)
2849 {
2850 u32 action[] = {
2851 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2852 ce->guc_id.id,
2853 GUC_CONTEXT_ENABLE
2854 };
2855
2856 trace_intel_context_sched_enable(ce);
2857
2858 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2859 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2860 }
2861
static void __guc_context_sched_disable(struct intel_guc *guc,
2863 struct intel_context *ce,
2864 u16 guc_id)
2865 {
2866 u32 action[] = {
2867 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2868 guc_id, /* ce->guc_id.id not stable */
2869 GUC_CONTEXT_DISABLE
2870 };
2871
2872 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
2873
2874 GEM_BUG_ON(intel_context_is_child(ce));
2875 trace_intel_context_sched_disable(ce);
2876
2877 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2878 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2879 }
2880
static void guc_blocked_fence_complete(struct intel_context *ce)
2882 {
2883 lockdep_assert_held(&ce->guc_state.lock);
2884
2885 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2886 i915_sw_fence_complete(&ce->guc_state.blocked);
2887 }
2888
static void guc_blocked_fence_reinit(struct intel_context *ce)
2890 {
2891 lockdep_assert_held(&ce->guc_state.lock);
2892 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2893
2894 /*
2895 * This fence is always complete unless a pending schedule disable is
2896 * outstanding. We arm the fence here and complete it when we receive
2897 * the pending schedule disable complete message.
2898 */
2899 i915_sw_fence_fini(&ce->guc_state.blocked);
2900 i915_sw_fence_reinit(&ce->guc_state.blocked);
2901 i915_sw_fence_await(&ce->guc_state.blocked);
2902 i915_sw_fence_commit(&ce->guc_state.blocked);
2903 }
2904
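/*
 * Returns a snapshot of the guc_id: callers send the schedule disable H2G
 * after dropping ce->guc_state.lock, by which point ce->guc_id.id may no
 * longer be stable (see guc_context_block(), guc_context_revoke() and
 * do_sched_disable()).
 */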
static u16 prep_context_pending_disable(struct intel_context *ce)
2906 {
2907 lockdep_assert_held(&ce->guc_state.lock);
2908
2909 set_context_pending_disable(ce);
2910 clr_context_enabled(ce);
2911 guc_blocked_fence_reinit(ce);
2912 intel_context_get(ce);
2913
2914 return ce->guc_id.id;
2915 }
2916
static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2918 {
2919 struct intel_guc *guc = ce_to_guc(ce);
2920 unsigned long flags;
2921 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2922 intel_wakeref_t wakeref;
2923 u16 guc_id;
2924 bool enabled;
2925
2926 GEM_BUG_ON(intel_context_is_child(ce));
2927
2928 spin_lock_irqsave(&ce->guc_state.lock, flags);
2929
2930 incr_context_blocked(ce);
2931
2932 enabled = context_enabled(ce);
2933 if (unlikely(!enabled || submission_disabled(guc))) {
2934 if (enabled)
2935 clr_context_enabled(ce);
2936 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2937 return &ce->guc_state.blocked;
2938 }
2939
2940 /*
2941 * We add +2 here as the schedule disable complete CTB handler calls
2942 * intel_context_sched_disable_unpin (-2 to pin_count).
2943 */
2944 atomic_add(2, &ce->pin_count);
2945
2946 guc_id = prep_context_pending_disable(ce);
2947
2948 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2949
2950 with_intel_runtime_pm(runtime_pm, wakeref)
2951 __guc_context_sched_disable(guc, ce, guc_id);
2952
2953 return &ce->guc_state.blocked;
2954 }
2955
2956 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2957 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2958 #define SCHED_STATE_NO_UNBLOCK \
2959 (SCHED_STATE_MULTI_BLOCKED_MASK | \
2960 SCHED_STATE_PENDING_DISABLE | \
2961 SCHED_STATE_BANNED)
2962
static bool context_cant_unblock(struct intel_context *ce)
2964 {
2965 lockdep_assert_held(&ce->guc_state.lock);
2966
2967 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2968 context_guc_id_invalid(ce) ||
2969 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2970 !intel_context_is_pinned(ce);
2971 }
2972
static void guc_context_unblock(struct intel_context *ce)
2974 {
2975 struct intel_guc *guc = ce_to_guc(ce);
2976 unsigned long flags;
2977 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2978 intel_wakeref_t wakeref;
2979 bool enable;
2980
2981 GEM_BUG_ON(context_enabled(ce));
2982 GEM_BUG_ON(intel_context_is_child(ce));
2983
2984 spin_lock_irqsave(&ce->guc_state.lock, flags);
2985
2986 if (unlikely(submission_disabled(guc) ||
2987 context_cant_unblock(ce))) {
2988 enable = false;
2989 } else {
2990 enable = true;
2991 set_context_pending_enable(ce);
2992 set_context_enabled(ce);
2993 intel_context_get(ce);
2994 }
2995
2996 decr_context_blocked(ce);
2997
2998 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2999
3000 if (enable) {
3001 with_intel_runtime_pm(runtime_pm, wakeref)
3002 __guc_context_sched_enable(guc, ce);
3003 }
3004 }
3005
static void guc_context_cancel_request(struct intel_context *ce,
3007 struct i915_request *rq)
3008 {
3009 struct intel_context *block_context =
3010 request_to_scheduling_context(rq);
3011
3012 if (i915_sw_fence_signaled(&rq->submit)) {
3013 struct i915_sw_fence *fence;
3014
3015 intel_context_get(ce);
3016 fence = guc_context_block(block_context);
3017 i915_sw_fence_wait(fence);
3018 if (!i915_request_completed(rq)) {
3019 __i915_request_skip(rq);
3020 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
3021 true);
3022 }
3023
3024 guc_context_unblock(block_context);
3025 intel_context_put(ce);
3026 }
3027 }
3028
static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
3030 u16 guc_id,
3031 u32 preemption_timeout)
3032 {
3033 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
3034 struct context_policy policy;
3035
3036 __guc_context_policy_start_klv(&policy, guc_id);
3037 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
3038 __guc_context_set_context_policies(guc, &policy, true);
3039 } else {
3040 u32 action[] = {
3041 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
3042 guc_id,
3043 preemption_timeout
3044 };
3045
3046 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3047 }
3048 }
3049
3050 static void
guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
3052 unsigned int preempt_timeout_ms)
3053 {
3054 struct intel_guc *guc = ce_to_guc(ce);
3055 struct intel_runtime_pm *runtime_pm =
3056 &ce->engine->gt->i915->runtime_pm;
3057 intel_wakeref_t wakeref;
3058 unsigned long flags;
3059
3060 GEM_BUG_ON(intel_context_is_child(ce));
3061
3062 guc_flush_submissions(guc);
3063
3064 spin_lock_irqsave(&ce->guc_state.lock, flags);
3065 set_context_banned(ce);
3066
3067 if (submission_disabled(guc) ||
3068 (!context_enabled(ce) && !context_pending_disable(ce))) {
3069 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3070
3071 guc_cancel_context_requests(ce);
3072 intel_engine_signal_breadcrumbs(ce->engine);
3073 } else if (!context_pending_disable(ce)) {
3074 u16 guc_id;
3075
3076 /*
3077 * We add +2 here as the schedule disable complete CTB handler
3078 * calls intel_context_sched_disable_unpin (-2 to pin_count).
3079 */
3080 atomic_add(2, &ce->pin_count);
3081
3082 guc_id = prep_context_pending_disable(ce);
3083 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3084
3085 /*
3086 * In addition to disabling scheduling, set the preemption
3087 * timeout to the minimum value (1 us) so the banned context
3088 * gets kicked off the HW ASAP.
3089 */
3090 with_intel_runtime_pm(runtime_pm, wakeref) {
3091 __guc_context_set_preemption_timeout(guc, guc_id,
3092 preempt_timeout_ms);
3093 __guc_context_sched_disable(guc, ce, guc_id);
3094 }
3095 } else {
3096 if (!context_guc_id_invalid(ce))
3097 with_intel_runtime_pm(runtime_pm, wakeref)
3098 __guc_context_set_preemption_timeout(guc,
3099 ce->guc_id.id,
3100 preempt_timeout_ms);
3101 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3102 }
3103 }
3104
static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
3106 unsigned long flags)
3107 __releases(ce->guc_state.lock)
3108 {
3109 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3110 intel_wakeref_t wakeref;
3111 u16 guc_id;
3112
3113 lockdep_assert_held(&ce->guc_state.lock);
3114 guc_id = prep_context_pending_disable(ce);
3115
3116 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3117
3118 with_intel_runtime_pm(runtime_pm, wakeref)
3119 __guc_context_sched_disable(guc, ce, guc_id);
3120 }
3121
static bool bypass_sched_disable(struct intel_guc *guc,
3123 struct intel_context *ce)
3124 {
3125 lockdep_assert_held(&ce->guc_state.lock);
3126 GEM_BUG_ON(intel_context_is_child(ce));
3127
3128 if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
3129 !ctx_id_mapped(guc, ce->guc_id.id)) {
3130 clr_context_enabled(ce);
3131 return true;
3132 }
3133
3134 return !context_enabled(ce);
3135 }
3136
static void __delay_sched_disable(struct work_struct *wrk)
3138 {
3139 struct intel_context *ce =
3140 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work);
3141 struct intel_guc *guc = ce_to_guc(ce);
3142 unsigned long flags;
3143
3144 spin_lock_irqsave(&ce->guc_state.lock, flags);
3145
3146 if (bypass_sched_disable(guc, ce)) {
3147 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3148 intel_context_sched_disable_unpin(ce);
3149 } else {
3150 do_sched_disable(guc, ce, flags);
3151 }
3152 }
3153
static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
3155 {
	/*
	 * Parent contexts are perma-pinned; if we are unpinning, do the
	 * schedule disable immediately.
	 */
3160 if (intel_context_is_parent(ce))
3161 return true;
3162
	/*
	 * If we are beyond the threshold of available guc_ids, do the
	 * schedule disable immediately.
	 */
3166 return guc->submission_state.guc_ids_in_use >
3167 guc->submission_state.sched_disable_gucid_threshold;
3168 }
3169
static void guc_context_sched_disable(struct intel_context *ce)
3171 {
3172 struct intel_guc *guc = ce_to_guc(ce);
3173 u64 delay = guc->submission_state.sched_disable_delay_ms;
3174 unsigned long flags;
3175
3176 spin_lock_irqsave(&ce->guc_state.lock, flags);
3177
3178 if (bypass_sched_disable(guc, ce)) {
3179 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3180 intel_context_sched_disable_unpin(ce);
3181 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
3182 delay) {
3183 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3184 mod_delayed_work(system_unbound_wq,
3185 &ce->guc_state.sched_disable_delay_work,
3186 msecs_to_jiffies(delay));
3187 } else {
3188 do_sched_disable(guc, ce, flags);
3189 }
3190 }
3191
static void guc_context_close(struct intel_context *ce)
3193 {
3194 unsigned long flags;
3195
3196 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
3197 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))
3198 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work);
3199
3200 spin_lock_irqsave(&ce->guc_state.lock, flags);
3201 set_context_close_done(ce);
3202 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3203 }
3204
static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3206 {
3207 struct intel_guc *guc = ce_to_guc(ce);
3208 struct intel_gt *gt = guc_to_gt(guc);
3209 unsigned long flags;
3210 bool disabled;
3211
3212 GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
3213 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
3214 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
3215 GEM_BUG_ON(context_enabled(ce));
3216
3217 /* Seal race with Reset */
3218 spin_lock_irqsave(&ce->guc_state.lock, flags);
3219 disabled = submission_disabled(guc);
3220 if (likely(!disabled)) {
3221 __intel_gt_pm_get(gt);
3222 set_context_destroyed(ce);
3223 clr_context_registered(ce);
3224 }
3225 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3226 if (unlikely(disabled)) {
3227 release_guc_id(guc, ce);
3228 __guc_context_destroy(ce);
3229 return;
3230 }
3231
3232 deregister_context(ce, ce->guc_id.id);
3233 }
3234
static void __guc_context_destroy(struct intel_context *ce)
3236 {
3237 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
3238 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
3239 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
3240 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
3241
3242 lrc_fini(ce);
3243 intel_context_fini(ce);
3244
3245 if (intel_engine_is_virtual(ce->engine)) {
3246 struct guc_virtual_engine *ve =
3247 container_of(ce, typeof(*ve), context);
3248
3249 if (ve->base.breadcrumbs)
3250 intel_breadcrumbs_put(ve->base.breadcrumbs);
3251
3252 kfree(ve);
3253 } else {
3254 intel_context_free(ce);
3255 }
3256 }
3257
static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3259 {
3260 struct intel_context *ce;
3261 unsigned long flags;
3262
3263 GEM_BUG_ON(!submission_disabled(guc) &&
3264 guc_submission_initialized(guc));
3265
3266 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3267 spin_lock_irqsave(&guc->submission_state.lock, flags);
3268 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3269 struct intel_context,
3270 destroyed_link);
3271 if (ce)
3272 list_del_init(&ce->destroyed_link);
3273 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3274
3275 if (!ce)
3276 break;
3277
3278 release_guc_id(guc, ce);
3279 __guc_context_destroy(ce);
3280 }
3281 }
3282
static void deregister_destroyed_contexts(struct intel_guc *guc)
3284 {
3285 struct intel_context *ce;
3286 unsigned long flags;
3287
3288 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3289 spin_lock_irqsave(&guc->submission_state.lock, flags);
3290 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3291 struct intel_context,
3292 destroyed_link);
3293 if (ce)
3294 list_del_init(&ce->destroyed_link);
3295 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3296
3297 if (!ce)
3298 break;
3299
3300 guc_lrc_desc_unpin(ce);
3301 }
3302 }
3303
static void destroyed_worker_func(struct work_struct *w)
3305 {
3306 struct intel_guc *guc = container_of(w, struct intel_guc,
3307 submission_state.destroyed_worker);
3308 struct intel_gt *gt = guc_to_gt(guc);
3309 int tmp;
3310
3311 with_intel_gt_pm(gt, tmp)
3312 deregister_destroyed_contexts(guc);
3313 }
3314
static void guc_context_destroy(struct kref *kref)
3316 {
3317 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3318 struct intel_guc *guc = ce_to_guc(ce);
3319 unsigned long flags;
3320 bool destroy;
3321
	/*
	 * If the guc_id is invalid, its guc_id has been stolen and we can free
	 * the context immediately. It can also be freed immediately if the
	 * context is not registered with the GuC or the GuC is in the middle
	 * of a reset.
	 */
3327 spin_lock_irqsave(&guc->submission_state.lock, flags);
3328 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3329 !ctx_id_mapped(guc, ce->guc_id.id);
3330 if (likely(!destroy)) {
3331 if (!list_empty(&ce->guc_id.link))
3332 list_del_init(&ce->guc_id.link);
3333 list_add_tail(&ce->destroyed_link,
3334 &guc->submission_state.destroyed_contexts);
3335 } else {
3336 __release_guc_id(guc, ce);
3337 }
3338 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3339 if (unlikely(destroy)) {
3340 __guc_context_destroy(ce);
3341 return;
3342 }
3343
	/*
	 * We use a worker to issue the H2G to deregister the context as
	 * deregistering may take a GT PM reference for the first time, which
	 * isn't allowed from an atomic context.
	 */
3349 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
3350 }
3351
static int guc_context_alloc(struct intel_context *ce)
3353 {
3354 return lrc_alloc(ce, ce->engine);
3355 }
3356
static void __guc_context_set_prio(struct intel_guc *guc,
				   struct intel_context *ce)
3359 {
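	/*
	 * Newer firmware (submission interface 1.0.0+) takes the priority via
	 * a context policy KLV; older firmware uses the dedicated v69
	 * SET_CONTEXT_PRIORITY action.
	 */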
3360 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) {
3361 struct context_policy policy;
3362
3363 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3364 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3365 __guc_context_set_context_policies(guc, &policy, true);
3366 } else {
3367 u32 action[] = {
3368 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3369 ce->guc_id.id,
3370 ce->guc_state.prio,
3371 };
3372
3373 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3374 }
3375 }
3376
static void guc_context_set_prio(struct intel_guc *guc,
				 struct intel_context *ce,
				 u8 prio)
3380 {
3381 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
3382 prio > GUC_CLIENT_PRIORITY_NORMAL);
3383 lockdep_assert_held(&ce->guc_state.lock);
3384
3385 if (ce->guc_state.prio == prio || submission_disabled(guc) ||
3386 !context_registered(ce)) {
3387 ce->guc_state.prio = prio;
3388 return;
3389 }
3390
3391 ce->guc_state.prio = prio;
3392 __guc_context_set_prio(guc, ce);
3393
3394 trace_intel_context_set_prio(ce);
3395 }
3396
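/*
 * Map an i915 scheduler priority onto one of the four GuC client priority
 * levels. Note the GuC encoding is inverted: a lower value means a higher
 * priority (see new_guc_prio_higher()).
 */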
static inline u8 map_i915_prio_to_guc_prio(int prio)
3398 {
3399 if (prio == I915_PRIORITY_NORMAL)
3400 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
3401 else if (prio < I915_PRIORITY_NORMAL)
3402 return GUC_CLIENT_PRIORITY_NORMAL;
3403 else if (prio < I915_PRIORITY_DISPLAY)
3404 return GUC_CLIENT_PRIORITY_HIGH;
3405 else
3406 return GUC_CLIENT_PRIORITY_KMD_HIGH;
3407 }
3408
static inline void add_context_inflight_prio(struct intel_context *ce,
					     u8 guc_prio)
3411 {
3412 lockdep_assert_held(&ce->guc_state.lock);
3413 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3414
3415 ++ce->guc_state.prio_count[guc_prio];
3416
3417 /* Overflow protection */
3418 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3419 }
3420
static inline void sub_context_inflight_prio(struct intel_context *ce,
					     u8 guc_prio)
3423 {
3424 lockdep_assert_held(&ce->guc_state.lock);
3425 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3426
3427 /* Underflow protection */
3428 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3429
3430 --ce->guc_state.prio_count[guc_prio];
3431 }
3432
static inline void update_context_prio(struct intel_context *ce)
3434 {
3435 struct intel_guc *guc = &ce->engine->gt->uc.guc;
3436 int i;
3437
3438 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
3439 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
3440
3441 lockdep_assert_held(&ce->guc_state.lock);
3442
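	/* Apply the highest (lowest numbered) priority level that has inflight requests */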
3443 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
3444 if (ce->guc_state.prio_count[i]) {
3445 guc_context_set_prio(guc, ce, i);
3446 break;
3447 }
3448 }
3449 }
3450
static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3452 {
3453 /* Lower value is higher priority */
3454 return new_guc_prio < old_guc_prio;
3455 }
3456
static void add_to_context(struct i915_request *rq)
3458 {
3459 struct intel_context *ce = request_to_scheduling_context(rq);
3460 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3461
3462 GEM_BUG_ON(intel_context_is_child(ce));
3463 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
3464
3465 spin_lock(&ce->guc_state.lock);
3466 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3467
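	/*
	 * Track the request's GuC priority and only raise the context's
	 * effective priority here; lowering happens when requests retire.
	 */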
3468 if (rq->guc_prio == GUC_PRIO_INIT) {
3469 rq->guc_prio = new_guc_prio;
3470 add_context_inflight_prio(ce, rq->guc_prio);
3471 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3472 sub_context_inflight_prio(ce, rq->guc_prio);
3473 rq->guc_prio = new_guc_prio;
3474 add_context_inflight_prio(ce, rq->guc_prio);
3475 }
3476 update_context_prio(ce);
3477
3478 spin_unlock(&ce->guc_state.lock);
3479 }
3480
static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3482 {
3483 lockdep_assert_held(&ce->guc_state.lock);
3484
3485 if (rq->guc_prio != GUC_PRIO_INIT &&
3486 rq->guc_prio != GUC_PRIO_FINI) {
3487 sub_context_inflight_prio(ce, rq->guc_prio);
3488 update_context_prio(ce);
3489 }
3490 rq->guc_prio = GUC_PRIO_FINI;
3491 }
3492
static void remove_from_context(struct i915_request *rq)
3494 {
3495 struct intel_context *ce = request_to_scheduling_context(rq);
3496
3497 GEM_BUG_ON(intel_context_is_child(ce));
3498
3499 spin_lock_irq(&ce->guc_state.lock);
3500
3501 list_del_init(&rq->sched.link);
3502 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3503
3504 /* Prevent further __await_execution() registering a cb, then flush */
3505 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3506
3507 guc_prio_fini(rq, ce);
3508
3509 spin_unlock_irq(&ce->guc_state.lock);
3510
3511 atomic_dec(&ce->guc_id.ref);
3512 i915_request_notify_execute_cb_imm(rq);
3513 }
3514
3515 static const struct intel_context_ops guc_context_ops = {
3516 .flags = COPS_RUNTIME_CYCLES,
3517 .alloc = guc_context_alloc,
3518
3519 .close = guc_context_close,
3520
3521 .pre_pin = guc_context_pre_pin,
3522 .pin = guc_context_pin,
3523 .unpin = guc_context_unpin,
3524 .post_unpin = guc_context_post_unpin,
3525
3526 .revoke = guc_context_revoke,
3527
3528 .cancel_request = guc_context_cancel_request,
3529
3530 .enter = intel_context_enter_engine,
3531 .exit = intel_context_exit_engine,
3532
3533 .sched_disable = guc_context_sched_disable,
3534
3535 .update_stats = guc_context_update_stats,
3536
3537 .reset = lrc_reset,
3538 .destroy = guc_context_destroy,
3539
3540 .create_virtual = guc_create_virtual,
3541 .create_parallel = guc_create_parallel,
3542 };
3543
static void submit_work_cb(struct irq_work *wrk)
3545 {
3546 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
3547
3548 might_lock(&rq->engine->sched_engine->lock);
3549 i915_sw_fence_complete(&rq->submit);
3550 }
3551
static void __guc_signal_context_fence(struct intel_context *ce)
3553 {
3554 struct i915_request *rq, *rn;
3555
3556 lockdep_assert_held(&ce->guc_state.lock);
3557
3558 if (!list_empty(&ce->guc_state.fences))
3559 trace_intel_context_fence_release(ce);
3560
3561 /*
3562 * Use an IRQ to ensure locking order of sched_engine->lock ->
3563 * ce->guc_state.lock is preserved.
3564 */
3565 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
3566 guc_fence_link) {
3567 list_del(&rq->guc_fence_link);
3568 irq_work_queue(&rq->submit_work);
3569 }
3570
3571 INIT_LIST_HEAD(&ce->guc_state.fences);
3572 }
3573
static void guc_signal_context_fence(struct intel_context *ce)
3575 {
3576 unsigned long flags;
3577
3578 GEM_BUG_ON(intel_context_is_child(ce));
3579
3580 spin_lock_irqsave(&ce->guc_state.lock, flags);
3581 clr_context_wait_for_deregister_to_register(ce);
3582 __guc_signal_context_fence(ce);
3583 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3584 }
3585
static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
3587 {
3588 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
3589 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
3590 !submission_disabled(ce_to_guc(ce));
3591 }
3592
static void guc_context_init(struct intel_context *ce)
3594 {
3595 const struct i915_gem_context *ctx;
3596 int prio = I915_CONTEXT_DEFAULT_PRIORITY;
3597
3598 rcu_read_lock();
3599 ctx = rcu_dereference(ce->gem_context);
3600 if (ctx)
3601 prio = ctx->sched.priority;
3602 rcu_read_unlock();
3603
3604 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3605
3606 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work,
3607 __delay_sched_disable);
3608
3609 set_bit(CONTEXT_GUC_INIT, &ce->flags);
3610 }
3611
static int guc_request_alloc(struct i915_request *rq)
3613 {
3614 struct intel_context *ce = request_to_scheduling_context(rq);
3615 struct intel_guc *guc = ce_to_guc(ce);
3616 unsigned long flags;
3617 int ret;
3618
3619 GEM_BUG_ON(!intel_context_is_pinned(rq->context));
3620
3621 /*
3622 * Flush enough space to reduce the likelihood of waiting after
3623 * we start building the request - in which case we will just
3624 * have to repeat work.
3625 */
3626 rq->reserved_space += GUC_REQUEST_SIZE;
3627
3628 /*
3629 * Note that after this point, we have committed to using
3630 * this request as it is being used to both track the
3631 * state of engine initialisation and liveness of the
3632 * golden renderstate above. Think twice before you try
3633 * to cancel/unwind this request now.
3634 */
3635
3636 /* Unconditionally invalidate GPU caches and TLBs. */
3637 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
3638 if (ret)
3639 return ret;
3640
3641 rq->reserved_space -= GUC_REQUEST_SIZE;
3642
3643 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
3644 guc_context_init(ce);
3645
	/*
	 * If the context gets closed while the execbuf is ongoing, the context
	 * close code will race with the below code to cancel the delayed work.
	 * If the context close wins the race and cancels the work, it will
	 * immediately call the sched disable (see guc_context_close), so there
	 * is a chance we can get past this check while the sched_disable code
	 * is being executed. To make sure that code completes before we check
	 * the status further down, we wait for the close process to complete.
	 * Otherwise, this code path could send a request down thinking that
	 * the context is still schedule-enabled while the GuC ends up dropping
	 * the request completely, because the disable was already sent to the
	 * GuC from the context_close path just prior. In the event the CT is
	 * full, we could potentially need to wait up to 1.5 seconds.
	 */
3660 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
3661 intel_context_sched_disable_unpin(ce);
3662 else if (intel_context_is_closed(ce))
3663 if (wait_for(context_close_done(ce), 1500))
3664 guc_warn(guc, "timed out waiting on context sched close before realloc\n");
	/*
	 * Call pin_guc_id here rather than in the pinning step as with
	 * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing the
	 * guc_id and creating horrible race conditions. This is especially bad
	 * when guc_ids are being stolen due to over subscription. By the time
	 * this function is reached, it is guaranteed that the guc_id will be
	 * persistent until the generated request is retired, thus sealing these
	 * race conditions. It is still safe to fail here if guc_ids are
	 * exhausted; return -EAGAIN to the user indicating that they can try
	 * again in the future.
	 *
	 * There is no need for a lock here as the timeline mutex ensures at
	 * most one context can be executing this code path at once. The
	 * guc_id_ref is incremented once for every request in flight and
	 * decremented on each retire. When it is zero, a lock around the
	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
	 */
3682 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3683 goto out;
3684
3685 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3686 if (unlikely(ret < 0))
3687 return ret;
3688 if (context_needs_register(ce, !!ret)) {
3689 ret = try_context_registration(ce, true);
3690 if (unlikely(ret)) { /* unwind */
3691 if (ret == -EPIPE) {
3692 disable_submission(guc);
3693 goto out; /* GPU will be reset */
3694 }
3695 atomic_dec(&ce->guc_id.ref);
3696 unpin_guc_id(guc, ce);
3697 return ret;
3698 }
3699 }
3700
3701 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3702
3703 out:
	/*
	 * We block all requests on this context if a G2H is pending for a
	 * schedule disable or context deregistration as the GuC will fail a
	 * schedule enable or context registration if either G2H is pending
	 * respectively. Once the G2H returns, the fence blocking these
	 * requests is released (see guc_signal_context_fence).
	 */
3711 spin_lock_irqsave(&ce->guc_state.lock, flags);
3712 if (context_wait_for_deregister_to_register(ce) ||
3713 context_pending_disable(ce)) {
3714 init_irq_work(&rq->submit_work, submit_work_cb);
3715 i915_sw_fence_await(&rq->submit);
3716
3717 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3718 }
3719 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3720
3721 return 0;
3722 }
3723
static int guc_virtual_context_pre_pin(struct intel_context *ce,
				       struct i915_gem_ww_ctx *ww,
				       void **vaddr)
3727 {
3728 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3729
3730 return __guc_context_pre_pin(ce, engine, ww, vaddr);
3731 }
3732
static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3734 {
3735 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3736 int ret = __guc_context_pin(ce, engine, vaddr);
3737 intel_engine_mask_t tmp, mask = ce->engine->mask;
3738
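	/* Hold an engine-pm reference on every physical sibling while pinned */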
3739 if (likely(!ret))
3740 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3741 intel_engine_pm_get(engine);
3742
3743 return ret;
3744 }
3745
static void guc_virtual_context_unpin(struct intel_context *ce)
3747 {
3748 intel_engine_mask_t tmp, mask = ce->engine->mask;
3749 struct intel_engine_cs *engine;
3750 struct intel_guc *guc = ce_to_guc(ce);
3751
3752 GEM_BUG_ON(context_enabled(ce));
3753 GEM_BUG_ON(intel_context_is_barrier(ce));
3754
3755 unpin_guc_id(guc, ce);
3756 lrc_unpin(ce);
3757
3758 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3759 intel_engine_pm_put_async(engine);
3760 }
3761
static void guc_virtual_context_enter(struct intel_context *ce)
3763 {
3764 intel_engine_mask_t tmp, mask = ce->engine->mask;
3765 struct intel_engine_cs *engine;
3766
3767 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3768 intel_engine_pm_get(engine);
3769
3770 intel_timeline_enter(ce->timeline);
3771 }
3772
static void guc_virtual_context_exit(struct intel_context *ce)
3774 {
3775 intel_engine_mask_t tmp, mask = ce->engine->mask;
3776 struct intel_engine_cs *engine;
3777
3778 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3779 intel_engine_pm_put(engine);
3780
3781 intel_timeline_exit(ce->timeline);
3782 }
3783
static int guc_virtual_context_alloc(struct intel_context *ce)
3785 {
3786 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3787
3788 return lrc_alloc(ce, engine);
3789 }
3790
3791 static const struct intel_context_ops virtual_guc_context_ops = {
3792 .flags = COPS_RUNTIME_CYCLES,
3793 .alloc = guc_virtual_context_alloc,
3794
3795 .close = guc_context_close,
3796
3797 .pre_pin = guc_virtual_context_pre_pin,
3798 .pin = guc_virtual_context_pin,
3799 .unpin = guc_virtual_context_unpin,
3800 .post_unpin = guc_context_post_unpin,
3801
3802 .revoke = guc_context_revoke,
3803
3804 .cancel_request = guc_context_cancel_request,
3805
3806 .enter = guc_virtual_context_enter,
3807 .exit = guc_virtual_context_exit,
3808
3809 .sched_disable = guc_context_sched_disable,
3810 .update_stats = guc_context_update_stats,
3811
3812 .destroy = guc_context_destroy,
3813
3814 .get_sibling = guc_virtual_get_sibling,
3815 };
3816
static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3818 {
3819 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3820 struct intel_guc *guc = ce_to_guc(ce);
3821 int ret;
3822
3823 GEM_BUG_ON(!intel_context_is_parent(ce));
3824 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3825
3826 ret = pin_guc_id(guc, ce);
3827 if (unlikely(ret < 0))
3828 return ret;
3829
3830 return __guc_context_pin(ce, engine, vaddr);
3831 }
3832
static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3834 {
3835 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3836
3837 GEM_BUG_ON(!intel_context_is_child(ce));
3838 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3839
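	/*
	 * Children don't pin a guc_id of their own (see guc_parent_context_pin),
	 * so keep the parent pinned for the child's lifetime.
	 */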
3840 __intel_context_pin(ce->parallel.parent);
3841 return __guc_context_pin(ce, engine, vaddr);
3842 }
3843
static void guc_parent_context_unpin(struct intel_context *ce)
3845 {
3846 struct intel_guc *guc = ce_to_guc(ce);
3847
3848 GEM_BUG_ON(context_enabled(ce));
3849 GEM_BUG_ON(intel_context_is_barrier(ce));
3850 GEM_BUG_ON(!intel_context_is_parent(ce));
3851 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3852
3853 unpin_guc_id(guc, ce);
3854 lrc_unpin(ce);
3855 }
3856
static void guc_child_context_unpin(struct intel_context *ce)
3858 {
3859 GEM_BUG_ON(context_enabled(ce));
3860 GEM_BUG_ON(intel_context_is_barrier(ce));
3861 GEM_BUG_ON(!intel_context_is_child(ce));
3862 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3863
3864 lrc_unpin(ce);
3865 }
3866
static void guc_child_context_post_unpin(struct intel_context *ce)
3868 {
3869 GEM_BUG_ON(!intel_context_is_child(ce));
3870 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
3871 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3872
3873 lrc_post_unpin(ce);
3874 intel_context_unpin(ce->parallel.parent);
3875 }
3876
static void guc_child_context_destroy(struct kref *kref)
3878 {
3879 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3880
3881 __guc_context_destroy(ce);
3882 }
3883
3884 static const struct intel_context_ops virtual_parent_context_ops = {
3885 .alloc = guc_virtual_context_alloc,
3886
3887 .close = guc_context_close,
3888
3889 .pre_pin = guc_context_pre_pin,
3890 .pin = guc_parent_context_pin,
3891 .unpin = guc_parent_context_unpin,
3892 .post_unpin = guc_context_post_unpin,
3893
3894 .revoke = guc_context_revoke,
3895
3896 .cancel_request = guc_context_cancel_request,
3897
3898 .enter = guc_virtual_context_enter,
3899 .exit = guc_virtual_context_exit,
3900
3901 .sched_disable = guc_context_sched_disable,
3902
3903 .destroy = guc_context_destroy,
3904
3905 .get_sibling = guc_virtual_get_sibling,
3906 };
3907
3908 static const struct intel_context_ops virtual_child_context_ops = {
3909 .alloc = guc_virtual_context_alloc,
3910
3911 .pre_pin = guc_context_pre_pin,
3912 .pin = guc_child_context_pin,
3913 .unpin = guc_child_context_unpin,
3914 .post_unpin = guc_child_context_post_unpin,
3915
3916 .cancel_request = guc_context_cancel_request,
3917
3918 .enter = guc_virtual_context_enter,
3919 .exit = guc_virtual_context_exit,
3920
3921 .destroy = guc_child_context_destroy,
3922
3923 .get_sibling = guc_virtual_get_sibling,
3924 };
3925
/*
 * The below override of the breadcrumbs is enabled when the user configures a
 * context for parallel submission (multi-lrc, parent-child).
 *
 * The overridden breadcrumbs implement an algorithm which allows the GuC to
 * safely preempt all the hw contexts configured for parallel submission
 * between each BB. The contract between the i915 and GuC is that if the parent
 * context can be preempted, all the children can be preempted, and the GuC
 * will always try to preempt the parent before the children. A handshake
 * between the parent / children breadcrumbs ensures the i915 holds up its end
 * of the deal, creating a window to preempt between each set of BBs.
 */
3938 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3939 u64 offset, u32 len,
3940 const unsigned int flags);
3941 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3942 u64 offset, u32 len,
3943 const unsigned int flags);
3944 static u32 *
3945 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3946 u32 *cs);
3947 static u32 *
3948 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
3949 u32 *cs);
3950
3951 static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width)
3955 {
3956 struct intel_engine_cs **siblings = NULL;
3957 struct intel_context *parent = NULL, *ce, *err;
3958 int i, j;
3959
3960 siblings = kmalloc_array(num_siblings,
3961 sizeof(*siblings),
3962 GFP_KERNEL);
3963 if (!siblings)
3964 return ERR_PTR(-ENOMEM);
3965
3966 for (i = 0; i < width; ++i) {
3967 for (j = 0; j < num_siblings; ++j)
3968 siblings[j] = engines[i * num_siblings + j];
3969
3970 ce = intel_engine_create_virtual(siblings, num_siblings,
3971 FORCE_VIRTUAL);
3972 if (IS_ERR(ce)) {
3973 err = ERR_CAST(ce);
3974 goto unwind;
3975 }
3976
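		/* The first context becomes the parent; the rest are bound as its children */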
3977 if (i == 0) {
3978 parent = ce;
3979 parent->ops = &virtual_parent_context_ops;
3980 } else {
3981 ce->ops = &virtual_child_context_ops;
3982 intel_context_bind_parent_child(parent, ce);
3983 }
3984 }
3985
3986 parent->parallel.fence_context = dma_fence_context_alloc(1);
3987
3988 parent->engine->emit_bb_start =
3989 emit_bb_start_parent_no_preempt_mid_batch;
3990 parent->engine->emit_fini_breadcrumb =
3991 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3992 parent->engine->emit_fini_breadcrumb_dw =
3993 12 + 4 * parent->parallel.number_children;
3994 for_each_child(parent, ce) {
3995 ce->engine->emit_bb_start =
3996 emit_bb_start_child_no_preempt_mid_batch;
3997 ce->engine->emit_fini_breadcrumb =
3998 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3999 ce->engine->emit_fini_breadcrumb_dw = 16;
4000 }
4001
4002 kfree(siblings);
4003 return parent;
4004
4005 unwind:
4006 if (parent)
4007 intel_context_put(parent);
4008 kfree(siblings);
4009 return err;
4010 }
4011
4012 static bool
guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
4014 {
4015 struct intel_engine_cs *sibling;
4016 intel_engine_mask_t tmp, mask = b->engine_mask;
4017 bool result = false;
4018
4019 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
4020 result |= intel_engine_irq_enable(sibling);
4021
4022 return result;
4023 }
4024
4025 static void
guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
4027 {
4028 struct intel_engine_cs *sibling;
4029 intel_engine_mask_t tmp, mask = b->engine_mask;
4030
4031 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
4032 intel_engine_irq_disable(sibling);
4033 }
4034
static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
4036 {
4037 int i;
4038
	/*
	 * In GuC submission mode we do not know which physical engine a request
	 * will be scheduled on; this creates a problem because the breadcrumb
	 * interrupt is per physical engine. To work around this we attach
	 * requests and direct all breadcrumb interrupts to the first instance
	 * of an engine per class. In addition, all breadcrumb interrupts are
	 * enabled / disabled across an engine class in unison.
	 */
4047 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
4048 struct intel_engine_cs *sibling =
4049 engine->gt->engine_class[engine->class][i];
4050
4051 if (sibling) {
4052 if (engine->breadcrumbs != sibling->breadcrumbs) {
4053 intel_breadcrumbs_put(engine->breadcrumbs);
4054 engine->breadcrumbs =
4055 intel_breadcrumbs_get(sibling->breadcrumbs);
4056 }
4057 break;
4058 }
4059 }
4060
4061 if (engine->breadcrumbs) {
4062 engine->breadcrumbs->engine_mask |= engine->mask;
4063 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
4064 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
4065 }
4066 }
4067
static void guc_bump_inflight_request_prio(struct i915_request *rq,
					   int prio)
4070 {
4071 struct intel_context *ce = request_to_scheduling_context(rq);
4072 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
4073
	/* Short circuit if no priority update is needed */
4075 if (prio < I915_PRIORITY_NORMAL ||
4076 rq->guc_prio == GUC_PRIO_FINI ||
4077 (rq->guc_prio != GUC_PRIO_INIT &&
4078 !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
4079 return;
4080
4081 spin_lock(&ce->guc_state.lock);
4082 if (rq->guc_prio != GUC_PRIO_FINI) {
4083 if (rq->guc_prio != GUC_PRIO_INIT)
4084 sub_context_inflight_prio(ce, rq->guc_prio);
4085 rq->guc_prio = new_guc_prio;
4086 add_context_inflight_prio(ce, rq->guc_prio);
4087 update_context_prio(ce);
4088 }
4089 spin_unlock(&ce->guc_state.lock);
4090 }
4091
static void guc_retire_inflight_request_prio(struct i915_request *rq)
4093 {
4094 struct intel_context *ce = request_to_scheduling_context(rq);
4095
4096 spin_lock(&ce->guc_state.lock);
4097 guc_prio_fini(rq, ce);
4098 spin_unlock(&ce->guc_state.lock);
4099 }
4100
static void sanitize_hwsp(struct intel_engine_cs *engine)
4102 {
4103 struct intel_timeline *tl;
4104
4105 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
4106 intel_timeline_reset_seqno(tl);
4107 }
4108
static void guc_sanitize(struct intel_engine_cs *engine)
4110 {
	/*
	 * Poison residual state on resume, in case the suspend didn't!
	 *
	 * We have to assume that across suspend/resume (or other loss
	 * of control) the contents of our pinned buffers have been
	 * lost, replaced by garbage. Since this doesn't always happen,
	 * let's poison such state so that we more quickly spot when
	 * we falsely assume it has been preserved.
	 */
4120 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4121 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4122
4123 /*
4124 * The kernel_context HWSP is stored in the status_page. As above,
4125 * that may be lost on resume/initialisation, and so we need to
4126 * reset the value in the HWSP.
4127 */
4128 sanitize_hwsp(engine);
4129
4130 /* And scrub the dirty cachelines for the HWSP */
4131 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
4132
4133 intel_engine_reset_pinned_contexts(engine);
4134 }
4135
static void setup_hwsp(struct intel_engine_cs *engine)
4137 {
4138 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4139
4140 ENGINE_WRITE_FW(engine,
4141 RING_HWS_PGA,
4142 i915_ggtt_offset(engine->status_page.vma));
4143 }
4144
static void start_engine(struct intel_engine_cs *engine)
4146 {
4147 ENGINE_WRITE_FW(engine,
4148 RING_MODE_GEN7,
4149 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
4150
4151 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4152 ENGINE_POSTING_READ(engine, RING_MI_MODE);
4153 }
4154
static int guc_resume(struct intel_engine_cs *engine)
4156 {
4157 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4158
4159 intel_mocs_init_engine(engine);
4160
4161 intel_breadcrumbs_reset(engine->breadcrumbs);
4162
4163 setup_hwsp(engine);
4164 start_engine(engine);
4165
4166 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
4167 xehp_enable_ccs_engines(engine);
4168
4169 return 0;
4170 }
4171
static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
4173 {
4174 return !sched_engine->tasklet.callback;
4175 }
4176
static void guc_set_default_submission(struct intel_engine_cs *engine)
4178 {
4179 engine->submit_request = guc_submit_request;
4180 }
4181
static inline int guc_kernel_context_pin(struct intel_guc *guc,
					 struct intel_context *ce)
4184 {
4185 int ret;
4186
	/*
	 * Note: we purposefully do not check the returns below because
	 * the registration can only fail if a reset is just starting.
	 * This is called at the end of reset so presumably another reset
	 * isn't happening and even if it did this code would be run again.
	 */
4193
4194 if (context_guc_id_invalid(ce)) {
4195 ret = pin_guc_id(guc, ce);
4196
4197 if (ret < 0)
4198 return ret;
4199 }
4200
4201 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags))
4202 guc_context_init(ce);
4203
4204 ret = try_context_registration(ce, true);
4205 if (ret)
4206 unpin_guc_id(guc, ce);
4207
4208 return ret;
4209 }
4210
static inline int guc_init_submission(struct intel_guc *guc)
4212 {
4213 struct intel_gt *gt = guc_to_gt(guc);
4214 struct intel_engine_cs *engine;
4215 enum intel_engine_id id;
4216
4217 /* make sure all descriptors are clean... */
4218 xa_destroy(&guc->context_lookup);
4219
4220 /*
4221 * A reset might have occurred while we had a pending stalled request,
4222 * so make sure we clean that up.
4223 */
4224 guc->stalled_request = NULL;
4225 guc->submission_stall_reason = STALL_NONE;
4226
	/*
	 * Some contexts might have been pinned before we enabled GuC
	 * submission, so we need to add them to the GuC bookkeeping.
	 * Also, after a reset of the GuC we want to make sure that the
	 * information shared with GuC is properly reset. The kernel LRCs are
	 * not attached to the gem_context, so they need to be added separately.
	 */
4234 for_each_engine(engine, gt, id) {
4235 struct intel_context *ce;
4236
4237 list_for_each_entry(ce, &engine->pinned_contexts_list,
4238 pinned_contexts_link) {
4239 int ret = guc_kernel_context_pin(guc, ce);
4240
4241 if (ret) {
4242 /* No point in trying to clean up as i915 will wedge on failure */
4243 return ret;
4244 }
4245 }
4246 }
4247
4248 return 0;
4249 }
4250
static void guc_release(struct intel_engine_cs *engine)
4252 {
4253 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4254
4255 intel_engine_cleanup_common(engine);
4256 lrc_fini_wa_ctx(engine);
4257 }
4258
static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4260 {
4261 struct intel_engine_cs *e;
4262 intel_engine_mask_t tmp, mask = engine->mask;
4263
4264 for_each_engine_masked(e, engine->gt, mask, tmp)
4265 e->serial++;
4266 }
4267
static void guc_default_vfuncs(struct intel_engine_cs *engine)
4269 {
4270 /* Default vfuncs which can be overridden by each engine. */
4271
4272 engine->resume = guc_resume;
4273
4274 engine->cops = &guc_context_ops;
4275 engine->request_alloc = guc_request_alloc;
4276 engine->add_active_request = add_to_context;
4277 engine->remove_active_request = remove_from_context;
4278
4279 engine->sched_engine->schedule = i915_schedule;
4280
4281 engine->reset.prepare = guc_engine_reset_prepare;
4282 engine->reset.rewind = guc_rewind_nop;
4283 engine->reset.cancel = guc_reset_nop;
4284 engine->reset.finish = guc_reset_nop;
4285
4286 engine->emit_flush = gen8_emit_flush_xcs;
4287 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4288 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4289 if (GRAPHICS_VER(engine->i915) >= 12) {
4290 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4291 engine->emit_flush = gen12_emit_flush_xcs;
4292 }
4293 engine->set_default_submission = guc_set_default_submission;
4294 engine->busyness = guc_engine_busyness;
4295
4296 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4297 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4298 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4299
4300 /* Wa_14014475959:dg2 */
4301 if (engine->class == COMPUTE_CLASS)
4302 if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
4303 IS_DG2(engine->i915))
4304 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
4305
4306 /*
4307 * TODO: GuC supports timeslicing and semaphores as well, but they're
4308 * handled by the firmware so some minor tweaks are required before
4309 * enabling.
4310 *
4311 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4312 */
4313
4314 engine->emit_bb_start = gen8_emit_bb_start;
4315 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
4316 engine->emit_bb_start = xehp_emit_bb_start;
4317 }
4318
static void rcs_submission_override(struct intel_engine_cs *engine)
4320 {
4321 switch (GRAPHICS_VER(engine->i915)) {
4322 case 12:
4323 engine->emit_flush = gen12_emit_flush_rcs;
4324 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4325 break;
4326 case 11:
4327 engine->emit_flush = gen11_emit_flush_rcs;
4328 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4329 break;
4330 default:
4331 engine->emit_flush = gen8_emit_flush_rcs;
4332 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4333 break;
4334 }
4335 }
4336
static inline void guc_default_irqs(struct intel_engine_cs *engine)
4338 {
4339 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
4340 intel_engine_set_irq_handler(engine, cs_irq_handler);
4341 }
4342
static void guc_sched_engine_destroy(struct kref *kref)
4344 {
4345 struct i915_sched_engine *sched_engine =
4346 container_of(kref, typeof(*sched_engine), ref);
4347 struct intel_guc *guc = sched_engine->private_data;
4348
4349 guc->sched_engine = NULL;
4350 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4351 kfree(sched_engine);
4352 }
4353
int intel_guc_submission_setup(struct intel_engine_cs *engine)
4355 {
4356 struct drm_i915_private *i915 = engine->i915;
4357 struct intel_guc *guc = &engine->gt->uc.guc;
4358
4359 /*
4360 * The setup relies on several assumptions (e.g. irqs always enabled)
4361 * that are only valid on gen11+
4362 */
4363 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
4364
4365 if (!guc->sched_engine) {
4366 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
4367 if (!guc->sched_engine)
4368 return -ENOMEM;
4369
4370 guc->sched_engine->schedule = i915_schedule;
4371 guc->sched_engine->disabled = guc_sched_engine_disabled;
4372 guc->sched_engine->private_data = guc;
4373 guc->sched_engine->destroy = guc_sched_engine_destroy;
4374 guc->sched_engine->bump_inflight_request_prio =
4375 guc_bump_inflight_request_prio;
4376 guc->sched_engine->retire_inflight_request_prio =
4377 guc_retire_inflight_request_prio;
4378 tasklet_setup(&guc->sched_engine->tasklet,
4379 guc_submission_tasklet);
4380 }
4381 i915_sched_engine_put(engine->sched_engine);
4382 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4383
4384 guc_default_vfuncs(engine);
4385 guc_default_irqs(engine);
4386 guc_init_breadcrumbs(engine);
4387
4388 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
4389 rcs_submission_override(engine);
4390
4391 lrc_init_wa_ctx(engine);
4392
4393 /* Finally, take ownership and responsibility for cleanup! */
4394 engine->sanitize = guc_sanitize;
4395 engine->release = guc_release;
4396
4397 return 0;
4398 }
4399
4400 struct scheduling_policy {
4401 /* internal data */
4402 u32 max_words, num_words;
4403 u32 count;
4404 /* API data */
4405 struct guc_update_scheduling_policy h2g;
4406 };
4407
static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
4409 {
4410 u32 *start = (void *)&policy->h2g;
4411 u32 *end = policy->h2g.data + policy->num_words;
4412 size_t delta = end - start;
4413
4414 return delta;
4415 }
4416
static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
4418 {
4419 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
4420 policy->max_words = ARRAY_SIZE(policy->h2g.data);
4421 policy->num_words = 0;
4422 policy->count = 0;
4423
4424 return policy;
4425 }
4426
static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
					    u32 action, u32 *data, u32 len)
4429 {
4430 u32 *klv_ptr = policy->h2g.data + policy->num_words;
4431
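	/* A KLV is one key/length header dword followed by 'len' dwords of data */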
4432 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);
4433 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) |
4434 FIELD_PREP(GUC_KLV_0_LEN, len);
4435 memcpy(klv_ptr, data, sizeof(u32) * len);
4436 policy->num_words += 1 + len;
4437 policy->count++;
4438 }
4439
static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
						struct scheduling_policy *policy)
4442 {
4443 int ret;
4444
4445 ret = intel_guc_send(guc, (u32 *)&policy->h2g,
4446 __guc_scheduling_policy_action_size(policy));
4447 if (ret < 0) {
4448 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n",
4449 ERR_PTR(ret));
4450 return ret;
4451 }
4452
4453 if (ret != policy->count) {
4454 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!",
4455 ret, policy->count);
4456 if (ret > policy->count)
4457 return -EPROTO;
4458 }
4459
4460 return 0;
4461 }
4462
static int guc_init_global_schedule_policy(struct intel_guc *guc)
4464 {
4465 struct scheduling_policy policy;
4466 struct intel_gt *gt = guc_to_gt(guc);
4467 intel_wakeref_t wakeref;
4468 int ret;
4469
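	/* Global scheduling policy KLVs require GuC submission interface 1.1.0+ */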
4470 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
4471 return 0;
4472
4473 __guc_scheduling_policy_start_klv(&policy);
4474
4475 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) {
4476 u32 yield[] = {
4477 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
4478 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
4479 };
4480
4481 __guc_scheduling_policy_add_klv(&policy,
4482 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
4483 yield, ARRAY_SIZE(yield));
4484
4485 ret = __guc_action_set_scheduling_policies(guc, &policy);
4486 }
4487
4488 return ret;
4489 }
4490
static void guc_route_semaphores(struct intel_guc *guc, bool to_guc)
4492 {
4493 struct intel_gt *gt = guc_to_gt(guc);
4494 u32 val;
4495
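	/* Semaphore interrupt steering to the GuC only exists on graphics version 12+ */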
4496 if (GRAPHICS_VER(gt->i915) < 12)
4497 return;
4498
4499 if (to_guc)
4500 val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL;
4501 else
4502 val = 0;
4503
4504 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val);
4505 }
4506
int intel_guc_submission_enable(struct intel_guc *guc)
4508 {
4509 int ret;
4510
4511 /* Semaphore interrupt enable and route to GuC */
4512 guc_route_semaphores(guc, true);
4513
4514 ret = guc_init_submission(guc);
4515 if (ret)
4516 goto fail_sem;
4517
4518 ret = guc_init_engine_stats(guc);
4519 if (ret)
4520 goto fail_sem;
4521
4522 ret = guc_init_global_schedule_policy(guc);
4523 if (ret)
4524 goto fail_stats;
4525
4526 return 0;
4527
4528 fail_stats:
4529 guc_fini_engine_stats(guc);
4530 fail_sem:
4531 guc_route_semaphores(guc, false);
4532 return ret;
4533 }
4534
4535 /* Note: By the time we're here, GuC may have already been reset */
void intel_guc_submission_disable(struct intel_guc *guc)
4537 {
4538 guc_cancel_busyness_worker(guc);
4539
4540 /* Semaphore interrupt disable and route to host */
4541 guc_route_semaphores(guc, false);
4542 }
4543
static bool __guc_submission_supported(struct intel_guc *guc)
4545 {
4546 /* GuC submission is unavailable for pre-Gen11 */
4547 return intel_guc_is_supported(guc) &&
4548 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
4549 }
4550
static bool __guc_submission_selected(struct intel_guc *guc)
4552 {
4553 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4554
4555 if (!intel_guc_submission_is_supported(guc))
4556 return false;
4557
4558 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
4559 }
4560
int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
4562 {
4563 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
4564 }
4565
/*
 * This default value of 33 milliseconds (+1 millisecond round up) ensures 30fps or higher
 * workloads are able to enjoy the latency reduction when delaying the schedule-disable
 * operation. This matches the 30fps game-render + encode (real world) workload this
 * knob was tested against.
 */
4572 #define SCHED_DISABLE_DELAY_MS 34
4573
4574 /*
4575 * A threshold of 75% is a reasonable starting point considering that real world apps
4576 * generally don't get anywhere near this.
4577 */
#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
	(((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)
4580
void intel_guc_submission_init_early(struct intel_guc *guc)
4582 {
4583 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
4584
4585 spin_lock_init(&guc->submission_state.lock);
4586 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4587 ida_init(&guc->submission_state.guc_ids);
4588 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4589 INIT_WORK(&guc->submission_state.destroyed_worker,
4590 destroyed_worker_func);
4591 INIT_WORK(&guc->submission_state.reset_fail_worker,
4592 reset_fail_worker_func);
4593
4594 spin_lock_init(&guc->timestamp.lock);
4595 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4596
4597 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
4598 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
4599 guc->submission_state.sched_disable_gucid_threshold =
4600 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
4601 guc->submission_supported = __guc_submission_supported(guc);
4602 guc->submission_selected = __guc_submission_selected(guc);
4603 }
4604
4605 static inline struct intel_context *
g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4607 {
4608 struct intel_context *ce;
4609
4610 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4611 guc_err(guc, "Invalid ctx_id %u\n", ctx_id);
4612 return NULL;
4613 }
4614
4615 ce = __get_context(guc, ctx_id);
4616 if (unlikely(!ce)) {
4617 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id);
4618 return NULL;
4619 }
4620
4621 if (unlikely(intel_context_is_child(ce))) {
4622 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id);
4623 return NULL;
4624 }
4625
4626 return ce;
4627 }
4628
int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
					  const u32 *msg,
					  u32 len)
4632 {
4633 struct intel_context *ce;
4634 u32 ctx_id;
4635
4636 if (unlikely(len < 1)) {
4637 guc_err(guc, "Invalid length %u\n", len);
4638 return -EPROTO;
4639 }
4640 ctx_id = msg[0];
4641
4642 ce = g2h_context_lookup(guc, ctx_id);
4643 if (unlikely(!ce))
4644 return -EPROTO;
4645
4646 trace_intel_context_deregister_done(ce);
4647
4648 #ifdef CONFIG_DRM_I915_SELFTEST
4649 if (unlikely(ce->drop_deregister)) {
4650 ce->drop_deregister = false;
4651 return 0;
4652 }
4653 #endif
4654
4655 if (context_wait_for_deregister_to_register(ce)) {
4656 struct intel_runtime_pm *runtime_pm =
4657 &ce->engine->gt->i915->runtime_pm;
4658 intel_wakeref_t wakeref;
4659
		/*
		 * The previous owner of this guc_id has been deregistered, so
		 * it is now safe to register this context.
		 */
4664 with_intel_runtime_pm(runtime_pm, wakeref)
4665 register_context(ce, true);
4666 guc_signal_context_fence(ce);
4667 intel_context_put(ce);
4668 } else if (context_destroyed(ce)) {
4669 /* Context has been destroyed */
4670 intel_gt_pm_put_async(guc_to_gt(guc));
4671 release_guc_id(guc, ce);
4672 __guc_context_destroy(ce);
4673 }
4674
4675 decr_outstanding_submission_g2h(guc);
4676
4677 return 0;
4678 }
4679
int intel_guc_sched_done_process_msg(struct intel_guc *guc,
				     const u32 *msg,
				     u32 len)
4683 {
4684 struct intel_context *ce;
4685 unsigned long flags;
4686 u32 ctx_id;
4687
4688 if (unlikely(len < 2)) {
4689 guc_err(guc, "Invalid length %u\n", len);
4690 return -EPROTO;
4691 }
4692 ctx_id = msg[0];
4693
4694 ce = g2h_context_lookup(guc, ctx_id);
4695 if (unlikely(!ce))
4696 return -EPROTO;
4697
4698 if (unlikely(context_destroyed(ce) ||
4699 (!context_pending_enable(ce) &&
4700 !context_pending_disable(ce)))) {
4701 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n",
4702 ce->guc_state.sched_state, ctx_id);
4703 return -EPROTO;
4704 }
4705
4706 trace_intel_context_sched_done(ce);
4707
4708 if (context_pending_enable(ce)) {
4709 #ifdef CONFIG_DRM_I915_SELFTEST
4710 if (unlikely(ce->drop_schedule_enable)) {
4711 ce->drop_schedule_enable = false;
4712 return 0;
4713 }
4714 #endif
4715
4716 spin_lock_irqsave(&ce->guc_state.lock, flags);
4717 clr_context_pending_enable(ce);
4718 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4719 } else if (context_pending_disable(ce)) {
4720 bool banned;
4721
4722 #ifdef CONFIG_DRM_I915_SELFTEST
4723 if (unlikely(ce->drop_schedule_disable)) {
4724 ce->drop_schedule_disable = false;
4725 return 0;
4726 }
4727 #endif
4728
		/*
		 * Unpin must be done before __guc_signal_context_fence,
		 * otherwise there is a race where requests can be submitted
		 * and retired before this unpin completes, resulting in the
		 * pin_count going to zero while the context is still enabled.
		 */
4736 intel_context_sched_disable_unpin(ce);
4737
4738 spin_lock_irqsave(&ce->guc_state.lock, flags);
4739 banned = context_banned(ce);
4740 clr_context_banned(ce);
4741 clr_context_pending_disable(ce);
4742 __guc_signal_context_fence(ce);
4743 guc_blocked_fence_complete(ce);
4744 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4745
4746 if (banned) {
4747 guc_cancel_context_requests(ce);
4748 intel_engine_signal_breadcrumbs(ce->engine);
4749 }
4750 }
4751
4752 decr_outstanding_submission_g2h(guc);
4753 intel_context_put(ce);
4754
4755 return 0;
4756 }
4757
static void capture_error_state(struct intel_guc *guc,
				struct intel_context *ce)
4760 {
4761 struct intel_gt *gt = guc_to_gt(guc);
4762 struct drm_i915_private *i915 = gt->i915;
4763 intel_wakeref_t wakeref;
4764 intel_engine_mask_t engine_mask;
4765
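	/*
	 * For a virtual engine, only the physical siblings that the GuC
	 * capture identifies as hung are marked and counted; if none match,
	 * fall back to capturing all engines.
	 */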
4766 if (intel_engine_is_virtual(ce->engine)) {
4767 struct intel_engine_cs *e;
4768 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask;
4769
4770 engine_mask = 0;
4771 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) {
4772 bool match = intel_guc_capture_is_matching_engine(gt, ce, e);
4773
4774 if (match) {
4775 intel_engine_set_hung_context(e, ce);
4776 engine_mask |= e->mask;
4777 i915_increase_reset_engine_count(&i915->gpu_error,
4778 e);
4779 }
4780 }
4781
4782 if (!engine_mask) {
4783 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s",
4784 ce->guc_id.id, ce->engine->name);
4785 engine_mask = ~0U;
4786 }
4787 } else {
4788 intel_engine_set_hung_context(ce->engine, ce);
4789 engine_mask = ce->engine->mask;
4790 i915_increase_reset_engine_count(&i915->gpu_error, ce->engine);
4791 }
4792
4793 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
4794 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
4795 }
4796
static void guc_context_replay(struct intel_context *ce)
4798 {
4799 struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
4800
4801 __guc_reset_context(ce, ce->engine->mask);
4802 tasklet_hi_schedule(&sched_engine->tasklet);
4803 }
4804
static void guc_handle_context_reset(struct intel_guc *guc,
				     struct intel_context *ce)
4807 {
4808 trace_intel_context_reset(ce);
4809
4810 guc_dbg(guc, "Got context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n",
4811 ce->guc_id.id, ce->engine->name,
4812 str_yes_no(intel_context_is_exiting(ce)),
4813 str_yes_no(intel_context_is_banned(ce)));
4814
4815 if (likely(intel_context_is_schedulable(ce))) {
4816 capture_error_state(guc, ce);
4817 guc_context_replay(ce);
4818 } else {
4819 guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s",
4820 ce->guc_id.id, ce->engine->name);
4821 }
4822 }
4823
int intel_guc_context_reset_process_msg(struct intel_guc *guc,
					const u32 *msg, u32 len)
4826 {
4827 struct intel_context *ce;
4828 unsigned long flags;
4829 int ctx_id;
4830
4831 if (unlikely(len != 1)) {
4832 guc_err(guc, "Invalid length %u", len);
4833 return -EPROTO;
4834 }
4835
4836 ctx_id = msg[0];
4837
4838 /*
4839 * The context lookup uses the xarray but lookups only require an RCU lock
4840 * not the full spinlock. So take the lock explicitly and keep it until the
4841 * context has been reference count locked to ensure it can't be destroyed
4842 * asynchronously until the reset is done.
4843 */
4844 xa_lock_irqsave(&guc->context_lookup, flags);
4845 ce = g2h_context_lookup(guc, ctx_id);
4846 if (ce)
4847 intel_context_get(ce);
4848 xa_unlock_irqrestore(&guc->context_lookup, flags);
4849
4850 if (unlikely(!ce))
4851 return -EPROTO;
4852
4853 guc_handle_context_reset(guc, ce);
4854 intel_context_put(ce);
4855
4856 return 0;
4857 }
4858
int intel_guc_error_capture_process_msg(struct intel_guc *guc,
					const u32 *msg, u32 len)
4861 {
4862 u32 status;
4863
4864 if (unlikely(len != 1)) {
4865 guc_dbg(guc, "Invalid length %u", len);
4866 return -EPROTO;
4867 }
4868
4869 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
4870 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4871 guc_warn(guc, "No space for error capture");
4872
4873 intel_guc_capture_process(guc);
4874
4875 return 0;
4876 }
4877
4878 struct intel_engine_cs *
intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4880 {
4881 struct intel_gt *gt = guc_to_gt(guc);
4882 u8 engine_class = guc_class_to_engine_class(guc_class);
4883
4884 /* Class index is checked in class converter */
4885 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4886
4887 return gt->engine_class[engine_class][instance];
4888 }
4889
static void reset_fail_worker_func(struct work_struct *w)
4891 {
4892 struct intel_guc *guc = container_of(w, struct intel_guc,
4893 submission_state.reset_fail_worker);
4894 struct intel_gt *gt = guc_to_gt(guc);
4895 intel_engine_mask_t reset_fail_mask;
4896 unsigned long flags;
4897
4898 spin_lock_irqsave(&guc->submission_state.lock, flags);
4899 reset_fail_mask = guc->submission_state.reset_fail_mask;
4900 guc->submission_state.reset_fail_mask = 0;
4901 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4902
4903 if (likely(reset_fail_mask)) {
4904 struct intel_engine_cs *engine;
4905 enum intel_engine_id id;
4906
		/*
		 * GuC is toast at this point - it dead loops after sending the
		 * failed reset notification. So we need to manually determine
		 * the guilty context. Note that it should be reliable to do
		 * this here because the GuC is toast and will not be
		 * scheduling behind the KMD's back.
		 */
4913 for_each_engine_masked(engine, gt, reset_fail_mask, id)
4914 intel_guc_find_hung_context(engine);
4915
4916 intel_gt_handle_error(gt, reset_fail_mask,
4917 I915_ERROR_CAPTURE,
4918 "GuC failed to reset engine mask=0x%x",
4919 reset_fail_mask);
4920 }
4921 }
4922
int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
					 const u32 *msg, u32 len)
4925 {
4926 struct intel_engine_cs *engine;
4927 u8 guc_class, instance;
4928 u32 reason;
4929 unsigned long flags;
4930
4931 if (unlikely(len != 3)) {
4932 guc_err(guc, "Invalid length %u", len);
4933 return -EPROTO;
4934 }
4935
4936 guc_class = msg[0];
4937 instance = msg[1];
4938 reason = msg[2];
4939
4940 engine = intel_guc_lookup_engine(guc, guc_class, instance);
4941 if (unlikely(!engine)) {
4942 guc_err(guc, "Invalid engine %d:%d", guc_class, instance);
4943 return -EPROTO;
4944 }
4945
	/*
	 * This is an unexpected failure of a hardware feature. So, log a real
	 * error message, not just the informational one that comes with the reset.
	 */
4950 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X",
4951 guc_class, instance, engine->name, reason);
4952
4953 spin_lock_irqsave(&guc->submission_state.lock, flags);
4954 guc->submission_state.reset_fail_mask |= engine->mask;
4955 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4956
4957 /*
4958 * A GT reset flushes this worker queue (G2H handler) so we must use
4959 * another worker to trigger a GT reset.
4960 */
4961 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
4962
4963 return 0;
4964 }
4965
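/*
 * Identify a context to blame for a hang on @engine: walk the guc_id lookup
 * table for pinned contexts bound to this engine and report the first one
 * that still has an active request via intel_engine_set_hung_context().
 */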
4966 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
4967 {
4968 struct intel_guc *guc = &engine->gt->uc.guc;
4969 struct intel_context *ce;
4970 struct i915_request *rq;
4971 unsigned long index;
4972 unsigned long flags;
4973
4974 /* Reset called during driver load? GuC not yet initialised! */
4975 if (unlikely(!guc_submission_initialized(guc)))
4976 return;
4977
4978 xa_lock_irqsave(&guc->context_lookup, flags);
4979 xa_for_each(&guc->context_lookup, index, ce) {
4980 bool found;
4981
4982 if (!kref_get_unless_zero(&ce->ref))
4983 continue;
4984
4985 xa_unlock(&guc->context_lookup);
4986
4987 if (!intel_context_is_pinned(ce))
4988 goto next;
4989
4990 if (intel_engine_is_virtual(ce->engine)) {
4991 if (!(ce->engine->mask & engine->mask))
4992 goto next;
4993 } else {
4994 if (ce->engine != engine)
4995 goto next;
4996 }
4997
4998 found = false;
4999 spin_lock(&ce->guc_state.lock);
5000 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
5001 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
5002 continue;
5003
5004 found = true;
5005 break;
5006 }
5007 spin_unlock(&ce->guc_state.lock);
5008
5009 if (found) {
5010 intel_engine_set_hung_context(engine, ce);
5011
5012 /* Can only cope with one hang at a time... */
5013 intel_context_put(ce);
5014 xa_lock(&guc->context_lookup);
5015 goto done;
5016 }
5017
5018 next:
5019 intel_context_put(ce);
5020 xa_lock(&guc->context_lookup);
5021 }
5022 done:
5023 xa_unlock_irqrestore(&guc->context_lookup, flags);
5024 }
5025
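/*
 * Debug helper: for every pinned context bound to @engine, dump its
 * guc_state.requests list via intel_engine_dump_active_requests(), passing
 * along @hung_rq so the hung request can be called out.
 */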
5026 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
5027 struct i915_request *hung_rq,
5028 struct drm_printer *m)
5029 {
5030 struct intel_guc *guc = &engine->gt->uc.guc;
5031 struct intel_context *ce;
5032 unsigned long index;
5033 unsigned long flags;
5034
5035 /* Reset called during driver load? GuC not yet initialised! */
5036 if (unlikely(!guc_submission_initialized(guc)))
5037 return;
5038
5039 xa_lock_irqsave(&guc->context_lookup, flags);
5040 xa_for_each(&guc->context_lookup, index, ce) {
5041 if (!kref_get_unless_zero(&ce->ref))
5042 continue;
5043
5044 xa_unlock(&guc->context_lookup);
5045
5046 if (!intel_context_is_pinned(ce))
5047 goto next;
5048
5049 if (intel_engine_is_virtual(ce->engine)) {
5050 if (!(ce->engine->mask & engine->mask))
5051 goto next;
5052 } else {
5053 if (ce->engine != engine)
5054 goto next;
5055 }
5056
5057 spin_lock(&ce->guc_state.lock);
5058 intel_engine_dump_active_requests(&ce->guc_state.requests,
5059 hung_rq, m);
5060 spin_unlock(&ce->guc_state.lock);
5061
5062 next:
5063 intel_context_put(ce);
5064 xa_lock(&guc->context_lookup);
5065 }
5066 xa_unlock_irqrestore(&guc->context_lookup, flags);
5067 }
5068
5069 void intel_guc_submission_print_info(struct intel_guc *guc,
5070 struct drm_printer *p)
5071 {
5072 struct i915_sched_engine *sched_engine = guc->sched_engine;
5073 struct rb_node *rb;
5074 unsigned long flags;
5075
5076 if (!sched_engine)
5077 return;
5078
5079 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n",
5080 guc->submission_version.major, guc->submission_version.minor,
5081 guc->submission_version.patch);
5082 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
5083 atomic_read(&guc->outstanding_submission_g2h));
5084 drm_printf(p, "GuC tasklet count: %u\n",
5085 atomic_read(&sched_engine->tasklet.count));
5086
5087 spin_lock_irqsave(&sched_engine->lock, flags);
5088 drm_printf(p, "Requests in GuC submit tasklet:\n");
5089 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
5090 struct i915_priolist *pl = to_priolist(rb);
5091 struct i915_request *rq;
5092
5093 priolist_for_each_request(rq, pl)
5094 drm_printf(p, "guc_id=%u, seqno=%llu\n",
5095 rq->context->guc_id.id,
5096 rq->fence.seqno);
5097 }
5098 spin_unlock_irqrestore(&sched_engine->lock, flags);
5099 drm_printf(p, "\n");
5100 }
5101
5102 static inline void guc_log_context_priority(struct drm_printer *p,
5103 struct intel_context *ce)
5104 {
5105 int i;
5106
5107 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
5108 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
5109 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
5110 i < GUC_CLIENT_PRIORITY_NUM; ++i) {
5111 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
5112 i, ce->guc_state.prio_count[i]);
5113 }
5114 drm_printf(p, "\n");
5115 }
5116
5117 static inline void guc_log_context(struct drm_printer *p,
5118 struct intel_context *ce)
5119 {
5120 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
5121 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
5122 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
5123 ce->ring->head,
5124 ce->lrc_reg_state[CTX_RING_HEAD]);
5125 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
5126 ce->ring->tail,
5127 ce->lrc_reg_state[CTX_RING_TAIL]);
5128 drm_printf(p, "\t\tContext Pin Count: %u\n",
5129 atomic_read(&ce->pin_count));
5130 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
5131 atomic_read(&ce->guc_id.ref));
5132 drm_printf(p, "\t\tSchedule State: 0x%x\n",
5133 ce->guc_state.sched_state);
5134 }
5135
5136 void intel_guc_submission_print_context_info(struct intel_guc *guc,
5137 struct drm_printer *p)
5138 {
5139 struct intel_context *ce;
5140 unsigned long index;
5141 unsigned long flags;
5142
5143 xa_lock_irqsave(&guc->context_lookup, flags);
5144 xa_for_each(&guc->context_lookup, index, ce) {
5145 GEM_BUG_ON(intel_context_is_child(ce));
5146
5147 guc_log_context(p, ce);
5148 guc_log_context_priority(p, ce);
5149
5150 if (intel_context_is_parent(ce)) {
5151 struct intel_context *child;
5152
5153 drm_printf(p, "\t\tNumber children: %u\n",
5154 ce->parallel.number_children);
5155
5156 if (ce->parallel.guc.wq_status) {
5157 drm_printf(p, "\t\tWQI Head: %u\n",
5158 READ_ONCE(*ce->parallel.guc.wq_head));
5159 drm_printf(p, "\t\tWQI Tail: %u\n",
5160 READ_ONCE(*ce->parallel.guc.wq_tail));
5161 drm_printf(p, "\t\tWQI Status: %u\n",
5162 READ_ONCE(*ce->parallel.guc.wq_status));
5163 }
5164
5165 if (ce->engine->emit_bb_start ==
5166 emit_bb_start_parent_no_preempt_mid_batch) {
5167 u8 i;
5168
5169 drm_printf(p, "\t\tChildren Go: %u\n",
5170 get_children_go_value(ce));
5171 for (i = 0; i < ce->parallel.number_children; ++i)
5172 drm_printf(p, "\t\tChildren Join: %u\n",
5173 get_children_join_value(ce, i));
5174 }
5175
5176 for_each_child(ce, child)
5177 guc_log_context(p, child);
5178 }
5179 }
5180 xa_unlock_irqrestore(&guc->context_lookup, flags);
5181 }
5182
5183 static inline u32 get_children_go_addr(struct intel_context *ce)
5184 {
5185 GEM_BUG_ON(!intel_context_is_parent(ce));
5186
5187 return i915_ggtt_offset(ce->state) +
5188 __get_parent_scratch_offset(ce) +
5189 offsetof(struct parent_scratch, go.semaphore);
5190 }
5191
5192 static inline u32 get_children_join_addr(struct intel_context *ce,
5193 u8 child_index)
5194 {
5195 GEM_BUG_ON(!intel_context_is_parent(ce));
5196
5197 return i915_ggtt_offset(ce->state) +
5198 __get_parent_scratch_offset(ce) +
5199 offsetof(struct parent_scratch, join[child_index].semaphore);
5200 }
5201
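/*
 * Semaphore payloads for the parent/child handshakes below: the batch-start
 * handshake uses 1 and the fini-breadcrumb handshake uses 0, so a wait can
 * never be satisfied by a stale write left over from the previous phase.
 */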
5202 #define PARENT_GO_BB 1
5203 #define PARENT_GO_FINI_BREADCRUMB 0
5204 #define CHILD_GO_BB 1
5205 #define CHILD_GO_FINI_BREADCRUMB 0
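/*
 * Parent batch-start: wait for every child to signal its join semaphore,
 * disable arbitration so the parallel section cannot be preempted mid-batch,
 * release the children via the go semaphore, then jump to the user batch.
 */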
5206 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
5207 u64 offset, u32 len,
5208 const unsigned int flags)
5209 {
5210 struct intel_context *ce = rq->context;
5211 u32 *cs;
5212 u8 i;
5213
5214 GEM_BUG_ON(!intel_context_is_parent(ce));
5215
5216 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
5217 if (IS_ERR(cs))
5218 return PTR_ERR(cs);
5219
5220 /* Wait on children */
5221 for (i = 0; i < ce->parallel.number_children; ++i) {
5222 *cs++ = (MI_SEMAPHORE_WAIT |
5223 MI_SEMAPHORE_GLOBAL_GTT |
5224 MI_SEMAPHORE_POLL |
5225 MI_SEMAPHORE_SAD_EQ_SDD);
5226 *cs++ = PARENT_GO_BB;
5227 *cs++ = get_children_join_addr(ce, i);
5228 *cs++ = 0;
5229 }
5230
5231 /* Turn off preemption */
5232 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5233 *cs++ = MI_NOOP;
5234
5235 /* Tell children go */
5236 cs = gen8_emit_ggtt_write(cs,
5237 CHILD_GO_BB,
5238 get_children_go_addr(ce),
5239 0);
5240
5241 /* Jump to batch */
5242 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
5243 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5244 *cs++ = lower_32_bits(offset);
5245 *cs++ = upper_32_bits(offset);
5246 *cs++ = MI_NOOP;
5247
5248 intel_ring_advance(rq, cs);
5249
5250 return 0;
5251 }
5252
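/*
 * Child batch-start: signal this child's join semaphore, wait for the
 * parent's go, disable arbitration, then jump to the user batch.
 */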
5253 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
5254 u64 offset, u32 len,
5255 const unsigned int flags)
5256 {
5257 struct intel_context *ce = rq->context;
5258 struct intel_context *parent = intel_context_to_parent(ce);
5259 u32 *cs;
5260
5261 GEM_BUG_ON(!intel_context_is_child(ce));
5262
5263 cs = intel_ring_begin(rq, 12);
5264 if (IS_ERR(cs))
5265 return PTR_ERR(cs);
5266
5267 /* Signal parent */
5268 cs = gen8_emit_ggtt_write(cs,
5269 PARENT_GO_BB,
5270 get_children_join_addr(parent,
5271 ce->parallel.child_index),
5272 0);
5273
5274 /* Wait on parent for go */
5275 *cs++ = (MI_SEMAPHORE_WAIT |
5276 MI_SEMAPHORE_GLOBAL_GTT |
5277 MI_SEMAPHORE_POLL |
5278 MI_SEMAPHORE_SAD_EQ_SDD);
5279 *cs++ = CHILD_GO_BB;
5280 *cs++ = get_children_go_addr(parent);
5281 *cs++ = 0;
5282
5283 /* Turn off preemption */
5284 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5285
5286 /* Jump to batch */
5287 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
5288 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5289 *cs++ = lower_32_bits(offset);
5290 *cs++ = upper_32_bits(offset);
5291
5292 intel_ring_advance(rq, cs);
5293
5294 return 0;
5295 }
5296
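/*
 * Parent fini-breadcrumb handshake: wait for all children to reach their
 * fini breadcrumbs, re-enable arbitration, then release the children so each
 * request can go on to write its seqno.
 */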
5297 static u32 *
5298 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5299 u32 *cs)
5300 {
5301 struct intel_context *ce = rq->context;
5302 u8 i;
5303
5304 GEM_BUG_ON(!intel_context_is_parent(ce));
5305
5306 /* Wait on children */
5307 for (i = 0; i < ce->parallel.number_children; ++i) {
5308 *cs++ = (MI_SEMAPHORE_WAIT |
5309 MI_SEMAPHORE_GLOBAL_GTT |
5310 MI_SEMAPHORE_POLL |
5311 MI_SEMAPHORE_SAD_EQ_SDD);
5312 *cs++ = PARENT_GO_FINI_BREADCRUMB;
5313 *cs++ = get_children_join_addr(ce, i);
5314 *cs++ = 0;
5315 }
5316
5317 /* Turn on preemption */
5318 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5319 *cs++ = MI_NOOP;
5320
5321 /* Tell children go */
5322 cs = gen8_emit_ggtt_write(cs,
5323 CHILD_GO_FINI_BREADCRUMB,
5324 get_children_go_addr(ce),
5325 0);
5326
5327 return cs;
5328 }
5329
5330 /*
5331 * If this is true, a submission of multi-lrc requests had an error and the
5332 * requests need to be skipped. The front end (execbuf IOCTL) should've called
5333 * i915_request_skip which squashes the BB but we still need to emit the fini
5334 * breadcrumb seqno write. At this point we don't know how many of the
5335 * requests in the multi-lrc submission were generated so we can't do the
5336 * handshake between the parent and children (e.g. if 4 requests should be
5337 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
5338 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
5339 * has occurred on any of the requests in the submission / relationship.
5340 */
5341 static inline bool skip_handshake(struct i915_request *rq)
5342 {
5343 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
5344 }
5345
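/*
 * Dwords always emitted after the (possibly skipped) handshake: the seqno
 * breadcrumb write plus MI_USER_INTERRUPT and MI_NOOP. The GEM_BUG_ONs in the
 * emitters below verify this accounting against emit_fini_breadcrumb_dw.
 */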
5346 #define NON_SKIP_LEN 6
5347 static u32 *
5348 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5349 u32 *cs)
5350 {
5351 struct intel_context *ce = rq->context;
5352 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
5353 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
5354
5355 GEM_BUG_ON(!intel_context_is_parent(ce));
5356
5357 if (unlikely(skip_handshake(rq))) {
5358 /*
5359 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch;
5360 * the NON_SKIP_LEN comes from the length of the emits below.
5361 */
5362 memset(cs, 0, sizeof(u32) *
5363 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5364 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5365 } else {
5366 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
5367 }
5368
5369 /* Emit fini breadcrumb */
5370 before_fini_breadcrumb_user_interrupt_cs = cs;
5371 cs = gen8_emit_ggtt_write(cs,
5372 rq->fence.seqno,
5373 i915_request_active_timeline(rq)->hwsp_offset,
5374 0);
5375
5376 /* User interrupt */
5377 *cs++ = MI_USER_INTERRUPT;
5378 *cs++ = MI_NOOP;
5379
5380 /* Ensure our math for skip + emit is correct */
5381 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5382 cs);
5383 GEM_BUG_ON(start_fini_breadcrumb_cs +
5384 ce->engine->emit_fini_breadcrumb_dw != cs);
5385
5386 rq->tail = intel_ring_offset(rq, cs);
5387
5388 return cs;
5389 }
5390
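/*
 * Child fini-breadcrumb handshake: re-enable arbitration, signal the parent
 * that this child has reached its fini breadcrumb, then wait for the parent's
 * go before the seqno write that follows.
 */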
5391 static u32 *
5392 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5393 u32 *cs)
5394 {
5395 struct intel_context *ce = rq->context;
5396 struct intel_context *parent = intel_context_to_parent(ce);
5397
5398 GEM_BUG_ON(!intel_context_is_child(ce));
5399
5400 /* Turn on preemption */
5401 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5402 *cs++ = MI_NOOP;
5403
5404 /* Signal parent */
5405 cs = gen8_emit_ggtt_write(cs,
5406 PARENT_GO_FINI_BREADCRUMB,
5407 get_children_join_addr(parent,
5408 ce->parallel.child_index),
5409 0);
5410
5411 /* Wait on parent for go */
5412 *cs++ = (MI_SEMAPHORE_WAIT |
5413 MI_SEMAPHORE_GLOBAL_GTT |
5414 MI_SEMAPHORE_POLL |
5415 MI_SEMAPHORE_SAD_EQ_SDD);
5416 *cs++ = CHILD_GO_FINI_BREADCRUMB;
5417 *cs++ = get_children_go_addr(parent);
5418 *cs++ = 0;
5419
5420 return cs;
5421 }
5422
5423 static u32 *
5424 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5425 u32 *cs)
5426 {
5427 struct intel_context *ce = rq->context;
5428 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
5429 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
5430
5431 GEM_BUG_ON(!intel_context_is_child(ce));
5432
5433 if (unlikely(skip_handshake(rq))) {
5434 /*
5435 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch;
5436 * the NON_SKIP_LEN comes from the length of the emits below.
5437 */
5438 memset(cs, 0, sizeof(u32) *
5439 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
5440 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5441 } else {
5442 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
5443 }
5444
5445 /* Emit fini breadcrumb */
5446 before_fini_breadcrumb_user_interrupt_cs = cs;
5447 cs = gen8_emit_ggtt_write(cs,
5448 rq->fence.seqno,
5449 i915_request_active_timeline(rq)->hwsp_offset,
5450 0);
5451
5452 /* User interrupt */
5453 *cs++ = MI_USER_INTERRUPT;
5454 *cs++ = MI_NOOP;
5455
5456 /* Ensure our math for skip + emit is correct */
5457 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
5458 cs);
5459 GEM_BUG_ON(start_fini_breadcrumb_cs +
5460 ce->engine->emit_fini_breadcrumb_dw != cs);
5461
5462 rq->tail = intel_ring_offset(rq, cs);
5463
5464 return cs;
5465 }
5466
5467 #undef NON_SKIP_LEN
5468
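/*
 * Create a GuC-backed virtual engine over @siblings: the wrapper merges the
 * sibling masks, rejects duplicates and mixed engine classes, and inherits
 * its emission vfuncs and scheduling properties from the first sibling.
 */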
5469 static struct intel_context *
5470 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
5471 unsigned long flags)
5472 {
5473 struct guc_virtual_engine *ve;
5474 struct intel_guc *guc;
5475 unsigned int n;
5476 int err;
5477
5478 ve = kzalloc(sizeof(*ve), GFP_KERNEL);
5479 if (!ve)
5480 return ERR_PTR(-ENOMEM);
5481
5482 guc = &siblings[0]->gt->uc.guc;
5483
5484 ve->base.i915 = siblings[0]->i915;
5485 ve->base.gt = siblings[0]->gt;
5486 ve->base.uncore = siblings[0]->uncore;
5487 ve->base.id = -1;
5488
5489 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5490 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5491 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5492 ve->base.saturated = ALL_ENGINES;
5493
5494 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5495
5496 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
5497
5498 ve->base.cops = &virtual_guc_context_ops;
5499 ve->base.request_alloc = guc_request_alloc;
5500 ve->base.bump_serial = virtual_guc_bump_serial;
5501
5502 ve->base.submit_request = guc_submit_request;
5503
5504 ve->base.flags = I915_ENGINE_IS_VIRTUAL;
5505
5506 BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
5507 ve->base.mask = VIRTUAL_ENGINES;
5508
5509 intel_context_init(&ve->context, &ve->base);
5510
5511 for (n = 0; n < count; n++) {
5512 struct intel_engine_cs *sibling = siblings[n];
5513
5514 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5515 if (sibling->mask & ve->base.mask) {
5516 guc_dbg(guc, "duplicate %s entry in load balancer\n",
5517 sibling->name);
5518 err = -EINVAL;
5519 goto err_put;
5520 }
5521
5522 ve->base.mask |= sibling->mask;
5523 ve->base.logical_mask |= sibling->logical_mask;
5524
5525 if (n != 0 && ve->base.class != sibling->class) {
5526 guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
5527 sibling->class, ve->base.class);
5528 err = -EINVAL;
5529 goto err_put;
5530 } else if (n == 0) {
5531 ve->base.class = sibling->class;
5532 ve->base.uabi_class = sibling->uabi_class;
5533 snprintf(ve->base.name, sizeof(ve->base.name),
5534 "v%dx%d", ve->base.class, count);
5535 ve->base.context_size = sibling->context_size;
5536
5537 ve->base.add_active_request =
5538 sibling->add_active_request;
5539 ve->base.remove_active_request =
5540 sibling->remove_active_request;
5541 ve->base.emit_bb_start = sibling->emit_bb_start;
5542 ve->base.emit_flush = sibling->emit_flush;
5543 ve->base.emit_init_breadcrumb =
5544 sibling->emit_init_breadcrumb;
5545 ve->base.emit_fini_breadcrumb =
5546 sibling->emit_fini_breadcrumb;
5547 ve->base.emit_fini_breadcrumb_dw =
5548 sibling->emit_fini_breadcrumb_dw;
5549 ve->base.breadcrumbs =
5550 intel_breadcrumbs_get(sibling->breadcrumbs);
5551
5552 ve->base.flags |= sibling->flags;
5553
5554 ve->base.props.timeslice_duration_ms =
5555 sibling->props.timeslice_duration_ms;
5556 ve->base.props.preempt_timeout_ms =
5557 sibling->props.preempt_timeout_ms;
5558 }
5559 }
5560
5561 return &ve->context;
5562
5563 err_put:
5564 intel_context_put(&ve->context);
5565 return ERR_PTR(err);
5566 }
5567
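/*
 * Returns true if any physical engine backing the virtual engine @ve has a
 * non-zero heartbeat interval configured.
 */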
5568 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
5569 {
5570 struct intel_engine_cs *engine;
5571 intel_engine_mask_t tmp, mask = ve->mask;
5572
5573 for_each_engine_masked(engine, ve->gt, mask, tmp)
5574 if (READ_ONCE(engine->props.heartbeat_interval_ms))
5575 return true;
5576
5577 return false;
5578 }
5579
5580 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5581 #include "selftest_guc.c"
5582 #include "selftest_guc_multi_lrc.c"
5583 #include "selftest_guc_hangcheck.c"
5584 #endif
5585