// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "pxp/intel_pxp.h"

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_buffer_pool.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_debugfs.h"
#include "intel_gt_gmch.h"
#include "intel_gt_pm.h"
#include "intel_gt_regs.h"
#include "intel_gt_requests.h"
#include "intel_migrate.h"
#include "intel_mocs.h"
#include "intel_pm.h"
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_gt_sysfs.h"
#include "intel_uncore.h"
#include "shmem_utils.h"

static void __intel_gt_init_early(struct intel_gt *gt)
{
	spin_lock_init(&gt->irq_lock);

	INIT_LIST_HEAD(&gt->closed_vma);
	spin_lock_init(&gt->closed_lock);

	init_llist_head(&gt->watchdog.list);
	INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);

	intel_gt_init_buffer_pool(gt);
	intel_gt_init_reset(gt);
	intel_gt_init_requests(gt);
	intel_gt_init_timelines(gt);
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
	intel_gt_pm_init_early(gt);

	intel_uc_init_early(&gt->uc);
	intel_rps_init_early(&gt->rps);
}

/* Preliminary initialization of Tile 0 */
void intel_root_gt_init_early(struct drm_i915_private *i915)
{
	struct intel_gt *gt = to_gt(i915);

	gt->i915 = i915;
	gt->uncore = &i915->uncore;

	__intel_gt_init_early(gt);
}

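/*
 * Probe and register the local memory (LMEM) region backing this GT, if the
 * device provides one.  A missing region (-ENODEV) is not treated as an
 * error: the tile simply has no device-local memory.
 */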
static int intel_gt_probe_lmem(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	unsigned int instance = gt->info.id;
	int id = INTEL_REGION_LMEM_0 + instance;
	struct intel_memory_region *mem;
	int err;

	mem = intel_gt_setup_lmem(gt);
	if (IS_ERR(mem)) {
		err = PTR_ERR(mem);
		if (err == -ENODEV)
			return 0;

		drm_err(&i915->drm,
			"Failed to setup region(%d) type=%d\n",
			err, INTEL_MEMORY_LOCAL);
		return err;
	}

	mem->id = id;
	mem->instance = instance;

	intel_memory_region_set_name(mem, "local%u", mem->instance);

	GEM_BUG_ON(!HAS_REGION(i915, id));
	GEM_BUG_ON(i915->mm.regions[id]);
	i915->mm.regions[id] = mem;

	return 0;
}

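/*
 * Allocate the GGTT structure for this GT.  The allocation is drm-managed,
 * so it is released automatically when the drm device is torn down.
 */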
int intel_gt_assign_ggtt(struct intel_gt *gt)
{
	gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);

	return gt->ggtt ? 0 : -ENOMEM;
}

static const char * const intel_steering_types[] = {
	"L3BANK",
	"MSLICE",
	"LNCF",
};

static const struct intel_mmio_range icl_l3bank_steering_table[] = {
	{ 0x00B100, 0x00B3FF },
	{},
};

static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
	{ 0x004000, 0x004AFF },
	{ 0x00C800, 0x00CFFF },
	{ 0x00DD00, 0x00DDFF },
	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
	{},
};

static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
	{ 0x00B000, 0x00B0FF },
	{ 0x00D800, 0x00D8FF },
	{},
};

static const struct intel_mmio_range dg2_lncf_steering_table[] = {
	{ 0x00B000, 0x00B0FF },
	{ 0x00D880, 0x00D8FF },
	{},
};

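/*
 * Derive a mask of slices from the DSS fuse information; used below to build
 * the mslice mask, where a slice counts as present if any of its DSS are
 * enabled.
 */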
static u16 slicemask(struct intel_gt *gt, int count)
{
	u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0);

	return intel_slicemask_from_dssmask(dss_mask, count);
}

int intel_gt_init_mmio(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	intel_gt_init_clock_frequency(gt);

	intel_uc_init_mmio(&gt->uc);
	intel_sseu_info_init(gt);

	/*
	 * An mslice is unavailable only if both the meml3 for the slice is
	 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
	 */
	if (HAS_MSLICES(i915))
		gt->info.mslice_mask =
			slicemask(gt, GEN_DSS_PER_MSLICE) |
			(intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
			 GEN12_MEML3_EN_MASK);

	if (IS_DG2(i915)) {
		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
		gt->steering_table[LNCF] = dg2_lncf_steering_table;
	} else if (IS_XEHPSDV(i915)) {
		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
		gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
	} else if (GRAPHICS_VER(i915) >= 11 &&
		   GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
		gt->steering_table[L3BANK] = icl_l3bank_steering_table;
		gt->info.l3bank_mask =
			~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
			GEN10_L3BANK_MASK;
	} else if (HAS_MSLICES(i915)) {
		MISSING_CASE(INTEL_INFO(i915)->platform);
	}

	return intel_engines_init_mmio(gt);
}

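/* Reset the control/head/tail/start registers of a ring we do not use. */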
static void init_unused_ring(struct intel_gt *gt, u32 base)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_write(uncore, RING_CTL(base), 0);
	intel_uncore_write(uncore, RING_HEAD(base), 0);
	intel_uncore_write(uncore, RING_TAIL(base), 0);
	intel_uncore_write(uncore, RING_START(base), 0);
}

static void init_unused_rings(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_I830(i915)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
		init_unused_ring(gt, SRB2_BASE);
		init_unused_ring(gt, SRB3_BASE);
	} else if (GRAPHICS_VER(i915) == 2) {
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
	} else if (GRAPHICS_VER(i915) == 3) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, PRB2_BASE);
	}
}

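/*
 * Bring the GT hardware to a known state: apply and verify the GT
 * workarounds, initialise swizzling, quiesce the unused rings, enable PPGTT,
 * load the microcontroller firmware and program the MOCS tables.
 */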
int intel_gt_init_hw(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	gt->last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9)
		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));

	if (IS_HASWELL(i915))
		intel_uncore_write(uncore,
				   HSW_MI_PREDICATE_RESULT_2,
				   IS_HSW_GT3(i915) ?
				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(gt);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(gt, "init");

	intel_gt_init_swizzling(gt);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(gt);

	ret = i915_ppgtt_init_hw(gt);
	if (ret) {
		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(&gt->uc);
	if (ret) {
		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init(gt);

out:
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	return ret;
}

static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
{
	intel_uncore_rmw(uncore, reg, 0, set);
}

static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
{
	intel_uncore_rmw(uncore, reg, clr, 0);
}

static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
{
	intel_uncore_rmw(uncore, reg, 0, 0);
}

static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
{
	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
	GEN6_RING_FAULT_REG_POSTING_READ(engine);
}

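/*
 * Clear the GT error registers (PGTBL_ER, IPEIR, EIR and the per-engine or
 * global fault registers, depending on the platform generation).
 */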
void
intel_gt_clear_error_registers(struct intel_gt *gt,
			       intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	u32 eir;

	if (GRAPHICS_VER(i915) != 2)
		clear_register(uncore, PGTBL_ER);

	if (GRAPHICS_VER(i915) < 4)
		clear_register(uncore, IPEIR(RENDER_RING_BASE));
	else
		clear_register(uncore, IPEIR_I965);

	clear_register(uncore, EIR);
	eir = intel_uncore_read(uncore, EIR);
	if (eir) {
		/*
		 * some errors might have become stuck,
		 * mask them.
		 */
		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
		rmw_set(uncore, EMR, eir);
		intel_uncore_write(uncore, GEN2_IIR,
				   I915_MASTER_ERROR_INTERRUPT);
	}

	if (GRAPHICS_VER(i915) >= 12) {
		rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
		intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 8) {
		rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
		intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 6) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine_masked(engine, gt, engine_mask, id)
			gen6_clear_engine_error_register(engine);
	}
}

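/* Report any valid faults left in the per-engine fault registers (gen6-7). */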
static void gen6_check_faults(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 fault;

	for_each_engine(engine, gt, id) {
		fault = GEN6_RING_FAULT_REG_READ(engine);
		if (fault & RING_FAULT_VALID) {
			drm_dbg(&engine->i915->drm, "Unexpected fault\n"
				"\tAddr: 0x%08lx\n"
				"\tAddress space: %s\n"
				"\tSource ID: %d\n"
				"\tType: %d\n",
				fault & PAGE_MASK,
				fault & RING_FAULT_GTTSEL_MASK ?
				"GGTT" : "PPGTT",
				RING_FAULT_SRCID(fault),
				RING_FAULT_FAULT_TYPE(fault));
		}
	}
}

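/* Report any valid fault recorded in the single fault register (gen8+). */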
static void gen8_check_faults(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
	u32 fault;

	if (GRAPHICS_VER(gt->i915) >= 12) {
		fault_reg = GEN12_RING_FAULT_REG;
		fault_data0_reg = GEN12_FAULT_TLB_DATA0;
		fault_data1_reg = GEN12_FAULT_TLB_DATA1;
	} else {
		fault_reg = GEN8_RING_FAULT_REG;
		fault_data0_reg = GEN8_FAULT_TLB_DATA0;
		fault_data1_reg = GEN8_FAULT_TLB_DATA1;
	}

	fault = intel_uncore_read(uncore, fault_reg);
	if (fault & RING_FAULT_VALID) {
		u32 fault_data0, fault_data1;
		u64 fault_addr;

		fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
		fault_data1 = intel_uncore_read(uncore, fault_data1_reg);

		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
			     ((u64)fault_data0 << 12);

		drm_dbg(&uncore->i915->drm, "Unexpected fault\n"
			"\tAddr: 0x%08x_%08x\n"
			"\tAddress space: %s\n"
			"\tEngine ID: %d\n"
			"\tSource ID: %d\n"
			"\tType: %d\n",
			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
			GEN8_RING_FAULT_ENGINE_ID(fault),
			RING_FAULT_SRCID(fault),
			RING_FAULT_FAULT_TYPE(fault));
	}
}

void intel_gt_check_and_clear_faults(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
	if (GRAPHICS_VER(i915) >= 8)
		gen8_check_faults(gt);
	else if (GRAPHICS_VER(i915) >= 6)
		gen6_check_faults(gt);
	else
		return;

	intel_gt_clear_error_registers(gt, ALL_ENGINES);
}

void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	intel_wakeref_t wakeref;

	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	wmb();

	if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
		return;

	intel_gt_chipset_flush(gt);

	with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
		unsigned long flags;

		spin_lock_irqsave(&uncore->lock, flags);
		intel_uncore_posting_read_fw(uncore,
					     RING_HEAD(RENDER_RING_BASE));
		spin_unlock_irqrestore(&uncore->lock, flags);
	}
}

void intel_gt_chipset_flush(struct intel_gt *gt)
{
	wmb();
	if (GRAPHICS_VER(gt->i915) < 6)
		intel_gt_gmch_gen5_chipset_flush(gt);
}

void intel_gt_driver_register(struct intel_gt *gt)
{
	intel_gsc_init(&gt->gsc, gt->i915);

	intel_rps_driver_register(&gt->rps);

	intel_gt_debugfs_register(gt);
	intel_gt_sysfs_register(gt);
}

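/*
 * Allocate and pin a scratch page in the GGTT, preferring local memory, then
 * stolen memory, then internal pages as fallbacks.
 */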
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_lmem(i915, size,
					  I915_BO_ALLOC_VOLATILE |
					  I915_BO_ALLOC_GPU_ONLY);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_stolen(i915, size);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) {
		drm_err(&i915->drm, "Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
	if (ret)
		goto err_unref;

	gt->scratch = i915_vma_make_unshrinkable(vma);

	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void intel_gt_fini_scratch(struct intel_gt *gt)
{
	i915_vma_unpin_and_release(&gt->scratch, 0);
}

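/*
 * Pick the address space used for the GT's kernel context: a full PPGTT when
 * the platform supports more than aliasing PPGTT, otherwise the global GTT.
 */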
static struct i915_address_space *kernel_vm(struct intel_gt *gt)
{
	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
		return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
	else
		return i915_vm_get(&gt->ggtt->vm);
}

static int __engines_record_defaults(struct intel_gt *gt)
{
	struct i915_request *requests[I915_NUM_ENGINES] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_renderstate so;
		struct intel_context *ce;
		struct i915_request *rq;

		/* We must be able to switch to something! */
		GEM_BUG_ON(!engine->kernel_context);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_renderstate_init(&so, ce);
		if (err)
			goto err;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_fini;
		}

		err = intel_engine_emit_ctx_wa(rq);
		if (err)
			goto err_rq;

		err = intel_renderstate_emit(&so, rq);
		if (err)
			goto err_rq;

err_rq:
		requests[id] = i915_request_get(rq);
		i915_request_add(rq);
err_fini:
		intel_renderstate_fini(&so, ce);
err:
		if (err) {
			intel_context_put(ce);
			goto out;
		}
	}

	/* Flush the default context image to memory, and enable powersaving. */
	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
		err = -EIO;
		goto out;
	}

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct i915_request *rq;
		struct file *state;

		rq = requests[id];
		if (!rq)
			continue;

		if (rq->fence.error) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
		if (!rq->context->state)
			continue;

		/* Keep a copy of the state's backing pages; free the obj */
		state = shmem_create_from_object(rq->context->state->obj);
		if (IS_ERR(state)) {
			err = PTR_ERR(state);
			goto out;
		}
		rq->engine->default_state = state;
	}

out:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn-down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	if (err)
		intel_gt_set_wedged(gt);

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct intel_context *ce;
		struct i915_request *rq;

		rq = requests[id];
		if (!rq)
			continue;

		ce = rq->context;
		i915_request_put(rq);
		intel_context_put(ce);
	}
	return err;
}

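/* Confirm the engine workarounds are still applied after initialisation. */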
static int __engines_verify_workarounds(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, gt, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	/* Flush and restore the kernel context for safety */
	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
		err = -EIO;

	return err;
}

static void __intel_gt_disable(struct intel_gt *gt)
{
	intel_gt_set_wedged_on_fini(gt);

	intel_gt_suspend_prepare(gt);
	intel_gt_suspend_late(gt);

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
}

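/*
 * Wait for all outstanding requests on the GT to be retired and for the
 * microcontrollers (uc) to go idle, giving up once the supplied timeout has
 * elapsed.
 */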
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
{
	long remaining_timeout;

	/* If the device is asleep, we have no requests outstanding */
	if (!intel_gt_pm_is_awake(gt))
		return 0;

	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
							   &remaining_timeout)) > 0) {
		cond_resched();
		if (signal_pending(current))
			return -EINTR;
	}

	return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc,
							  remaining_timeout);
}

int intel_gt_init(struct intel_gt *gt)
{
	int err;

	err = i915_inject_probe_error(gt->i915, -ENODEV);
	if (err)
		return err;

	intel_gt_init_workarounds(gt);

	/*
	 * This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	err = intel_gt_init_scratch(gt,
				    GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K);
	if (err)
		goto out_fw;

	intel_gt_pm_init(gt);

	gt->vm = kernel_vm(gt);
	if (!gt->vm) {
		err = -ENOMEM;
		goto err_pm;
	}

	intel_set_mocs_index(gt);

	err = intel_engines_init(gt);
	if (err)
		goto err_engines;

	err = intel_uc_init(&gt->uc);
	if (err)
		goto err_engines;

	err = intel_gt_resume(gt);
	if (err)
		goto err_uc_init;

	err = intel_gt_init_hwconfig(gt);
	if (err)
		drm_err(&gt->i915->drm, "Failed to retrieve hwconfig table: %pe\n",
			ERR_PTR(err));

	err = __engines_record_defaults(gt);
	if (err)
		goto err_gt;

	err = __engines_verify_workarounds(gt);
	if (err)
		goto err_gt;

	intel_uc_init_late(&gt->uc);

	err = i915_inject_probe_error(gt->i915, -EIO);
	if (err)
		goto err_gt;

	intel_migrate_init(&gt->migrate, gt);

	intel_pxp_init(&gt->pxp);

	goto out_fw;
err_gt:
	__intel_gt_disable(gt);
	intel_uc_fini_hw(&gt->uc);
err_uc_init:
	intel_uc_fini(&gt->uc);
err_engines:
	intel_engines_release(gt);
	i915_vm_put(fetch_and_zero(&gt->vm));
err_pm:
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
out_fw:
	if (err)
		intel_gt_set_wedged_on_init(gt);
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	return err;
}

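/* Quiesce the hardware and release the engines, migration context and uc. */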
void intel_gt_driver_remove(struct intel_gt *gt)
{
	__intel_gt_disable(gt);

	intel_migrate_fini(&gt->migrate);
	intel_uc_driver_remove(&gt->uc);

	intel_engines_release(gt);

	intel_gt_flush_buffer_pool(gt);
}

void intel_gt_driver_unregister(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	intel_gt_sysfs_unregister(gt);
	intel_rps_driver_unregister(&gt->rps);
	intel_gsc_fini(&gt->gsc);

	intel_pxp_fini(&gt->pxp);

	/*
	 * Upon unregistering the device to prevent any new users, cancel
	 * all in-flight requests so that we can quickly unbind the active
	 * resources.
	 */
	intel_gt_set_wedged_on_fini(gt);

	/* Scrub all HW state upon release */
	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		__intel_gt_reset(gt, ALL_ENGINES);
}

void intel_gt_driver_release(struct intel_gt *gt)
{
	struct i915_address_space *vm;

	vm = fetch_and_zero(&gt->vm);
	if (vm) /* FIXME being called twice on error paths :( */
		i915_vm_put(vm);

	intel_wa_list_free(&gt->wa_list);
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
	intel_gt_fini_buffer_pool(gt);
	intel_gt_fini_hwconfig(gt);
}

void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	unsigned int id;

	/* We need to wait for inflight RCU frees to release their grip */
	rcu_barrier();

	for_each_gt(gt, i915, id) {
		intel_uc_driver_late_release(&gt->uc);
		intel_gt_fini_requests(gt);
		intel_gt_fini_reset(gt);
		intel_gt_fini_timelines(gt);
		mutex_destroy(&gt->tlb.invalidate_lock);
		intel_engines_free(gt);
	}
}

/**
 * intel_gt_reg_needs_read_steering - determine whether a register read
 *     requires explicit steering
 * @gt: GT structure
 * @reg: the register to check steering requirements for
 * @type: type of multicast steering to check
 *
 * Determines whether @reg needs explicit steering of a specific type for
 * reads.
 *
 * Returns false if @reg does not belong to a register range of the given
 * steering type, or if the default (subslice-based) steering IDs are suitable
 * for @type steering too.
 */
static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt,
					     i915_reg_t reg,
					     enum intel_steering_type type)
{
	const u32 offset = i915_mmio_reg_offset(reg);
	const struct intel_mmio_range *entry;

	if (likely(!intel_gt_needs_read_steering(gt, type)))
		return false;

	for (entry = gt->steering_table[type]; entry->end; entry++) {
		if (offset >= entry->start && offset <= entry->end)
			return true;
	}

	return false;
}

/**
 * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering
 * @gt: GT structure
 * @type: multicast register type
 * @sliceid: Slice ID returned
 * @subsliceid: Subslice ID returned
 *
 * Determines sliceid and subsliceid values that will steer reads
 * of a specific multicast register class to a valid value.
 */
static void intel_gt_get_valid_steering(struct intel_gt *gt,
					enum intel_steering_type type,
					u8 *sliceid, u8 *subsliceid)
{
	switch (type) {
	case L3BANK:
		GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */

		*sliceid = 0;		/* unused */
		*subsliceid = __ffs(gt->info.l3bank_mask);
		break;
	case MSLICE:
		GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */

		*sliceid = __ffs(gt->info.mslice_mask);
		*subsliceid = 0;	/* unused */
		break;
	case LNCF:
		GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */

		/*
		 * An LNCF is always present if its mslice is present, so we
		 * can safely just steer to LNCF 0 in all cases.
		 */
		*sliceid = __ffs(gt->info.mslice_mask) << 1;
		*subsliceid = 0;	/* unused */
		break;
	default:
		MISSING_CASE(type);
		*sliceid = 0;
		*subsliceid = 0;
	}
}

/**
 * intel_gt_read_register_fw - reads a GT register with support for multicast
 * @gt: GT structure
 * @reg: register to read
 *
 * This function will read a GT register.  If the register is a multicast
 * register, the read will be steered to a valid instance (i.e., one that
 * isn't fused off or powered down by power gating).
 *
 * Returns the value from a valid instance of @reg.
 */
u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
{
	int type;
	u8 sliceid, subsliceid;

	for (type = 0; type < NUM_STEERING_TYPES; type++) {
		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
			intel_gt_get_valid_steering(gt, type, &sliceid,
						    &subsliceid);
			return intel_uncore_read_with_mcr_steering_fw(gt->uncore,
								      reg,
								      sliceid,
								      subsliceid);
		}
	}

	return intel_uncore_read_fw(gt->uncore, reg);
}

/**
 * intel_gt_get_valid_steering_for_reg - get a valid steering for a register
 * @gt: GT structure
 * @reg: register for which the steering is required
 * @sliceid: return variable for slice steering
 * @subsliceid: return variable for subslice steering
 *
 * This function returns a slice/subslice pair that is guaranteed to work for
 * read steering of the given register. Note that a value will be returned even
 * if the register is not replicated and therefore does not actually require
 * steering.
 */
void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg,
					 u8 *sliceid, u8 *subsliceid)
{
	int type;

	for (type = 0; type < NUM_STEERING_TYPES; type++) {
		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
			intel_gt_get_valid_steering(gt, type, sliceid,
						    subsliceid);
			return;
		}
	}

	*sliceid = gt->default_steering.groupid;
	*subsliceid = gt->default_steering.instanceid;
}

u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
{
	int type;
	u8 sliceid, subsliceid;

	for (type = 0; type < NUM_STEERING_TYPES; type++) {
		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
			intel_gt_get_valid_steering(gt, type, &sliceid,
						    &subsliceid);
			return intel_uncore_read_with_mcr_steering(gt->uncore,
								   reg,
								   sliceid,
								   subsliceid);
		}
	}

	return intel_uncore_read(gt->uncore, reg);
}

static void report_steering_type(struct drm_printer *p,
				 struct intel_gt *gt,
				 enum intel_steering_type type,
				 bool dump_table)
{
	const struct intel_mmio_range *entry;
	u8 slice, subslice;

	BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES);

	if (!gt->steering_table[type]) {
		drm_printf(p, "%s steering: uses default steering\n",
			   intel_steering_types[type]);
		return;
	}

	intel_gt_get_valid_steering(gt, type, &slice, &subslice);
	drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n",
		   intel_steering_types[type], slice, subslice);

	if (!dump_table)
		return;

	for (entry = gt->steering_table[type]; entry->end; entry++)
		drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end);
}

void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt,
			      bool dump_table)
{
	drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n",
		   gt->default_steering.groupid,
		   gt->default_steering.instanceid);

	if (HAS_MSLICES(gt->i915)) {
		report_steering_type(p, gt, MSLICE, dump_table);
		report_steering_type(p, gt, LNCF, dump_table);
	}
}

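/*
 * Per-tile early MMIO setup: non-root tiles allocate their own uncore and
 * mmio_debug before mapping the tile's register BAR.
 */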
static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
{
	int ret;

	if (!gt_is_root(gt)) {
		struct intel_uncore_mmio_debug *mmio_debug;
		struct intel_uncore *uncore;

		uncore = kzalloc(sizeof(*uncore), GFP_KERNEL);
		if (!uncore)
			return -ENOMEM;

		mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL);
		if (!mmio_debug) {
			kfree(uncore);
			return -ENOMEM;
		}

		gt->uncore = uncore;
		gt->uncore->debug = mmio_debug;

		__intel_gt_init_early(gt);
	}

	intel_uncore_init_early(gt->uncore, gt);

	ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
	if (ret)
		return ret;

	gt->phys_addr = phys_addr;

	return 0;
}

static void
intel_gt_tile_cleanup(struct intel_gt *gt)
{
	intel_uncore_cleanup_mmio(gt->uncore);

	if (!gt_is_root(gt)) {
		kfree(gt->uncore->debug);
		kfree(gt->uncore);
		kfree(gt);
	}
}

int intel_gt_probe_all(struct drm_i915_private *i915)
{
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	struct intel_gt *gt = &i915->gt0;
	phys_addr_t phys_addr;
	unsigned int mmio_bar;
	int ret;

	mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0;
	phys_addr = pci_resource_start(pdev, mmio_bar);

	/*
	 * We always have at least one primary GT on any device
	 * and it has already been initialized early during probe
	 * in i915_driver_probe()
	 */
	ret = intel_gt_tile_setup(gt, phys_addr);
	if (ret)
		return ret;

	i915->gt[0] = gt;

	/* TODO: add more tiles */
	return 0;
}

int intel_gt_tiles_init(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	unsigned int id;
	int ret;

	for_each_gt(gt, i915, id) {
		ret = intel_gt_probe_lmem(gt);
		if (ret)
			return ret;
	}

	return 0;
}

void intel_gt_release_all(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	unsigned int id;

	for_each_gt(gt, i915, id) {
		intel_gt_tile_cleanup(gt);
		i915->gt[id] = NULL;
	}
}

void intel_gt_info_print(const struct intel_gt_info *info,
			 struct drm_printer *p)
{
	drm_printf(p, "available engines: %x\n", info->engine_mask);

	intel_sseu_dump(&info->sseu, p);
}

struct reg_and_bit {
	i915_reg_t reg;
	u32 bit;
};

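/*
 * Look up the TLB invalidation register and bit for an engine from the
 * per-class table; on gen8 the video decode engines index the register by
 * engine instance instead of using a per-instance bit.
 */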
static struct reg_and_bit
get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
		const i915_reg_t *regs, const unsigned int num)
{
	const unsigned int class = engine->class;
	struct reg_and_bit rb = { };

	if (drm_WARN_ON_ONCE(&engine->i915->drm,
			     class >= num || !regs[class].reg))
		return rb;

	rb.reg = regs[class];
	if (gen8 && class == VIDEO_DECODE_CLASS)
		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
	else
		rb.bit = engine->instance;

	rb.bit = BIT(rb.bit);

	return rb;
}

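/*
 * Invalidate the TLBs on all awake engines by writing the per-engine
 * invalidation registers and waiting for the hardware to acknowledge.
 */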
static void mmio_invalidate_full(struct intel_gt *gt)
{
	static const i915_reg_t gen8_regs[] = {
		[RENDER_CLASS]			= GEN8_RTCR,
		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
	};
	static const i915_reg_t gen12_regs[] = {
		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
	};
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	const i915_reg_t *regs;
	unsigned int num = 0;

	if (GRAPHICS_VER(i915) == 12) {
		regs = gen12_regs;
		num = ARRAY_SIZE(gen12_regs);
	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
		regs = gen8_regs;
		num = ARRAY_SIZE(gen8_regs);
	} else if (GRAPHICS_VER(i915) < 8) {
		return;
	}

	if (drm_WARN_ONCE(&i915->drm, !num,
			  "Platform does not implement TLB invalidation!"))
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		struct reg_and_bit rb;

		if (!intel_engine_pm_is_awake(engine))
			continue;

		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
		if (!i915_mmio_reg_offset(rb.reg))
			continue;

		intel_uncore_write_fw(uncore, rb.reg, rb.bit);
		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock_irq(&uncore->lock);

	for_each_engine_masked(engine, gt, awake, tmp) {
		struct reg_and_bit rb;

		/*
		 * HW architecture suggests typical invalidation time at 40us,
		 * with pessimistic cases up to 100us and a recommendation to
		 * cap at 1ms. We go a bit higher just in case.
		 */
		const unsigned int timeout_us = 100;
		const unsigned int timeout_ms = 4;

		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
		if (__intel_wait_for_register_fw(uncore,
						 rb.reg, rb.bit, 0,
						 timeout_us, timeout_ms,
						 NULL))
			drm_err_ratelimited(&gt->i915->drm,
					    "%s TLB invalidation did not complete in %ums!\n",
					    engine->name, timeout_ms);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

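/*
 * Check whether a full TLB invalidation has happened since @seqno was
 * assigned; stale seqnos let intel_gt_invalidate_tlb() skip the MMIO flush.
 */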
static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		mmio_invalidate_full(gt);

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}