1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5 
6 #include <linux/slab.h> /* fault-inject.h is not standalone! */
7 
8 #include <linux/fault-inject.h>
9 #include <linux/sched/mm.h>
10 
11 #include <drm/drm_cache.h>
12 
13 #include "gem/i915_gem_internal.h"
14 #include "gem/i915_gem_lmem.h"
15 #include "i915_trace.h"
16 #include "i915_utils.h"
17 #include "intel_gt.h"
18 #include "intel_gt_regs.h"
19 #include "intel_gtt.h"
20 
21 
intel_ggtt_update_needs_vtd_wa(struct drm_i915_private * i915)22 static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
23 {
24 	return IS_BROXTON(i915) && i915_vtd_active(i915);
25 }
26 
intel_vm_no_concurrent_access_wa(struct drm_i915_private * i915)27 bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
28 {
29 	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
30 }
31 
alloc_pt_lmem(struct i915_address_space * vm,int sz)32 struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
33 {
34 	struct drm_i915_gem_object *obj;
35 
36 	/*
37 	 * To avoid severe over-allocation when dealing with min_page_size
38 	 * restrictions, we override that behaviour here by allowing an object
39 	 * size and page layout which can be smaller. In practice this should be
40 	 * totally fine, since GTT paging structures are not typically inserted
41 	 * into the GTT.
42 	 *
43 	 * Note that we also hit this path for the scratch page, and for this
44 	 * case it might need to be 64K, but that should work fine here since we
45 	 * used the passed in size for the page size, which should ensure it
46 	 * also has the same alignment.
47 	 */
48 	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
49 						    vm->lmem_pt_obj_flags);
50 	/*
51 	 * Ensure all paging structures for this vm share the same dma-resv
52 	 * object underneath, with the idea that one object_lock() will lock
53 	 * them all at once.
54 	 */
55 	if (!IS_ERR(obj)) {
56 		obj->base.resv = i915_vm_resv_get(vm);
57 		obj->shares_resv_from = vm;
58 	}
59 
60 	return obj;
61 }
62 
alloc_pt_dma(struct i915_address_space * vm,int sz)63 struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
64 {
65 	struct drm_i915_gem_object *obj;
66 
67 	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
68 		i915_gem_shrink_all(vm->i915);
69 
70 	obj = i915_gem_object_create_internal(vm->i915, sz);
71 	/*
72 	 * Ensure all paging structures for this vm share the same dma-resv
73 	 * object underneath, with the idea that one object_lock() will lock
74 	 * them all at once.
75 	 */
76 	if (!IS_ERR(obj)) {
77 		obj->base.resv = i915_vm_resv_get(vm);
78 		obj->shares_resv_from = vm;
79 	}
80 
81 	return obj;
82 }
83 
map_pt_dma(struct i915_address_space * vm,struct drm_i915_gem_object * obj)84 int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
85 {
86 	enum i915_map_type type;
87 	void *vaddr;
88 
89 	type = i915_coherent_map_type(vm->i915, obj, true);
90 	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
91 	if (IS_ERR(vaddr))
92 		return PTR_ERR(vaddr);
93 
94 	i915_gem_object_make_unshrinkable(obj);
95 	return 0;
96 }
97 
map_pt_dma_locked(struct i915_address_space * vm,struct drm_i915_gem_object * obj)98 int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
99 {
100 	enum i915_map_type type;
101 	void *vaddr;
102 
103 	type = i915_coherent_map_type(vm->i915, obj, true);
104 	vaddr = i915_gem_object_pin_map(obj, type);
105 	if (IS_ERR(vaddr))
106 		return PTR_ERR(vaddr);
107 
108 	i915_gem_object_make_unshrinkable(obj);
109 	return 0;
110 }
111 
clear_vm_list(struct list_head * list)112 static void clear_vm_list(struct list_head *list)
113 {
114 	struct i915_vma *vma, *vn;
115 
116 	list_for_each_entry_safe(vma, vn, list, vm_link) {
117 		struct drm_i915_gem_object *obj = vma->obj;
118 
119 		if (!i915_gem_object_get_rcu(obj)) {
120 			/*
121 			 * Object is dying, but has not yet cleared its
122 			 * vma list.
123 			 * Unbind the dying vma to ensure our list
124 			 * is completely drained. We leave the destruction to
125 			 * the object destructor to avoid the vma
126 			 * disappearing under it.
127 			 */
128 			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
129 			WARN_ON(__i915_vma_unbind(vma));
130 
131 			/* Remove from the unbound list */
132 			list_del_init(&vma->vm_link);
133 
134 			/*
135 			 * Delay the vm and vm mutex freeing until the
136 			 * object is done with destruction.
137 			 */
138 			i915_vm_resv_get(vma->vm);
139 			vma->vm_ddestroy = true;
140 		} else {
141 			i915_vma_destroy_locked(vma);
142 			i915_gem_object_put(obj);
143 		}
144 
145 	}
146 }
147 
__i915_vm_close(struct i915_address_space * vm)148 static void __i915_vm_close(struct i915_address_space *vm)
149 {
150 	mutex_lock(&vm->mutex);
151 
152 	clear_vm_list(&vm->bound_list);
153 	clear_vm_list(&vm->unbound_list);
154 
155 	/* Check for must-fix unanticipated side-effects */
156 	GEM_BUG_ON(!list_empty(&vm->bound_list));
157 	GEM_BUG_ON(!list_empty(&vm->unbound_list));
158 
159 	mutex_unlock(&vm->mutex);
160 }
161 
162 /* lock the vm into the current ww, if we lock one, we lock all */
i915_vm_lock_objects(struct i915_address_space * vm,struct i915_gem_ww_ctx * ww)163 int i915_vm_lock_objects(struct i915_address_space *vm,
164 			 struct i915_gem_ww_ctx *ww)
165 {
166 	if (vm->scratch[0]->base.resv == &vm->_resv) {
167 		return i915_gem_object_lock(vm->scratch[0], ww);
168 	} else {
169 		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
170 
171 		/* We borrowed the scratch page from ggtt, take the top level object */
172 		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
173 	}
174 }
175 
i915_address_space_fini(struct i915_address_space * vm)176 void i915_address_space_fini(struct i915_address_space *vm)
177 {
178 	drm_mm_takedown(&vm->mm);
179 }
180 
181 /**
182  * i915_vm_resv_release - Final struct i915_address_space destructor
183  * @kref: Pointer to the &i915_address_space.resv_ref member.
184  *
185  * This function is called when the last lock sharer no longer shares the
186  * &i915_address_space._resv lock, and also if we raced when
187  * destroying a vma by the vma destruction
188  */
i915_vm_resv_release(struct kref * kref)189 void i915_vm_resv_release(struct kref *kref)
190 {
191 	struct i915_address_space *vm =
192 		container_of(kref, typeof(*vm), resv_ref);
193 
194 	dma_resv_fini(&vm->_resv);
195 	mutex_destroy(&vm->mutex);
196 
197 	kfree(vm);
198 }
199 
__i915_vm_release(struct work_struct * work)200 static void __i915_vm_release(struct work_struct *work)
201 {
202 	struct i915_address_space *vm =
203 		container_of(work, struct i915_address_space, release_work);
204 
205 	__i915_vm_close(vm);
206 
207 	/* Synchronize async unbinds. */
208 	i915_vma_resource_bind_dep_sync_all(vm);
209 
210 	vm->cleanup(vm);
211 	i915_address_space_fini(vm);
212 
213 	i915_vm_resv_put(vm);
214 }
215 
i915_vm_release(struct kref * kref)216 void i915_vm_release(struct kref *kref)
217 {
218 	struct i915_address_space *vm =
219 		container_of(kref, struct i915_address_space, ref);
220 
221 	GEM_BUG_ON(i915_is_ggtt(vm));
222 	trace_i915_ppgtt_release(vm);
223 
224 	queue_work(vm->i915->wq, &vm->release_work);
225 }
226 
i915_address_space_init(struct i915_address_space * vm,int subclass)227 void i915_address_space_init(struct i915_address_space *vm, int subclass)
228 {
229 	kref_init(&vm->ref);
230 
231 	/*
232 	 * Special case for GGTT that has already done an early
233 	 * kref_init here.
234 	 */
235 	if (!kref_read(&vm->resv_ref))
236 		kref_init(&vm->resv_ref);
237 
238 	vm->pending_unbind = RB_ROOT_CACHED;
239 	INIT_WORK(&vm->release_work, __i915_vm_release);
240 
241 	/*
242 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
243 	 * Do a dummy acquire now under fs_reclaim so that any allocation
244 	 * attempt holding the lock is immediately reported by lockdep.
245 	 */
246 	mutex_init(&vm->mutex);
247 	lockdep_set_subclass(&vm->mutex, subclass);
248 
249 	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
250 		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
251 	} else {
252 		/*
253 		 * CHV + BXT VTD workaround use stop_machine(),
254 		 * which is allowed to allocate memory. This means &vm->mutex
255 		 * is the outer lock, and in theory we can allocate memory inside
256 		 * it through stop_machine().
257 		 *
258 		 * Add the annotation for this, we use trylock in shrinker.
259 		 */
260 		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
261 		might_alloc(GFP_KERNEL);
262 		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
263 	}
264 	dma_resv_init(&vm->_resv);
265 
266 	GEM_BUG_ON(!vm->total);
267 	drm_mm_init(&vm->mm, 0, vm->total);
268 
269 	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
270 		 ARRAY_SIZE(vm->min_alignment));
271 
272 	if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) &&
273 	    subclass == VM_CLASS_PPGTT) {
274 		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
275 		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
276 	} else if (HAS_64K_PAGES(vm->i915)) {
277 		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
278 		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
279 	}
280 
281 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
282 
283 	INIT_LIST_HEAD(&vm->bound_list);
284 	INIT_LIST_HEAD(&vm->unbound_list);
285 }
286 
__px_vaddr(struct drm_i915_gem_object * p)287 void *__px_vaddr(struct drm_i915_gem_object *p)
288 {
289 	enum i915_map_type type;
290 
291 	GEM_BUG_ON(!i915_gem_object_has_pages(p));
292 	return page_unpack_bits(p->mm.mapping, &type);
293 }
294 
__px_dma(struct drm_i915_gem_object * p)295 dma_addr_t __px_dma(struct drm_i915_gem_object *p)
296 {
297 	GEM_BUG_ON(!i915_gem_object_has_pages(p));
298 	return sg_dma_address(p->mm.pages->sgl);
299 }
300 
__px_page(struct drm_i915_gem_object * p)301 struct page *__px_page(struct drm_i915_gem_object *p)
302 {
303 	GEM_BUG_ON(!i915_gem_object_has_pages(p));
304 	return sg_page(p->mm.pages->sgl);
305 }
306 
307 void
fill_page_dma(struct drm_i915_gem_object * p,const u64 val,unsigned int count)308 fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
309 {
310 	void *vaddr = __px_vaddr(p);
311 
312 	memset64(vaddr, val, count);
313 	drm_clflush_virt_range(vaddr, PAGE_SIZE);
314 }
315 
poison_scratch_page(struct drm_i915_gem_object * scratch)316 static void poison_scratch_page(struct drm_i915_gem_object *scratch)
317 {
318 	void *vaddr = __px_vaddr(scratch);
319 	u8 val;
320 
321 	val = 0;
322 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
323 		val = POISON_FREE;
324 
325 	memset(vaddr, val, scratch->base.size);
326 	drm_clflush_virt_range(vaddr, scratch->base.size);
327 }
328 
setup_scratch_page(struct i915_address_space * vm)329 int setup_scratch_page(struct i915_address_space *vm)
330 {
331 	unsigned long size;
332 
333 	/*
334 	 * In order to utilize 64K pages for an object with a size < 2M, we will
335 	 * need to support a 64K scratch page, given that every 16th entry for a
336 	 * page-table operating in 64K mode must point to a properly aligned 64K
337 	 * region, including any PTEs which happen to point to scratch.
338 	 *
339 	 * This is only relevant for the 48b PPGTT where we support
340 	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
341 	 * scratch (read-only) between all vm, we create one 64k scratch page
342 	 * for all.
343 	 */
344 	size = I915_GTT_PAGE_SIZE_4K;
345 	if (i915_vm_is_4lvl(vm) &&
346 	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
347 		size = I915_GTT_PAGE_SIZE_64K;
348 
349 	do {
350 		struct drm_i915_gem_object *obj;
351 
352 		obj = vm->alloc_scratch_dma(vm, size);
353 		if (IS_ERR(obj))
354 			goto skip;
355 
356 		if (map_pt_dma(vm, obj))
357 			goto skip_obj;
358 
359 		/* We need a single contiguous page for our scratch */
360 		if (obj->mm.page_sizes.sg < size)
361 			goto skip_obj;
362 
363 		/* And it needs to be correspondingly aligned */
364 		if (__px_dma(obj) & (size - 1))
365 			goto skip_obj;
366 
367 		/*
368 		 * Use a non-zero scratch page for debugging.
369 		 *
370 		 * We want a value that should be reasonably obvious
371 		 * to spot in the error state, while also causing a GPU hang
372 		 * if executed. We prefer using a clear page in production, so
373 		 * should it ever be accidentally used, the effect should be
374 		 * fairly benign.
375 		 */
376 		poison_scratch_page(obj);
377 
378 		vm->scratch[0] = obj;
379 		vm->scratch_order = get_order(size);
380 		return 0;
381 
382 skip_obj:
383 		i915_gem_object_put(obj);
384 skip:
385 		if (size == I915_GTT_PAGE_SIZE_4K)
386 			return -ENOMEM;
387 
388 		/*
389 		 * If we need 64K minimum GTT pages for device local-memory,
390 		 * like on XEHPSDV, then we need to fail the allocation here,
391 		 * otherwise we can't safely support the insertion of
392 		 * local-memory pages for this vm, since the HW expects the
393 		 * correct physical alignment and size when the page-table is
394 		 * operating in 64K GTT mode, which includes any scratch PTEs,
395 		 * since userspace can still touch them.
396 		 */
397 		if (HAS_64K_PAGES(vm->i915))
398 			return -ENOMEM;
399 
400 		size = I915_GTT_PAGE_SIZE_4K;
401 	} while (1);
402 }
403 
free_scratch(struct i915_address_space * vm)404 void free_scratch(struct i915_address_space *vm)
405 {
406 	int i;
407 
408 	for (i = 0; i <= vm->top; i++)
409 		i915_gem_object_put(vm->scratch[i]);
410 }
411 
gtt_write_workarounds(struct intel_gt * gt)412 void gtt_write_workarounds(struct intel_gt *gt)
413 {
414 	struct drm_i915_private *i915 = gt->i915;
415 	struct intel_uncore *uncore = gt->uncore;
416 
417 	/*
418 	 * This function is for gtt related workarounds. This function is
419 	 * called on driver load and after a GPU reset, so you can place
420 	 * workarounds here even if they get overwritten by GPU reset.
421 	 */
422 	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
423 	if (IS_BROADWELL(i915))
424 		intel_uncore_write(uncore,
425 				   GEN8_L3_LRA_1_GPGPU,
426 				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
427 	else if (IS_CHERRYVIEW(i915))
428 		intel_uncore_write(uncore,
429 				   GEN8_L3_LRA_1_GPGPU,
430 				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
431 	else if (IS_GEN9_LP(i915))
432 		intel_uncore_write(uncore,
433 				   GEN8_L3_LRA_1_GPGPU,
434 				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
435 	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
436 		intel_uncore_write(uncore,
437 				   GEN8_L3_LRA_1_GPGPU,
438 				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
439 
440 	/*
441 	 * To support 64K PTEs we need to first enable the use of the
442 	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
443 	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
444 	 * shouldn't be needed after GEN10.
445 	 *
446 	 * 64K pages were first introduced from BDW+, although technically they
447 	 * only *work* from gen9+. For pre-BDW we instead have the option for
448 	 * 32K pages, but we don't currently have any support for it in our
449 	 * driver.
450 	 */
451 	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
452 	    GRAPHICS_VER(i915) <= 10)
453 		intel_uncore_rmw(uncore,
454 				 GEN8_GAMW_ECO_DEV_RW_IA,
455 				 0,
456 				 GAMW_ECO_ENABLE_64K_IPS_FIELD);
457 
458 	if (IS_GRAPHICS_VER(i915, 8, 11)) {
459 		bool can_use_gtt_cache = true;
460 
461 		/*
462 		 * According to the BSpec if we use 2M/1G pages then we also
463 		 * need to disable the GTT cache. At least on BDW we can see
464 		 * visual corruption when using 2M pages, and not disabling the
465 		 * GTT cache.
466 		 */
467 		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
468 			can_use_gtt_cache = false;
469 
470 		/* WaGttCachingOffByDefault */
471 		intel_uncore_write(uncore,
472 				   HSW_GTT_CACHE_EN,
473 				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
474 		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
475 				 intel_uncore_read(uncore,
476 						   HSW_GTT_CACHE_EN) == 0);
477 	}
478 }
479 
tgl_setup_private_ppat(struct intel_uncore * uncore)480 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
481 {
482 	/* TGL doesn't support LLC or AGE settings */
483 	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
484 	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
485 	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
486 	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
487 	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
488 	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
489 	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
490 	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
491 }
492 
icl_setup_private_ppat(struct intel_uncore * uncore)493 static void icl_setup_private_ppat(struct intel_uncore *uncore)
494 {
495 	intel_uncore_write(uncore,
496 			   GEN10_PAT_INDEX(0),
497 			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
498 	intel_uncore_write(uncore,
499 			   GEN10_PAT_INDEX(1),
500 			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
501 	intel_uncore_write(uncore,
502 			   GEN10_PAT_INDEX(2),
503 			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
504 	intel_uncore_write(uncore,
505 			   GEN10_PAT_INDEX(3),
506 			   GEN8_PPAT_UC);
507 	intel_uncore_write(uncore,
508 			   GEN10_PAT_INDEX(4),
509 			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
510 	intel_uncore_write(uncore,
511 			   GEN10_PAT_INDEX(5),
512 			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
513 	intel_uncore_write(uncore,
514 			   GEN10_PAT_INDEX(6),
515 			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
516 	intel_uncore_write(uncore,
517 			   GEN10_PAT_INDEX(7),
518 			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
519 }
520 
521 /*
522  * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
523  * bits. When using advanced contexts each context stores its own PAT, but
524  * writing this data shouldn't be harmful even in those cases.
525  */
bdw_setup_private_ppat(struct intel_uncore * uncore)526 static void bdw_setup_private_ppat(struct intel_uncore *uncore)
527 {
528 	struct drm_i915_private *i915 = uncore->i915;
529 	u64 pat;
530 
531 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
532 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
533 	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
534 	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
535 	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
536 	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
537 	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
538 
539 	/* for scanout with eLLC */
540 	if (GRAPHICS_VER(i915) >= 9)
541 		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
542 	else
543 		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
544 
545 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
546 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
547 }
548 
chv_setup_private_ppat(struct intel_uncore * uncore)549 static void chv_setup_private_ppat(struct intel_uncore *uncore)
550 {
551 	u64 pat;
552 
553 	/*
554 	 * Map WB on BDW to snooped on CHV.
555 	 *
556 	 * Only the snoop bit has meaning for CHV, the rest is
557 	 * ignored.
558 	 *
559 	 * The hardware will never snoop for certain types of accesses:
560 	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
561 	 * - PPGTT page tables
562 	 * - some other special cycles
563 	 *
564 	 * As with BDW, we also need to consider the following for GT accesses:
565 	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
566 	 * so RTL will always use the value corresponding to
567 	 * pat_sel = 000".
568 	 * Which means we must set the snoop bit in PAT entry 0
569 	 * in order to keep the global status page working.
570 	 */
571 
572 	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
573 	      GEN8_PPAT(1, 0) |
574 	      GEN8_PPAT(2, 0) |
575 	      GEN8_PPAT(3, 0) |
576 	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
577 	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
578 	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
579 	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
580 
581 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
582 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
583 }
584 
setup_private_pat(struct intel_uncore * uncore)585 void setup_private_pat(struct intel_uncore *uncore)
586 {
587 	struct drm_i915_private *i915 = uncore->i915;
588 
589 	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
590 
591 	if (GRAPHICS_VER(i915) >= 12)
592 		tgl_setup_private_ppat(uncore);
593 	else if (GRAPHICS_VER(i915) >= 11)
594 		icl_setup_private_ppat(uncore);
595 	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
596 		chv_setup_private_ppat(uncore);
597 	else
598 		bdw_setup_private_ppat(uncore);
599 }
600 
601 struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space * vm,unsigned long size)602 __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
603 {
604 	struct drm_i915_gem_object *obj;
605 	struct i915_vma *vma;
606 
607 	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
608 	if (IS_ERR(obj))
609 		return ERR_CAST(obj);
610 
611 	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
612 
613 	vma = i915_vma_instance(obj, vm, NULL);
614 	if (IS_ERR(vma)) {
615 		i915_gem_object_put(obj);
616 		return vma;
617 	}
618 
619 	return vma;
620 }
621 
622 struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space * vm,unsigned long size)623 __vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
624 {
625 	struct i915_vma *vma;
626 	int err;
627 
628 	vma = __vm_create_scratch_for_read(vm, size);
629 	if (IS_ERR(vma))
630 		return vma;
631 
632 	err = i915_vma_pin(vma, 0, 0,
633 			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
634 	if (err) {
635 		i915_vma_put(vma);
636 		return ERR_PTR(err);
637 	}
638 
639 	return vma;
640 }
641 
642 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
643 #include "selftests/mock_gtt.c"
644 #endif
645