// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

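/*
 * Record the maximum slice/subslice/EU counts for the platform and derive
 * the byte strides used to index the packed subslice and EU bitmask arrays.
 */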
void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

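/*
 * Count the total number of enabled subslices by summing the bits set in
 * each byte of the packed subslice mask.
 */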
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

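/*
 * Gather the subslice bits for @slice from a packed, byte-strided mask into
 * a single u32, least significant byte first.
 */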
static u32
sseu_get_subslices(const struct sseu_dev_info *sseu,
		   const u8 *subslice_mask, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;

	return mask;
}

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
}

static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
{
	return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
}

u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
{
	return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
}

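/*
 * Store a 32-bit subslice mask for @slice into the packed, byte-strided
 * @subslice_mask array; only ss_stride bytes are copied.
 */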
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u8 *subslice_mask, u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

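/*
 * Byte offset of the EU mask for (@slice, @subslice) within the packed
 * eu_mask array: each slice spans max_subslices * eu_stride bytes.
 */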
static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

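/*
 * Extract the bits of @ss_en that belong to slice @s, as a
 * max_subslices-wide mask.
 */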
static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
	u32 ss_mask;

	ss_mask = ss_en >> (s * sseu->max_subslices);
	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);

	return ss_mask;
}

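/*
 * Common helper for gen11 and later platforms: translate the decoded
 * slice/subslice/EU fuse values into the sseu masks and totals. Platforms
 * without separate compute DSS pass 0 for c_ss_en.
 */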
static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
	int s, ss;

	/* g_ss_en/c_ss_en represent the entire subslice mask across all slices */
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(g_ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		/*
		 * XeHP introduces the concept of compute vs geometry DSS. To
		 * reduce variation between GENs around subslice usage, store
		 * separate masks for the geometry and compute enabled
		 * subslices, since userspace will need to be able to query
		 * these masks independently. Also compute a total enabled
		 * subslice count for the purposes of selecting subslices to
		 * use in a particular GEM context.
		 */
		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
					 get_ss_stride_mask(sseu, s, c_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
					 get_ss_stride_mask(sseu, s, g_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 get_ss_stride_mask(sseu, s,
							    g_ss_en | c_ss_en));

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en, c_dss_en = 0;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 *
	 * In addition, the concept of slice has been removed in Xe_HP.
	 * To be compatible with prior generations, assume a single slice
	 * across the entire device. Then calculate the DSS for each
	 * workload type within that software slice.
	 */
	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
		intel_sseu_set_info(sseu, 1, 32, 16);
	else
		intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * As mentioned above, Xe_HP does not have the concept of a slice.
	 * Enable one for software backwards compatibility.
	 */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		s_en = 0x1;
	else
		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		       GEN11_GT_S_ENA_MASK;

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);

	/* one bit per pair of EUs */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
	else
		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
			       GEN11_EU_DIS_MASK);

	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
	       GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EU
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
				sseu->eu_total /
					intel_sseu_subslice_total(sseu) :
				0;
	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				    hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) have 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
			((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
			 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
			((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
			 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices there
	 * are. We work it out from the PCI IDs here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 9)
		gen9_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
}

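/*
 * Build the GEN8_R_PWR_CLK_STATE (RPCS) register value requesting the
 * slice/subslice/EU configuration in @req_sseu, honouring the platform's
 * power-gating capabilities.
 */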
u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a subslice count between the SScount maximum of four and the maximum
	 * number available on a particular SKU. Either all subslices are
	 * enabled, or a count between one and four on the first slice.
	 */
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

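/*
 * Dump a summary of the device SSEU topology and power-gating capabilities
 * to @p.
 */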
void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   str_yes_no(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   str_yes_no(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n",
		   str_yes_no(sseu->has_eu_pg));
}

static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
				    struct drm_printer *p)
{
	int s, ss;

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
				     struct drm_printer *p)
{
	u32 g_dss_mask = sseu_get_geometry_subslices(sseu);
	u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu);
	int dss;

	for (dss = 0; dss < sseu->max_subslices; dss++) {
		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);

		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
			   str_yes_no(g_dss_mask & BIT(dss)),
			   str_yes_no(c_dss_mask & BIT(dss)),
			   hweight16(enabled_eus), enabled_eus);
	}
}

void intel_sseu_print_topology(struct drm_i915_private *i915,
			       const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
		sseu_print_xehp_topology(sseu, p);
	} else {
		sseu_print_hsw_topology(sseu, p);
	}
}

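/*
 * Collapse a DSS mask into a slice mask: a slice is reported as present if
 * any of its @dss_per_slice DSS are enabled.
 */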
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}