// SPDX-License-Identifier: GPL-2.0
/*
 * Routines to identify caches on Intel CPU.
 *
 * Changes:
 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#include "cpu.h"

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)
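
/* Sizes in cache_table[] below are in KB; MB(x) converts megabytes to KB. */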

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2, 256 },		/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3, 512 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, MB(1) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, MB(2) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, MB(4) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },		/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },		/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },		/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2, 256 },		/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2, 128 },		/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },		/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },		/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, MB(1) },		/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, MB(2) },		/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, MB(4) },		/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, MB(8) },		/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2, MB(3) },		/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, MB(4) },		/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, MB(6) },		/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, MB(8) },		/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2, MB(6) },		/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, MB(1) },		/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, MB(1) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, MB(2) },		/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },		/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2, 512 },		/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },		/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },		/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, MB(1) },		/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, MB(2) },		/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },		/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, MB(1) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3, 512 },		/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3, MB(1) },		/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3, MB(2) },		/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3, MB(1) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3, MB(2) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3, MB(4) },		/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3, MB(2) },		/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3, MB(4) },		/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3, MB(8) },		/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3, MB(2) },		/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3, MB(4) },		/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3, MB(8) },		/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3, MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3, MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3, MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};

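/*
 * Cache type values as reported in CPUID leaf 4 EAX[4:0] (and in AMD's
 * extended leaf 0x8000001d): 0 means "no more caches" and terminates
 * enumeration.
 */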
enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type type:5;
		unsigned int level:3;
		unsigned int is_self_initializing:1;
		unsigned int is_fully_associative:1;
		unsigned int reserved:4;
		unsigned int num_threads_sharing:12;
		unsigned int num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int coherency_line_size:12;
		unsigned int physical_line_partition:10;
		unsigned int ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int number_of_sets:32;
	} split;
	u32 full;
};

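/*
 * Raw CPUID(4)-style registers for one cache leaf, plus the derived cache
 * id and total size, and (on AMD) the northbridge that carries the L3
 * attributes.
 */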
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user. This makes some assumptions about the machine:
 * L2 not shared, no SMT etc., which is currently true on AMD CPUs.
 *
 * In theory the TLBs could be reported as fake type (they are in "dummy").
 * Maybe later.
 */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

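/*
 * Associativity encoding used by CPUID 0x80000006 (L2/L3): the table index
 * is the raw field value, the entry is the number of ways.
 */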
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

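/*
 * Per emulated leaf (0 = L1d, 1 = L1i, 2 = L2, 3 = L3): the cache level and
 * the CPUID4-style cache type reported by amd_cpuid4() below.
 */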
static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
	   union _cpuid4_leaf_ebx *ebx,
	   union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;


	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
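	/*
	 * CPUID(4)-style fields are encoded as "value - 1".  number_of_sets
	 * follows from size = sets * line_size * ways; e.g. a hypothetical
	 * 512 KB, 16-way cache with 64-byte lines has 512 sets, encoded
	 * as 511.
	 */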
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we are disabling; therefore, a simple
		 * wbinvd() is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable a L3 cache index by using a disable-slot
 *
 * @l3: L3 cache descriptor
 * @cpu: A CPU on the node containing the L3 cache
 * @slot: slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
				   unsigned slot, unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warn("L3 slot %d in use/index already disabled!\n",
				slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = topology_die_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */

static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		cpuid_count(0x8000001d, index, &eax.full,
			    &ebx.full, &ecx.full, &edx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets + 1) *
			  (ebx.split.coherency_line_size + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity + 1);
	return 0;
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
	} else {
		/*
		 * LLC ID is calculated from the number of threads sharing the
		 * cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
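		/*
		 * EAX[25:14] of leaf 0x8000001d is the number of threads
		 * sharing the cache, minus 1.
		 */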
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int bits = get_count_order(num_sharing_cache);

			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	/*
	 * LLC is at the core complex level.
	 * Core complex ID is ApicId[3] for these processors.
	 */
	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{

	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
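		/*
		 * CPUID 0x80000006 EDX[15:12] is the L3 associativity;
		 * a non-zero value means an L3 exists, i.e. four leaves
		 * instead of three.
		 */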
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	num_cache_leaves = find_num_cache_leaves(c);
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2 call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l2_id;
		per_cpu(cpu_l2c_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

#ifdef CONFIG_SMP
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

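	/* Prefer L3, then L2, then the combined L1 size (all in KB). */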
	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	if (!l2)
		cpu_detect_cache_sizes(c);
}

static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

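		/*
		 * All CPUs whose APIC IDs fall within [first, last] share
		 * this cache leaf.
		 */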
		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

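	/*
	 * CPUs that agree on apicid >> index_msb share this cache leaf;
	 * link their shared_cpu_map both ways.
	 */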
	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue;/* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

/*
 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
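 * For example, eight sharing threads (EAX[25:14] = 7) give index_msb = 3,
 * so the cache id is apicid >> 3.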
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4_regs->id = c->apicid >> index_msb;
}

int populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}