1 /*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
8 * various APEI tables, such as ERST, BERT and HEST etc.
9 *
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.3.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/acpi.h>
32 #include <linux/aer.h>
33
34 /*
35 * CPER record ID need to be unique even after reboot, because record
36 * ID is used as index for ERST storage, while CPER records from
37 * multiple boot may co-exist in ERST.
38 */
cper_next_record_id(void)39 u64 cper_next_record_id(void)
40 {
41 static atomic64_t seq;
42
43 if (!atomic64_read(&seq))
44 atomic64_set(&seq, ((u64)get_seconds()) << 32);
45
46 return atomic64_inc_return(&seq);
47 }
48 EXPORT_SYMBOL_GPL(cper_next_record_id);
49
/* Severity names, indexed by the numeric severity value. */
static const char *cper_severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};

/*
 * Translate a severity value into its name.  Values that fall outside
 * cper_severity_strs[] are reported as "unknown".
 */
static const char *cper_severity_str(unsigned int severity)
{
	if (severity >= ARRAY_SIZE(cper_severity_strs))
		return "unknown";

	return cper_severity_strs[severity];
}
62
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position; NULL entries are skipped
 * @strs_size: number of entries in @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs,
 * separated by ", ".  If appending the next string would make the
 * current line longer than 80 characters, the line is printed first and
 * a new one is started; @pfx is printed at the beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char *strs[], unsigned int strs_size)
{
	const unsigned int nbits = 8 * sizeof(bits);
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * No bit at or beyond the width of @bits can be set, and shifting
	 * by that much is undefined, so never look past it.
	 */
	if (strs_size > nbits)
		strs_size = nbits;

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush the pending line if ", <str>" would not fit in it. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * scnprintf() returns the number of characters actually
		 * stored, so len can never exceed sizeof(buf) - 1, even
		 * when @pfx or a string gets truncated.  snprintf() returns
		 * the untruncated length instead, which would move len past
		 * the end of buf and make "sizeof(buf) - len" wrap around.
		 */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf + len, sizeof(buf) - len,
					 ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
99
/* Processor type names, indexed by the proc_type field. */
static const char *cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char *cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Names for the bits of the proc_error_type field, indexed by bit position. */
static const char *cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char *cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Names for the bits of the processor flags field, indexed by bit position. */
static const char *cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
131
cper_print_proc_generic(const char * pfx,const struct cper_sec_proc_generic * proc)132 static void cper_print_proc_generic(const char *pfx,
133 const struct cper_sec_proc_generic *proc)
134 {
135 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
136 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
137 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
138 cper_proc_type_strs[proc->proc_type] : "unknown");
139 if (proc->validation_bits & CPER_PROC_VALID_ISA)
140 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
141 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
142 cper_proc_isa_strs[proc->proc_isa] : "unknown");
143 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
144 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
145 cper_print_bits(pfx, proc->proc_error_type,
146 cper_proc_error_type_strs,
147 ARRAY_SIZE(cper_proc_error_type_strs));
148 }
149 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
150 printk("%s""operation: %d, %s\n", pfx, proc->operation,
151 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
152 cper_proc_op_strs[proc->operation] : "unknown");
153 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
154 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
155 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
156 ARRAY_SIZE(cper_proc_flag_strs));
157 }
158 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
159 printk("%s""level: %d\n", pfx, proc->level);
160 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
161 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
162 if (proc->validation_bits & CPER_PROC_VALID_ID)
163 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
164 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
165 printk("%s""target_address: 0x%016llx\n",
166 pfx, proc->target_addr);
167 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
168 printk("%s""requestor_id: 0x%016llx\n",
169 pfx, proc->requestor_id);
170 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
171 printk("%s""responder_id: 0x%016llx\n",
172 pfx, proc->responder_id);
173 if (proc->validation_bits & CPER_PROC_VALID_IP)
174 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
175 }
176
/* Memory error type names, indexed by the error_type field. */
static const char *cper_mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
};
194
cper_print_mem(const char * pfx,const struct cper_sec_mem_err * mem)195 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
196 {
197 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
198 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
199 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
200 printk("%s""physical_address: 0x%016llx\n",
201 pfx, mem->physical_addr);
202 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
203 printk("%s""physical_address_mask: 0x%016llx\n",
204 pfx, mem->physical_addr_mask);
205 if (mem->validation_bits & CPER_MEM_VALID_NODE)
206 printk("%s""node: %d\n", pfx, mem->node);
207 if (mem->validation_bits & CPER_MEM_VALID_CARD)
208 printk("%s""card: %d\n", pfx, mem->card);
209 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
210 printk("%s""module: %d\n", pfx, mem->module);
211 if (mem->validation_bits & CPER_MEM_VALID_BANK)
212 printk("%s""bank: %d\n", pfx, mem->bank);
213 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
214 printk("%s""device: %d\n", pfx, mem->device);
215 if (mem->validation_bits & CPER_MEM_VALID_ROW)
216 printk("%s""row: %d\n", pfx, mem->row);
217 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
218 printk("%s""column: %d\n", pfx, mem->column);
219 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
220 printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
221 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
222 printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
223 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
224 printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
225 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
226 printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
227 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
228 u8 etype = mem->error_type;
229 printk("%s""error_type: %d, %s\n", pfx, etype,
230 etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
231 cper_mem_err_type_strs[etype] : "unknown");
232 }
233 }
234
/* PCIe port type names, indexed by the port_type field. */
static const char *cper_pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
248
cper_print_pcie(const char * pfx,const struct cper_sec_pcie * pcie,const struct acpi_hest_generic_data * gdata)249 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
250 const struct acpi_hest_generic_data *gdata)
251 {
252 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
253 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
254 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
255 cper_pcie_port_type_strs[pcie->port_type] : "unknown");
256 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
257 printk("%s""version: %d.%d\n", pfx,
258 pcie->version.major, pcie->version.minor);
259 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
260 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
261 pcie->command, pcie->status);
262 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
263 const __u8 *p;
264 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
265 pcie->device_id.segment, pcie->device_id.bus,
266 pcie->device_id.device, pcie->device_id.function);
267 printk("%s""slot: %d\n", pfx,
268 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
269 printk("%s""secondary_bus: 0x%02x\n", pfx,
270 pcie->device_id.secondary_bus);
271 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
272 pcie->device_id.vendor_id, pcie->device_id.device_id);
273 p = pcie->device_id.class_code;
274 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
275 }
276 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
277 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
278 pcie->serial_number.lower, pcie->serial_number.upper);
279 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
280 printk(
281 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
282 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
283 #ifdef CONFIG_ACPI_APEI_PCIEAER
284 if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
285 struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
286 cper_print_aer(pfx, gdata->error_severity, aer_regs);
287 }
288 #endif
289 }
290
/* Names for the bits of a section's flags field, indexed by bit position. */
static const char *apei_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
299
apei_estatus_print_section(const char * pfx,const struct acpi_hest_generic_data * gdata,int sec_no)300 static void apei_estatus_print_section(
301 const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
302 {
303 uuid_le *sec_type = (uuid_le *)gdata->section_type;
304 __u16 severity;
305
306 severity = gdata->error_severity;
307 printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
308 cper_severity_str(severity));
309 printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
310 cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
311 ARRAY_SIZE(apei_estatus_section_flag_strs));
312 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
313 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
314 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
315 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
316
317 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
318 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
319 printk("%s""section_type: general processor error\n", pfx);
320 if (gdata->error_data_length >= sizeof(*proc_err))
321 cper_print_proc_generic(pfx, proc_err);
322 else
323 goto err_section_too_small;
324 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
325 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
326 printk("%s""section_type: memory error\n", pfx);
327 if (gdata->error_data_length >= sizeof(*mem_err))
328 cper_print_mem(pfx, mem_err);
329 else
330 goto err_section_too_small;
331 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
332 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
333 printk("%s""section_type: PCIe error\n", pfx);
334 if (gdata->error_data_length >= sizeof(*pcie))
335 cper_print_pcie(pfx, pcie, gdata);
336 else
337 goto err_section_too_small;
338 } else
339 printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
340
341 return;
342
343 err_section_too_small:
344 pr_err(FW_WARN "error section length is too small\n");
345 }
346
apei_estatus_print(const char * pfx,const struct acpi_hest_generic_status * estatus)347 void apei_estatus_print(const char *pfx,
348 const struct acpi_hest_generic_status *estatus)
349 {
350 struct acpi_hest_generic_data *gdata;
351 unsigned int data_len, gedata_len;
352 int sec_no = 0;
353 __u16 severity;
354
355 printk("%s""APEI generic hardware error status\n", pfx);
356 severity = estatus->error_severity;
357 printk("%s""severity: %d, %s\n", pfx, severity,
358 cper_severity_str(severity));
359 data_len = estatus->data_length;
360 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
361 while (data_len > sizeof(*gdata)) {
362 gedata_len = gdata->error_data_length;
363 apei_estatus_print_section(pfx, gdata, sec_no);
364 data_len -= gedata_len + sizeof(*gdata);
365 gdata = (void *)(gdata + 1) + gedata_len;
366 sec_no++;
367 }
368 }
369 EXPORT_SYMBOL_GPL(apei_estatus_print);
370
apei_estatus_check_header(const struct acpi_hest_generic_status * estatus)371 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
372 {
373 if (estatus->data_length &&
374 estatus->data_length < sizeof(struct acpi_hest_generic_data))
375 return -EINVAL;
376 if (estatus->raw_data_length &&
377 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
378 return -EINVAL;
379
380 return 0;
381 }
382 EXPORT_SYMBOL_GPL(apei_estatus_check_header);
383
apei_estatus_check(const struct acpi_hest_generic_status * estatus)384 int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
385 {
386 struct acpi_hest_generic_data *gdata;
387 unsigned int data_len, gedata_len;
388 int rc;
389
390 rc = apei_estatus_check_header(estatus);
391 if (rc)
392 return rc;
393 data_len = estatus->data_length;
394 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
395 while (data_len > sizeof(*gdata)) {
396 gedata_len = gdata->error_data_length;
397 if (gedata_len > data_len - sizeof(*gdata))
398 return -EINVAL;
399 data_len -= gedata_len + sizeof(*gdata);
400 gdata = (void *)(gdata + 1) + gedata_len;
401 }
402 if (data_len)
403 return -EINVAL;
404
405 return 0;
406 }
407 EXPORT_SYMBOL_GPL(apei_estatus_check);
408