1 /*
2  * UEFI Common Platform Error Record (CPER) support
3  *
4  * Copyright (C) 2010, Intel Corp.
5  *	Author: Huang Ying <ying.huang@intel.com>
6  *
7  * CPER is the format used to describe platform hardware error by
8  * various APEI tables, such as ERST, BERT and HEST etc.
9  *
10  * For more information about CPER, please refer to Appendix N of UEFI
11  * Specification version 2.3.
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version
15  * 2 as published by the Free Software Foundation.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25  */
26 
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/acpi.h>
32 #include <linux/aer.h>
33 
34 /*
35  * CPER record ID need to be unique even after reboot, because record
36  * ID is used as index for ERST storage, while CPER records from
37  * multiple boot may co-exist in ERST.
38  */
cper_next_record_id(void)39 u64 cper_next_record_id(void)
40 {
41 	static atomic64_t seq;
42 
43 	if (!atomic64_read(&seq))
44 		atomic64_set(&seq, ((u64)get_seconds()) << 32);
45 
46 	return atomic64_inc_return(&seq);
47 }
48 EXPORT_SYMBOL_GPL(cper_next_record_id);
49 
/*
 * Names for the severity values, indexed by severity.  The table is
 * only ever read, so both the strings and the pointers are const.
 */
static const char * const cper_severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};

/*
 * cper_severity_str - map a severity value to its name
 * @severity: severity value, used as index into cper_severity_strs
 *
 * Returns the matching name, or "unknown" if @severity lies past the
 * end of the table.
 */
static const char *cper_severity_str(unsigned int severity)
{
	if (severity >= ARRAY_SIZE(cper_severity_strs))
		return "unknown";

	return cper_severity_strs[severity];
}
62 
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position; NULL entries are skipped
 * @strs_size: number of entries in @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs,
 * separated by ", ".  If appending the next string would make the
 * current line longer than 80 characters, the line collected so far is
 * printed and a new one is started.  @pfx is printed at the beginning
 * of each line.  Entries of @strs beyond the width of @bits can never
 * be selected and are ignored.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char *strs[], unsigned int strs_size)
{
	unsigned int i, nbits;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * Never shift 1U by the width of @bits or more: that is undefined
	 * and there is no such bit to test anyway.
	 */
	nbits = sizeof(bits) * 8;
	if (strs_size < nbits)
		nbits = strs_size;

	for (i = 0; i < nbits; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush first if ", " plus this string would not fit. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
		/*
		 * snprintf() returns the untruncated length; keep len
		 * pointing inside buf so buf+len stays valid.
		 */
		if (len >= (int)sizeof(buf))
			len = sizeof(buf) - 1;
	}
	if (len)
		printk("%s\n", buf);
}
99 
/* Processor type names, indexed by the proc_type field. */
static const char *cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char *cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Error type names, indexed by bit position in proc_error_type. */
static const char *cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char *cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Flag names, indexed by bit position in the flags field. */
static const char *cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
131 
cper_print_proc_generic(const char * pfx,const struct cper_sec_proc_generic * proc)132 static void cper_print_proc_generic(const char *pfx,
133 				    const struct cper_sec_proc_generic *proc)
134 {
135 	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
136 		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
137 		       proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
138 		       cper_proc_type_strs[proc->proc_type] : "unknown");
139 	if (proc->validation_bits & CPER_PROC_VALID_ISA)
140 		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
141 		       proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
142 		       cper_proc_isa_strs[proc->proc_isa] : "unknown");
143 	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
144 		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
145 		cper_print_bits(pfx, proc->proc_error_type,
146 				cper_proc_error_type_strs,
147 				ARRAY_SIZE(cper_proc_error_type_strs));
148 	}
149 	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
150 		printk("%s""operation: %d, %s\n", pfx, proc->operation,
151 		       proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
152 		       cper_proc_op_strs[proc->operation] : "unknown");
153 	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
154 		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
155 		cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
156 				ARRAY_SIZE(cper_proc_flag_strs));
157 	}
158 	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
159 		printk("%s""level: %d\n", pfx, proc->level);
160 	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
161 		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
162 	if (proc->validation_bits & CPER_PROC_VALID_ID)
163 		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
164 	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
165 		printk("%s""target_address: 0x%016llx\n",
166 		       pfx, proc->target_addr);
167 	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
168 		printk("%s""requestor_id: 0x%016llx\n",
169 		       pfx, proc->requestor_id);
170 	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
171 		printk("%s""responder_id: 0x%016llx\n",
172 		       pfx, proc->responder_id);
173 	if (proc->validation_bits & CPER_PROC_VALID_IP)
174 		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
175 }
176 
/* Memory error type names, indexed by the error_type field. */
static const char *cper_mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
};
194 
cper_print_mem(const char * pfx,const struct cper_sec_mem_err * mem)195 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
196 {
197 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
198 		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
199 	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
200 		printk("%s""physical_address: 0x%016llx\n",
201 		       pfx, mem->physical_addr);
202 	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
203 		printk("%s""physical_address_mask: 0x%016llx\n",
204 		       pfx, mem->physical_addr_mask);
205 	if (mem->validation_bits & CPER_MEM_VALID_NODE)
206 		printk("%s""node: %d\n", pfx, mem->node);
207 	if (mem->validation_bits & CPER_MEM_VALID_CARD)
208 		printk("%s""card: %d\n", pfx, mem->card);
209 	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
210 		printk("%s""module: %d\n", pfx, mem->module);
211 	if (mem->validation_bits & CPER_MEM_VALID_BANK)
212 		printk("%s""bank: %d\n", pfx, mem->bank);
213 	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
214 		printk("%s""device: %d\n", pfx, mem->device);
215 	if (mem->validation_bits & CPER_MEM_VALID_ROW)
216 		printk("%s""row: %d\n", pfx, mem->row);
217 	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
218 		printk("%s""column: %d\n", pfx, mem->column);
219 	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
220 		printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
221 	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
222 		printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
223 	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
224 		printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
225 	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
226 		printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
227 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
228 		u8 etype = mem->error_type;
229 		printk("%s""error_type: %d, %s\n", pfx, etype,
230 		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
231 		       cper_mem_err_type_strs[etype] : "unknown");
232 	}
233 }
234 
/* PCIe port type names, indexed by the port_type field. */
static const char *cper_pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
248 
cper_print_pcie(const char * pfx,const struct cper_sec_pcie * pcie,const struct acpi_hest_generic_data * gdata)249 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
250 			    const struct acpi_hest_generic_data *gdata)
251 {
252 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
253 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
254 		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
255 		       cper_pcie_port_type_strs[pcie->port_type] : "unknown");
256 	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
257 		printk("%s""version: %d.%d\n", pfx,
258 		       pcie->version.major, pcie->version.minor);
259 	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
260 		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
261 		       pcie->command, pcie->status);
262 	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
263 		const __u8 *p;
264 		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
265 		       pcie->device_id.segment, pcie->device_id.bus,
266 		       pcie->device_id.device, pcie->device_id.function);
267 		printk("%s""slot: %d\n", pfx,
268 		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
269 		printk("%s""secondary_bus: 0x%02x\n", pfx,
270 		       pcie->device_id.secondary_bus);
271 		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
272 		       pcie->device_id.vendor_id, pcie->device_id.device_id);
273 		p = pcie->device_id.class_code;
274 		printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
275 	}
276 	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
277 		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
278 		       pcie->serial_number.lower, pcie->serial_number.upper);
279 	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
280 		printk(
281 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
282 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
283 #ifdef CONFIG_ACPI_APEI_PCIEAER
284 	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
285 		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
286 		cper_print_aer(pfx, gdata->error_severity, aer_regs);
287 	}
288 #endif
289 }
290 
/* Section flag names, indexed by bit position in the flags field. */
static const char *apei_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
299 
apei_estatus_print_section(const char * pfx,const struct acpi_hest_generic_data * gdata,int sec_no)300 static void apei_estatus_print_section(
301 	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
302 {
303 	uuid_le *sec_type = (uuid_le *)gdata->section_type;
304 	__u16 severity;
305 
306 	severity = gdata->error_severity;
307 	printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
308 	       cper_severity_str(severity));
309 	printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
310 	cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
311 			ARRAY_SIZE(apei_estatus_section_flag_strs));
312 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
313 		printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
314 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
315 		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
316 
317 	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
318 		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
319 		printk("%s""section_type: general processor error\n", pfx);
320 		if (gdata->error_data_length >= sizeof(*proc_err))
321 			cper_print_proc_generic(pfx, proc_err);
322 		else
323 			goto err_section_too_small;
324 	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
325 		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
326 		printk("%s""section_type: memory error\n", pfx);
327 		if (gdata->error_data_length >= sizeof(*mem_err))
328 			cper_print_mem(pfx, mem_err);
329 		else
330 			goto err_section_too_small;
331 	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
332 		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
333 		printk("%s""section_type: PCIe error\n", pfx);
334 		if (gdata->error_data_length >= sizeof(*pcie))
335 			cper_print_pcie(pfx, pcie, gdata);
336 		else
337 			goto err_section_too_small;
338 	} else
339 		printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
340 
341 	return;
342 
343 err_section_too_small:
344 	pr_err(FW_WARN "error section length is too small\n");
345 }
346 
apei_estatus_print(const char * pfx,const struct acpi_hest_generic_status * estatus)347 void apei_estatus_print(const char *pfx,
348 			const struct acpi_hest_generic_status *estatus)
349 {
350 	struct acpi_hest_generic_data *gdata;
351 	unsigned int data_len, gedata_len;
352 	int sec_no = 0;
353 	__u16 severity;
354 
355 	printk("%s""APEI generic hardware error status\n", pfx);
356 	severity = estatus->error_severity;
357 	printk("%s""severity: %d, %s\n", pfx, severity,
358 	       cper_severity_str(severity));
359 	data_len = estatus->data_length;
360 	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
361 	while (data_len > sizeof(*gdata)) {
362 		gedata_len = gdata->error_data_length;
363 		apei_estatus_print_section(pfx, gdata, sec_no);
364 		data_len -= gedata_len + sizeof(*gdata);
365 		gdata = (void *)(gdata + 1) + gedata_len;
366 		sec_no++;
367 	}
368 }
369 EXPORT_SYMBOL_GPL(apei_estatus_print);
370 
apei_estatus_check_header(const struct acpi_hest_generic_status * estatus)371 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
372 {
373 	if (estatus->data_length &&
374 	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
375 		return -EINVAL;
376 	if (estatus->raw_data_length &&
377 	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
378 		return -EINVAL;
379 
380 	return 0;
381 }
382 EXPORT_SYMBOL_GPL(apei_estatus_check_header);
383 
apei_estatus_check(const struct acpi_hest_generic_status * estatus)384 int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
385 {
386 	struct acpi_hest_generic_data *gdata;
387 	unsigned int data_len, gedata_len;
388 	int rc;
389 
390 	rc = apei_estatus_check_header(estatus);
391 	if (rc)
392 		return rc;
393 	data_len = estatus->data_length;
394 	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
395 	while (data_len > sizeof(*gdata)) {
396 		gedata_len = gdata->error_data_length;
397 		if (gedata_len > data_len - sizeof(*gdata))
398 			return -EINVAL;
399 		data_len -= gedata_len + sizeof(*gdata);
400 		gdata = (void *)(gdata + 1) + gedata_len;
401 	}
402 	if (data_len)
403 		return -EINVAL;
404 
405 	return 0;
406 }
407 EXPORT_SYMBOL_GPL(apei_estatus_check);
408