[go: nahoru, domu]

1/*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 *	Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
8 * various tables, such as ERST, BERT and HEST etc.
9 *
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.4.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/time.h>
30#include <linux/cper.h>
31#include <linux/dmi.h>
32#include <linux/acpi.h>
33#include <linux/pci.h>
34#include <linux/aer.h>
35
/* Indentation appended to the line prefix for each nested level of output. */
#define INDENT_SP	" "

/*
 * File-level scratch buffer: the memory/DIMM location decoders format
 * their text into it before it is printed or copied to a trace sequence.
 */
static char rcd_decode_str[CPER_REC_LEN];
39
40/*
41 * CPER record ID need to be unique even after reboot, because record
42 * ID is used as index for ERST storage, while CPER records from
43 * multiple boot may co-exist in ERST.
44 */
45u64 cper_next_record_id(void)
46{
47	static atomic64_t seq;
48
49	if (!atomic64_read(&seq))
50		atomic64_set(&seq, ((u64)get_seconds()) << 32);
51
52	return atomic64_inc_return(&seq);
53}
54EXPORT_SYMBOL_GPL(cper_next_record_id);
55
/* Names of the CPER severity values, indexed by the severity number. */
static const char * const severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};
62
63const char *cper_severity_str(unsigned int severity)
64{
65	return severity < ARRAY_SIZE(severity_strs) ?
66		severity_strs[severity] : "unknown";
67}
68EXPORT_SYMBOL_GPL(cper_severity_str);
69
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * Strings are joined with ", ".  When appending another string would
 * push the line past 80 characters, the line is printed and a new one
 * is started, with @pfx printed at the beginning of each line.  NULL
 * entries in @strs are skipped.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i, nbits = 8 * sizeof(bits);
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * @bits has no positions beyond its own width, so entries past
	 * that can never match; stopping there also keeps the shift below
	 * well defined.
	 */
	if (strs_size < nbits)
		nbits = strs_size;

	for (i = 0; i < nbits; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * scnprintf() returns the number of characters actually
		 * stored, so len never runs past buf even when @pfx plus a
		 * string is longer than the buffer.  snprintf() would
		 * return the untruncated length and make the next
		 * buf+len / sizeof(buf)-len computation overflow.
		 */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
106
/* Processor type names, indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Processor error type names, indexed by bit position in proc_error_type. */
static const char * const proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Flag names, indexed by bit position in the flags field. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
138
139static void cper_print_proc_generic(const char *pfx,
140				    const struct cper_sec_proc_generic *proc)
141{
142	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
143		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
144		       proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
145		       proc_type_strs[proc->proc_type] : "unknown");
146	if (proc->validation_bits & CPER_PROC_VALID_ISA)
147		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
148		       proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
149		       proc_isa_strs[proc->proc_isa] : "unknown");
150	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
151		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
152		cper_print_bits(pfx, proc->proc_error_type,
153				proc_error_type_strs,
154				ARRAY_SIZE(proc_error_type_strs));
155	}
156	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
157		printk("%s""operation: %d, %s\n", pfx, proc->operation,
158		       proc->operation < ARRAY_SIZE(proc_op_strs) ?
159		       proc_op_strs[proc->operation] : "unknown");
160	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
161		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
162		cper_print_bits(pfx, proc->flags, proc_flag_strs,
163				ARRAY_SIZE(proc_flag_strs));
164	}
165	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
166		printk("%s""level: %d\n", pfx, proc->level);
167	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
168		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
169	if (proc->validation_bits & CPER_PROC_VALID_ID)
170		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
171	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
172		printk("%s""target_address: 0x%016llx\n",
173		       pfx, proc->target_addr);
174	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
175		printk("%s""requestor_id: 0x%016llx\n",
176		       pfx, proc->requestor_id);
177	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
178		printk("%s""responder_id: 0x%016llx\n",
179		       pfx, proc->responder_id);
180	if (proc->validation_bits & CPER_PROC_VALID_IP)
181		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
182}
183
/* Memory error type names, indexed by the error type number. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};
202
203const char *cper_mem_err_type_str(unsigned int etype)
204{
205	return etype < ARRAY_SIZE(mem_err_type_strs) ?
206		mem_err_type_strs[etype] : "unknown";
207}
208EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
209
210static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
211{
212	u32 len, n;
213
214	if (!msg)
215		return 0;
216
217	n = 0;
218	len = CPER_REC_LEN - 1;
219	if (mem->validation_bits & CPER_MEM_VALID_NODE)
220		n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
221	if (mem->validation_bits & CPER_MEM_VALID_CARD)
222		n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
223	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
224		n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
225	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
226		n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
227	if (mem->validation_bits & CPER_MEM_VALID_BANK)
228		n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
229	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
230		n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
231	if (mem->validation_bits & CPER_MEM_VALID_ROW)
232		n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
233	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
234		n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
235	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
236		n += scnprintf(msg + n, len - n, "bit_position: %d ",
237			       mem->bit_pos);
238	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
239		n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
240			       mem->requestor_id);
241	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
242		n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
243			       mem->responder_id);
244	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
245		scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
246			  mem->target_id);
247
248	msg[n] = '\0';
249	return n;
250}
251
252static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
253{
254	u32 len, n;
255	const char *bank = NULL, *device = NULL;
256
257	if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
258		return 0;
259
260	n = 0;
261	len = CPER_REC_LEN - 1;
262	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
263	if (bank && device)
264		n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
265	else
266		n = snprintf(msg, len,
267			     "DIMM location: not present. DMI handle: 0x%.4x ",
268			     mem->mem_dev_handle);
269
270	msg[n] = '\0';
271	return n;
272}
273
274void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
275		       struct cper_mem_err_compact *cmem)
276{
277	cmem->validation_bits = mem->validation_bits;
278	cmem->node = mem->node;
279	cmem->card = mem->card;
280	cmem->module = mem->module;
281	cmem->bank = mem->bank;
282	cmem->device = mem->device;
283	cmem->row = mem->row;
284	cmem->column = mem->column;
285	cmem->bit_pos = mem->bit_pos;
286	cmem->requestor_id = mem->requestor_id;
287	cmem->responder_id = mem->responder_id;
288	cmem->target_id = mem->target_id;
289	cmem->rank = mem->rank;
290	cmem->mem_array_handle = mem->mem_array_handle;
291	cmem->mem_dev_handle = mem->mem_dev_handle;
292}
293
294const char *cper_mem_err_unpack(struct trace_seq *p,
295				struct cper_mem_err_compact *cmem)
296{
297	const char *ret = p->buffer + p->len;
298
299	if (cper_mem_err_location(cmem, rcd_decode_str))
300		trace_seq_printf(p, "%s", rcd_decode_str);
301	if (cper_dimm_err_location(cmem, rcd_decode_str))
302		trace_seq_printf(p, "%s", rcd_decode_str);
303	trace_seq_putc(p, '\0');
304
305	return ret;
306}
307
308static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
309{
310	struct cper_mem_err_compact cmem;
311
312	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
313		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
314	if (mem->validation_bits & CPER_MEM_VALID_PA)
315		printk("%s""physical_address: 0x%016llx\n",
316		       pfx, mem->physical_addr);
317	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
318		printk("%s""physical_address_mask: 0x%016llx\n",
319		       pfx, mem->physical_addr_mask);
320	cper_mem_err_pack(mem, &cmem);
321	if (cper_mem_err_location(&cmem, rcd_decode_str))
322		printk("%s%s\n", pfx, rcd_decode_str);
323	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
324		u8 etype = mem->error_type;
325		printk("%s""error_type: %d, %s\n", pfx, etype,
326		       cper_mem_err_type_str(etype));
327	}
328	if (cper_dimm_err_location(&cmem, rcd_decode_str))
329		printk("%s%s\n", pfx, rcd_decode_str);
330}
331
/* PCIe port type names, indexed by the port_type field. */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
345
346static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
347			    const struct acpi_hest_generic_data *gdata)
348{
349	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
350		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
351		       pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
352		       pcie_port_type_strs[pcie->port_type] : "unknown");
353	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
354		printk("%s""version: %d.%d\n", pfx,
355		       pcie->version.major, pcie->version.minor);
356	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
357		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
358		       pcie->command, pcie->status);
359	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
360		const __u8 *p;
361		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
362		       pcie->device_id.segment, pcie->device_id.bus,
363		       pcie->device_id.device, pcie->device_id.function);
364		printk("%s""slot: %d\n", pfx,
365		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
366		printk("%s""secondary_bus: 0x%02x\n", pfx,
367		       pcie->device_id.secondary_bus);
368		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
369		       pcie->device_id.vendor_id, pcie->device_id.device_id);
370		p = pcie->device_id.class_code;
371		printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
372	}
373	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
374		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
375		       pcie->serial_number.lower, pcie->serial_number.upper);
376	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
377		printk(
378	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
379	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
380}
381
382static void cper_estatus_print_section(
383	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
384{
385	uuid_le *sec_type = (uuid_le *)gdata->section_type;
386	__u16 severity;
387	char newpfx[64];
388
389	severity = gdata->error_severity;
390	printk("%s""Error %d, type: %s\n", pfx, sec_no,
391	       cper_severity_str(severity));
392	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
393		printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
394	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
395		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
396
397	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
398	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
399		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
400		printk("%s""section_type: general processor error\n", newpfx);
401		if (gdata->error_data_length >= sizeof(*proc_err))
402			cper_print_proc_generic(newpfx, proc_err);
403		else
404			goto err_section_too_small;
405	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
406		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
407		printk("%s""section_type: memory error\n", newpfx);
408		if (gdata->error_data_length >= sizeof(*mem_err))
409			cper_print_mem(newpfx, mem_err);
410		else
411			goto err_section_too_small;
412	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
413		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
414		printk("%s""section_type: PCIe error\n", newpfx);
415		if (gdata->error_data_length >= sizeof(*pcie))
416			cper_print_pcie(newpfx, pcie, gdata);
417		else
418			goto err_section_too_small;
419	} else
420		printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
421
422	return;
423
424err_section_too_small:
425	pr_err(FW_WARN "error section length is too small\n");
426}
427
428void cper_estatus_print(const char *pfx,
429			const struct acpi_hest_generic_status *estatus)
430{
431	struct acpi_hest_generic_data *gdata;
432	unsigned int data_len, gedata_len;
433	int sec_no = 0;
434	char newpfx[64];
435	__u16 severity;
436
437	severity = estatus->error_severity;
438	if (severity == CPER_SEV_CORRECTED)
439		printk("%s%s\n", pfx,
440		       "It has been corrected by h/w "
441		       "and requires no further action");
442	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
443	data_len = estatus->data_length;
444	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
445	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
446	while (data_len >= sizeof(*gdata)) {
447		gedata_len = gdata->error_data_length;
448		cper_estatus_print_section(newpfx, gdata, sec_no);
449		data_len -= gedata_len + sizeof(*gdata);
450		gdata = (void *)(gdata + 1) + gedata_len;
451		sec_no++;
452	}
453}
454EXPORT_SYMBOL_GPL(cper_estatus_print);
455
456int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
457{
458	if (estatus->data_length &&
459	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
460		return -EINVAL;
461	if (estatus->raw_data_length &&
462	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
463		return -EINVAL;
464
465	return 0;
466}
467EXPORT_SYMBOL_GPL(cper_estatus_check_header);
468
469int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
470{
471	struct acpi_hest_generic_data *gdata;
472	unsigned int data_len, gedata_len;
473	int rc;
474
475	rc = cper_estatus_check_header(estatus);
476	if (rc)
477		return rc;
478	data_len = estatus->data_length;
479	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
480	while (data_len >= sizeof(*gdata)) {
481		gedata_len = gdata->error_data_length;
482		if (gedata_len > data_len - sizeof(*gdata))
483			return -EINVAL;
484		data_len -= gedata_len + sizeof(*gdata);
485		gdata = (void *)(gdata + 1) + gedata_len;
486	}
487	if (data_len)
488		return -EINVAL;
489
490	return 0;
491}
492EXPORT_SYMBOL_GPL(cper_estatus_check);
493