[go: nahoru, domu]

1/*
2 * APEI Error Record Serialization Table support
3 *
4 * ERST is a way provided by APEI to save and retrieve hardware error
5 * information to and from a persistent store.
6 *
7 * For more information about ERST, please refer to ACPI Specification
8 * version 4.0, section 17.4.
9 *
10 * Copyright 2010 Intel Corp.
11 *   Author: Huang Ying <ying.huang@intel.com>
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/init.h>
30#include <linux/delay.h>
31#include <linux/io.h>
32#include <linux/acpi.h>
33#include <linux/uaccess.h>
34#include <linux/cper.h>
35#include <linux/nmi.h>
36#include <linux/hardirq.h>
37#include <linux/pstore.h>
38#include <acpi/apei.h>
39
40#include "apei-internal.h"
41
/* Tag every pr_*() message from this file with an "ERST: " prefix. */
#undef pr_fmt
#define pr_fmt(fmt) "ERST: " fmt

/*
 * Status values reported by an ERST command; erst_errno() converts
 * them to kernel error codes.
 */
#define ERST_STATUS_SUCCESS			0x0
#define ERST_STATUS_NOT_ENOUGH_SPACE		0x1
#define ERST_STATUS_HARDWARE_NOT_AVAILABLE	0x2
#define ERST_STATUS_FAILED			0x3
#define ERST_STATUS_RECORD_STORE_EMPTY		0x4
#define ERST_STATUS_RECORD_NOT_FOUND		0x5

/*
 * Address of the first instruction entry of an ERST table: the entries
 * begin right after the fixed-size struct acpi_table_erst.
 */
#define ERST_TAB_ENTRY(tab)						\
	((struct acpi_whea_header *)((char *)(tab) +			\
				     sizeof(struct acpi_table_erst)))

/* Polling step used while waiting for firmware: 100ns */
#define SPIN_UNIT		100
/* Firmware is expected to answer within 1 millisecond (value in ns) */
#define FIRMWARE_TIMEOUT	(1 * NSEC_PER_MSEC)
/* Longest stall honoured for a firmware-requested delay: 50us */
#define FIRMWARE_MAX_STALL	50
61
/*
 * Non-zero when ERST must not be used: set by the "erst_disable" boot
 * parameter and by erst_init() when initialization does not complete.
 */
int erst_disable;
EXPORT_SYMBOL_GPL(erst_disable);

/* The ERST ACPI table, looked up by erst_init() via acpi_get_table(). */
static struct acpi_table_erst *erst_tab;
66
67/* ERST Error Log Address Range atrributes */
68#define ERST_RANGE_RESERVED	0x0001
69#define ERST_RANGE_NVRAM	0x0002
70#define ERST_RANGE_SLOW		0x0004
71
72/*
73 * ERST Error Log Address Range, used as buffer for reading/writing
74 * error records.
75 */
76static struct erst_erange {
77	u64 base;
78	u64 size;
79	void __iomem *vaddr;
80	u32 attr;
81} erst_erange;
82
/*
 * Serializes every run of the ERST interpreter, because the firmware
 * implementation behind it may not work properly when it is invoked
 * concurrently.
 *
 * The same lock also provides exclusive access to the ERST Error Log
 * Address Range.
 */
static DEFINE_RAW_SPINLOCK(erst_lock);
92
93static inline int erst_errno(int command_status)
94{
95	switch (command_status) {
96	case ERST_STATUS_SUCCESS:
97		return 0;
98	case ERST_STATUS_HARDWARE_NOT_AVAILABLE:
99		return -ENODEV;
100	case ERST_STATUS_NOT_ENOUGH_SPACE:
101		return -ENOSPC;
102	case ERST_STATUS_RECORD_STORE_EMPTY:
103	case ERST_STATUS_RECORD_NOT_FOUND:
104		return -ENOENT;
105	default:
106		return -EINVAL;
107	}
108}
109
110static int erst_timedout(u64 *t, u64 spin_unit)
111{
112	if ((s64)*t < spin_unit) {
113		pr_warn(FW_WARN "Firmware does not respond in time.\n");
114		return 1;
115	}
116	*t -= spin_unit;
117	ndelay(spin_unit);
118	touch_nmi_watchdog();
119	return 0;
120}
121
122static int erst_exec_load_var1(struct apei_exec_context *ctx,
123			       struct acpi_whea_header *entry)
124{
125	return __apei_exec_read_register(entry, &ctx->var1);
126}
127
128static int erst_exec_load_var2(struct apei_exec_context *ctx,
129			       struct acpi_whea_header *entry)
130{
131	return __apei_exec_read_register(entry, &ctx->var2);
132}
133
134static int erst_exec_store_var1(struct apei_exec_context *ctx,
135				struct acpi_whea_header *entry)
136{
137	return __apei_exec_write_register(entry, ctx->var1);
138}
139
140static int erst_exec_add(struct apei_exec_context *ctx,
141			 struct acpi_whea_header *entry)
142{
143	ctx->var1 += ctx->var2;
144	return 0;
145}
146
147static int erst_exec_subtract(struct apei_exec_context *ctx,
148			      struct acpi_whea_header *entry)
149{
150	ctx->var1 -= ctx->var2;
151	return 0;
152}
153
154static int erst_exec_add_value(struct apei_exec_context *ctx,
155			       struct acpi_whea_header *entry)
156{
157	int rc;
158	u64 val;
159
160	rc = __apei_exec_read_register(entry, &val);
161	if (rc)
162		return rc;
163	val += ctx->value;
164	rc = __apei_exec_write_register(entry, val);
165	return rc;
166}
167
168static int erst_exec_subtract_value(struct apei_exec_context *ctx,
169				    struct acpi_whea_header *entry)
170{
171	int rc;
172	u64 val;
173
174	rc = __apei_exec_read_register(entry, &val);
175	if (rc)
176		return rc;
177	val -= ctx->value;
178	rc = __apei_exec_write_register(entry, val);
179	return rc;
180}
181
182static int erst_exec_stall(struct apei_exec_context *ctx,
183			   struct acpi_whea_header *entry)
184{
185	u64 stall_time;
186
187	if (ctx->value > FIRMWARE_MAX_STALL) {
188		if (!in_nmi())
189			pr_warn(FW_WARN
190			"Too long stall time for stall instruction: 0x%llx.\n",
191				   ctx->value);
192		stall_time = FIRMWARE_MAX_STALL;
193	} else
194		stall_time = ctx->value;
195	udelay(stall_time);
196	return 0;
197}
198
199static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
200				      struct acpi_whea_header *entry)
201{
202	int rc;
203	u64 val;
204	u64 timeout = FIRMWARE_TIMEOUT;
205	u64 stall_time;
206
207	if (ctx->var1 > FIRMWARE_MAX_STALL) {
208		if (!in_nmi())
209			pr_warn(FW_WARN
210		"Too long stall time for stall while true instruction: 0x%llx.\n",
211				   ctx->var1);
212		stall_time = FIRMWARE_MAX_STALL;
213	} else
214		stall_time = ctx->var1;
215
216	for (;;) {
217		rc = __apei_exec_read_register(entry, &val);
218		if (rc)
219			return rc;
220		if (val != ctx->value)
221			break;
222		if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC))
223			return -EIO;
224	}
225	return 0;
226}
227
228static int erst_exec_skip_next_instruction_if_true(
229	struct apei_exec_context *ctx,
230	struct acpi_whea_header *entry)
231{
232	int rc;
233	u64 val;
234
235	rc = __apei_exec_read_register(entry, &val);
236	if (rc)
237		return rc;
238	if (val == ctx->value) {
239		ctx->ip += 2;
240		return APEI_EXEC_SET_IP;
241	}
242
243	return 0;
244}
245
246static int erst_exec_goto(struct apei_exec_context *ctx,
247			  struct acpi_whea_header *entry)
248{
249	ctx->ip = ctx->value;
250	return APEI_EXEC_SET_IP;
251}
252
253static int erst_exec_set_src_address_base(struct apei_exec_context *ctx,
254					  struct acpi_whea_header *entry)
255{
256	return __apei_exec_read_register(entry, &ctx->src_base);
257}
258
259static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx,
260					  struct acpi_whea_header *entry)
261{
262	return __apei_exec_read_register(entry, &ctx->dst_base);
263}
264
265static int erst_exec_move_data(struct apei_exec_context *ctx,
266			       struct acpi_whea_header *entry)
267{
268	int rc;
269	u64 offset;
270	void *src, *dst;
271
272	/* ioremap does not work in interrupt context */
273	if (in_interrupt()) {
274		pr_warn("MOVE_DATA can not be used in interrupt context.\n");
275		return -EBUSY;
276	}
277
278	rc = __apei_exec_read_register(entry, &offset);
279	if (rc)
280		return rc;
281
282	src = ioremap(ctx->src_base + offset, ctx->var2);
283	if (!src)
284		return -ENOMEM;
285	dst = ioremap(ctx->dst_base + offset, ctx->var2);
286	if (!dst) {
287		iounmap(src);
288		return -ENOMEM;
289	}
290
291	memmove(dst, src, ctx->var2);
292
293	iounmap(src);
294	iounmap(dst);
295
296	return 0;
297}
298
299static struct apei_exec_ins_type erst_ins_type[] = {
300	[ACPI_ERST_READ_REGISTER] = {
301		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
302		.run = apei_exec_read_register,
303	},
304	[ACPI_ERST_READ_REGISTER_VALUE] = {
305		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
306		.run = apei_exec_read_register_value,
307	},
308	[ACPI_ERST_WRITE_REGISTER] = {
309		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
310		.run = apei_exec_write_register,
311	},
312	[ACPI_ERST_WRITE_REGISTER_VALUE] = {
313		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
314		.run = apei_exec_write_register_value,
315	},
316	[ACPI_ERST_NOOP] = {
317		.flags = 0,
318		.run = apei_exec_noop,
319	},
320	[ACPI_ERST_LOAD_VAR1] = {
321		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
322		.run = erst_exec_load_var1,
323	},
324	[ACPI_ERST_LOAD_VAR2] = {
325		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
326		.run = erst_exec_load_var2,
327	},
328	[ACPI_ERST_STORE_VAR1] = {
329		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
330		.run = erst_exec_store_var1,
331	},
332	[ACPI_ERST_ADD] = {
333		.flags = 0,
334		.run = erst_exec_add,
335	},
336	[ACPI_ERST_SUBTRACT] = {
337		.flags = 0,
338		.run = erst_exec_subtract,
339	},
340	[ACPI_ERST_ADD_VALUE] = {
341		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
342		.run = erst_exec_add_value,
343	},
344	[ACPI_ERST_SUBTRACT_VALUE] = {
345		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
346		.run = erst_exec_subtract_value,
347	},
348	[ACPI_ERST_STALL] = {
349		.flags = 0,
350		.run = erst_exec_stall,
351	},
352	[ACPI_ERST_STALL_WHILE_TRUE] = {
353		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
354		.run = erst_exec_stall_while_true,
355	},
356	[ACPI_ERST_SKIP_NEXT_IF_TRUE] = {
357		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
358		.run = erst_exec_skip_next_instruction_if_true,
359	},
360	[ACPI_ERST_GOTO] = {
361		.flags = 0,
362		.run = erst_exec_goto,
363	},
364	[ACPI_ERST_SET_SRC_ADDRESS_BASE] = {
365		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
366		.run = erst_exec_set_src_address_base,
367	},
368	[ACPI_ERST_SET_DST_ADDRESS_BASE] = {
369		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
370		.run = erst_exec_set_dst_address_base,
371	},
372	[ACPI_ERST_MOVE_DATA] = {
373		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
374		.run = erst_exec_move_data,
375	},
376};
377
378static inline void erst_exec_ctx_init(struct apei_exec_context *ctx)
379{
380	apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type),
381			   ERST_TAB_ENTRY(erst_tab), erst_tab->entries);
382}
383
384static int erst_get_erange(struct erst_erange *range)
385{
386	struct apei_exec_context ctx;
387	int rc;
388
389	erst_exec_ctx_init(&ctx);
390	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE);
391	if (rc)
392		return rc;
393	range->base = apei_exec_ctx_get_output(&ctx);
394	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH);
395	if (rc)
396		return rc;
397	range->size = apei_exec_ctx_get_output(&ctx);
398	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES);
399	if (rc)
400		return rc;
401	range->attr = apei_exec_ctx_get_output(&ctx);
402
403	return 0;
404}
405
406static ssize_t __erst_get_record_count(void)
407{
408	struct apei_exec_context ctx;
409	int rc;
410
411	erst_exec_ctx_init(&ctx);
412	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT);
413	if (rc)
414		return rc;
415	return apei_exec_ctx_get_output(&ctx);
416}
417
418ssize_t erst_get_record_count(void)
419{
420	ssize_t count;
421	unsigned long flags;
422
423	if (erst_disable)
424		return -ENODEV;
425
426	raw_spin_lock_irqsave(&erst_lock, flags);
427	count = __erst_get_record_count();
428	raw_spin_unlock_irqrestore(&erst_lock, flags);
429
430	return count;
431}
432EXPORT_SYMBOL_GPL(erst_get_record_count);
433
434#define ERST_RECORD_ID_CACHE_SIZE_MIN	16
435#define ERST_RECORD_ID_CACHE_SIZE_MAX	1024
436
437struct erst_record_id_cache {
438	struct mutex lock;
439	u64 *entries;
440	int len;
441	int size;
442	int refcount;
443};
444
445static struct erst_record_id_cache erst_record_id_cache = {
446	.lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
447	.refcount = 0,
448};
449
450static int __erst_get_next_record_id(u64 *record_id)
451{
452	struct apei_exec_context ctx;
453	int rc;
454
455	erst_exec_ctx_init(&ctx);
456	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID);
457	if (rc)
458		return rc;
459	*record_id = apei_exec_ctx_get_output(&ctx);
460
461	return 0;
462}
463
464int erst_get_record_id_begin(int *pos)
465{
466	int rc;
467
468	if (erst_disable)
469		return -ENODEV;
470
471	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
472	if (rc)
473		return rc;
474	erst_record_id_cache.refcount++;
475	mutex_unlock(&erst_record_id_cache.lock);
476
477	*pos = 0;
478
479	return 0;
480}
481EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
482
483/* erst_record_id_cache.lock must be held by caller */
484static int __erst_record_id_cache_add_one(void)
485{
486	u64 id, prev_id, first_id;
487	int i, rc;
488	u64 *entries;
489	unsigned long flags;
490
491	id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
492retry:
493	raw_spin_lock_irqsave(&erst_lock, flags);
494	rc = __erst_get_next_record_id(&id);
495	raw_spin_unlock_irqrestore(&erst_lock, flags);
496	if (rc == -ENOENT)
497		return 0;
498	if (rc)
499		return rc;
500	if (id == APEI_ERST_INVALID_RECORD_ID)
501		return 0;
502	/* can not skip current ID, or loop back to first ID */
503	if (id == prev_id || id == first_id)
504		return 0;
505	if (first_id == APEI_ERST_INVALID_RECORD_ID)
506		first_id = id;
507	prev_id = id;
508
509	entries = erst_record_id_cache.entries;
510	for (i = 0; i < erst_record_id_cache.len; i++) {
511		if (entries[i] == id)
512			break;
513	}
514	/* record id already in cache, try next */
515	if (i < erst_record_id_cache.len)
516		goto retry;
517	if (erst_record_id_cache.len >= erst_record_id_cache.size) {
518		int new_size, alloc_size;
519		u64 *new_entries;
520
521		new_size = erst_record_id_cache.size * 2;
522		new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
523				     ERST_RECORD_ID_CACHE_SIZE_MAX);
524		if (new_size <= erst_record_id_cache.size) {
525			if (printk_ratelimit())
526				pr_warn(FW_WARN "too many record IDs!\n");
527			return 0;
528		}
529		alloc_size = new_size * sizeof(entries[0]);
530		if (alloc_size < PAGE_SIZE)
531			new_entries = kmalloc(alloc_size, GFP_KERNEL);
532		else
533			new_entries = vmalloc(alloc_size);
534		if (!new_entries)
535			return -ENOMEM;
536		memcpy(new_entries, entries,
537		       erst_record_id_cache.len * sizeof(entries[0]));
538		if (erst_record_id_cache.size < PAGE_SIZE)
539			kfree(entries);
540		else
541			vfree(entries);
542		erst_record_id_cache.entries = entries = new_entries;
543		erst_record_id_cache.size = new_size;
544	}
545	entries[i] = id;
546	erst_record_id_cache.len++;
547
548	return 1;
549}
550
551/*
552 * Get the record ID of an existing error record on the persistent
553 * storage. If there is no error record on the persistent storage, the
554 * returned record_id is APEI_ERST_INVALID_RECORD_ID.
555 */
556int erst_get_record_id_next(int *pos, u64 *record_id)
557{
558	int rc = 0;
559	u64 *entries;
560
561	if (erst_disable)
562		return -ENODEV;
563
564	/* must be enclosed by erst_get_record_id_begin/end */
565	BUG_ON(!erst_record_id_cache.refcount);
566	BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
567
568	mutex_lock(&erst_record_id_cache.lock);
569	entries = erst_record_id_cache.entries;
570	for (; *pos < erst_record_id_cache.len; (*pos)++)
571		if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
572			break;
573	/* found next record id in cache */
574	if (*pos < erst_record_id_cache.len) {
575		*record_id = entries[*pos];
576		(*pos)++;
577		goto out_unlock;
578	}
579
580	/* Try to add one more record ID to cache */
581	rc = __erst_record_id_cache_add_one();
582	if (rc < 0)
583		goto out_unlock;
584	/* successfully add one new ID */
585	if (rc == 1) {
586		*record_id = erst_record_id_cache.entries[*pos];
587		(*pos)++;
588		rc = 0;
589	} else {
590		*pos = -1;
591		*record_id = APEI_ERST_INVALID_RECORD_ID;
592	}
593out_unlock:
594	mutex_unlock(&erst_record_id_cache.lock);
595
596	return rc;
597}
598EXPORT_SYMBOL_GPL(erst_get_record_id_next);
599
600/* erst_record_id_cache.lock must be held by caller */
601static void __erst_record_id_cache_compact(void)
602{
603	int i, wpos = 0;
604	u64 *entries;
605
606	if (erst_record_id_cache.refcount)
607		return;
608
609	entries = erst_record_id_cache.entries;
610	for (i = 0; i < erst_record_id_cache.len; i++) {
611		if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
612			continue;
613		if (wpos != i)
614			entries[wpos] = entries[i];
615		wpos++;
616	}
617	erst_record_id_cache.len = wpos;
618}
619
620void erst_get_record_id_end(void)
621{
622	/*
623	 * erst_disable != 0 should be detected by invoker via the
624	 * return value of erst_get_record_id_begin/next, so this
625	 * function should not be called for erst_disable != 0.
626	 */
627	BUG_ON(erst_disable);
628
629	mutex_lock(&erst_record_id_cache.lock);
630	erst_record_id_cache.refcount--;
631	BUG_ON(erst_record_id_cache.refcount < 0);
632	__erst_record_id_cache_compact();
633	mutex_unlock(&erst_record_id_cache.lock);
634}
635EXPORT_SYMBOL_GPL(erst_get_record_id_end);
636
637static int __erst_write_to_storage(u64 offset)
638{
639	struct apei_exec_context ctx;
640	u64 timeout = FIRMWARE_TIMEOUT;
641	u64 val;
642	int rc;
643
644	erst_exec_ctx_init(&ctx);
645	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
646	if (rc)
647		return rc;
648	apei_exec_ctx_set_input(&ctx, offset);
649	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
650	if (rc)
651		return rc;
652	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
653	if (rc)
654		return rc;
655	for (;;) {
656		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
657		if (rc)
658			return rc;
659		val = apei_exec_ctx_get_output(&ctx);
660		if (!val)
661			break;
662		if (erst_timedout(&timeout, SPIN_UNIT))
663			return -EIO;
664	}
665	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
666	if (rc)
667		return rc;
668	val = apei_exec_ctx_get_output(&ctx);
669	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
670	if (rc)
671		return rc;
672
673	return erst_errno(val);
674}
675
676static int __erst_read_from_storage(u64 record_id, u64 offset)
677{
678	struct apei_exec_context ctx;
679	u64 timeout = FIRMWARE_TIMEOUT;
680	u64 val;
681	int rc;
682
683	erst_exec_ctx_init(&ctx);
684	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
685	if (rc)
686		return rc;
687	apei_exec_ctx_set_input(&ctx, offset);
688	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
689	if (rc)
690		return rc;
691	apei_exec_ctx_set_input(&ctx, record_id);
692	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
693	if (rc)
694		return rc;
695	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
696	if (rc)
697		return rc;
698	for (;;) {
699		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
700		if (rc)
701			return rc;
702		val = apei_exec_ctx_get_output(&ctx);
703		if (!val)
704			break;
705		if (erst_timedout(&timeout, SPIN_UNIT))
706			return -EIO;
707	};
708	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
709	if (rc)
710		return rc;
711	val = apei_exec_ctx_get_output(&ctx);
712	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
713	if (rc)
714		return rc;
715
716	return erst_errno(val);
717}
718
719static int __erst_clear_from_storage(u64 record_id)
720{
721	struct apei_exec_context ctx;
722	u64 timeout = FIRMWARE_TIMEOUT;
723	u64 val;
724	int rc;
725
726	erst_exec_ctx_init(&ctx);
727	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
728	if (rc)
729		return rc;
730	apei_exec_ctx_set_input(&ctx, record_id);
731	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
732	if (rc)
733		return rc;
734	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
735	if (rc)
736		return rc;
737	for (;;) {
738		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
739		if (rc)
740			return rc;
741		val = apei_exec_ctx_get_output(&ctx);
742		if (!val)
743			break;
744		if (erst_timedout(&timeout, SPIN_UNIT))
745			return -EIO;
746	}
747	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
748	if (rc)
749		return rc;
750	val = apei_exec_ctx_get_output(&ctx);
751	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
752	if (rc)
753		return rc;
754
755	return erst_errno(val);
756}
757
/*
 * The NVRAM flavour of the ERST Error Log Address Range is not
 * supported yet; emit a rate-limited warning saying so.
 */
static void pr_unimpl_nvram(void)
{
	if (!printk_ratelimit())
		return;

	pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
}
764
765static int __erst_write_to_nvram(const struct cper_record_header *record)
766{
767	/* do not print message, because printk is not safe for NMI */
768	return -ENOSYS;
769}
770
771static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset)
772{
773	pr_unimpl_nvram();
774	return -ENOSYS;
775}
776
777static int __erst_clear_from_nvram(u64 record_id)
778{
779	pr_unimpl_nvram();
780	return -ENOSYS;
781}
782
783int erst_write(const struct cper_record_header *record)
784{
785	int rc;
786	unsigned long flags;
787	struct cper_record_header *rcd_erange;
788
789	if (erst_disable)
790		return -ENODEV;
791
792	if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE))
793		return -EINVAL;
794
795	if (erst_erange.attr & ERST_RANGE_NVRAM) {
796		if (!raw_spin_trylock_irqsave(&erst_lock, flags))
797			return -EBUSY;
798		rc = __erst_write_to_nvram(record);
799		raw_spin_unlock_irqrestore(&erst_lock, flags);
800		return rc;
801	}
802
803	if (record->record_length > erst_erange.size)
804		return -EINVAL;
805
806	if (!raw_spin_trylock_irqsave(&erst_lock, flags))
807		return -EBUSY;
808	memcpy(erst_erange.vaddr, record, record->record_length);
809	rcd_erange = erst_erange.vaddr;
810	/* signature for serialization system */
811	memcpy(&rcd_erange->persistence_information, "ER", 2);
812
813	rc = __erst_write_to_storage(0);
814	raw_spin_unlock_irqrestore(&erst_lock, flags);
815
816	return rc;
817}
818EXPORT_SYMBOL_GPL(erst_write);
819
820static int __erst_read_to_erange(u64 record_id, u64 *offset)
821{
822	int rc;
823
824	if (erst_erange.attr & ERST_RANGE_NVRAM)
825		return __erst_read_to_erange_from_nvram(
826			record_id, offset);
827
828	rc = __erst_read_from_storage(record_id, 0);
829	if (rc)
830		return rc;
831	*offset = 0;
832
833	return 0;
834}
835
836static ssize_t __erst_read(u64 record_id, struct cper_record_header *record,
837			   size_t buflen)
838{
839	int rc;
840	u64 offset, len = 0;
841	struct cper_record_header *rcd_tmp;
842
843	rc = __erst_read_to_erange(record_id, &offset);
844	if (rc)
845		return rc;
846	rcd_tmp = erst_erange.vaddr + offset;
847	len = rcd_tmp->record_length;
848	if (len <= buflen)
849		memcpy(record, rcd_tmp, len);
850
851	return len;
852}
853
854/*
855 * If return value > buflen, the buffer size is not big enough,
856 * else if return value < 0, something goes wrong,
857 * else everything is OK, and return value is record length
858 */
859ssize_t erst_read(u64 record_id, struct cper_record_header *record,
860		  size_t buflen)
861{
862	ssize_t len;
863	unsigned long flags;
864
865	if (erst_disable)
866		return -ENODEV;
867
868	raw_spin_lock_irqsave(&erst_lock, flags);
869	len = __erst_read(record_id, record, buflen);
870	raw_spin_unlock_irqrestore(&erst_lock, flags);
871	return len;
872}
873EXPORT_SYMBOL_GPL(erst_read);
874
875int erst_clear(u64 record_id)
876{
877	int rc, i;
878	unsigned long flags;
879	u64 *entries;
880
881	if (erst_disable)
882		return -ENODEV;
883
884	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
885	if (rc)
886		return rc;
887	raw_spin_lock_irqsave(&erst_lock, flags);
888	if (erst_erange.attr & ERST_RANGE_NVRAM)
889		rc = __erst_clear_from_nvram(record_id);
890	else
891		rc = __erst_clear_from_storage(record_id);
892	raw_spin_unlock_irqrestore(&erst_lock, flags);
893	if (rc)
894		goto out;
895	entries = erst_record_id_cache.entries;
896	for (i = 0; i < erst_record_id_cache.len; i++) {
897		if (entries[i] == record_id)
898			entries[i] = APEI_ERST_INVALID_RECORD_ID;
899	}
900	__erst_record_id_cache_compact();
901out:
902	mutex_unlock(&erst_record_id_cache.lock);
903	return rc;
904}
905EXPORT_SYMBOL_GPL(erst_clear);
906
907static int __init setup_erst_disable(char *str)
908{
909	erst_disable = 1;
910	return 0;
911}
912
913__setup("erst_disable", setup_erst_disable);
914
915static int erst_check_table(struct acpi_table_erst *erst_tab)
916{
917	if ((erst_tab->header_length !=
918	     (sizeof(struct acpi_table_erst) - sizeof(erst_tab->header)))
919	    && (erst_tab->header_length != sizeof(struct acpi_table_erst)))
920		return -EINVAL;
921	if (erst_tab->header.length < sizeof(struct acpi_table_erst))
922		return -EINVAL;
923	if (erst_tab->entries !=
924	    (erst_tab->header.length - sizeof(struct acpi_table_erst)) /
925	    sizeof(struct acpi_erst_entry))
926		return -EINVAL;
927
928	return 0;
929}
930
931static int erst_open_pstore(struct pstore_info *psi);
932static int erst_close_pstore(struct pstore_info *psi);
933static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
934			   struct timespec *time, char **buf,
935			   bool *compressed, struct pstore_info *psi);
936static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
937		       u64 *id, unsigned int part, int count, bool compressed,
938		       size_t size, struct pstore_info *psi);
939static int erst_clearer(enum pstore_type_id type, u64 id, int count,
940			struct timespec time, struct pstore_info *psi);
941
942static struct pstore_info erst_info = {
943	.owner		= THIS_MODULE,
944	.name		= "erst",
945	.flags		= PSTORE_FLAGS_FRAGILE,
946	.open		= erst_open_pstore,
947	.close		= erst_close_pstore,
948	.read		= erst_reader,
949	.write		= erst_writer,
950	.erase		= erst_clearer
951};
952
953#define CPER_CREATOR_PSTORE						\
954	UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
955		0x64, 0x90, 0xb8, 0x9d)
956#define CPER_SECTION_TYPE_DMESG						\
957	UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54,	\
958		0x94, 0x19, 0xeb, 0x12)
959#define CPER_SECTION_TYPE_DMESG_Z					\
960	UUID_LE(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d,	\
961		0x34, 0xdd, 0xfa, 0xc6)
962#define CPER_SECTION_TYPE_MCE						\
963	UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,	\
964		0x04, 0x4a, 0x38, 0xfc)
965
966struct cper_pstore_record {
967	struct cper_record_header hdr;
968	struct cper_section_descriptor sec_hdr;
969	char data[];
970} __packed;
971
972static int reader_pos;
973
974static int erst_open_pstore(struct pstore_info *psi)
975{
976	int rc;
977
978	if (erst_disable)
979		return -ENODEV;
980
981	rc = erst_get_record_id_begin(&reader_pos);
982
983	return rc;
984}
985
/*
 * pstore .close callback: end the record ID enumeration started by
 * erst_open_pstore().  @psi is not used.  Always returns 0.
 */
static int erst_close_pstore(struct pstore_info *psi)
{
	erst_get_record_id_end();
	return 0;
}
992
993static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
994			   struct timespec *time, char **buf,
995			   bool *compressed, struct pstore_info *psi)
996{
997	int rc;
998	ssize_t len = 0;
999	u64 record_id;
1000	struct cper_pstore_record *rcd;
1001	size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
1002
1003	if (erst_disable)
1004		return -ENODEV;
1005
1006	rcd = kmalloc(rcd_len, GFP_KERNEL);
1007	if (!rcd) {
1008		rc = -ENOMEM;
1009		goto out;
1010	}
1011skip:
1012	rc = erst_get_record_id_next(&reader_pos, &record_id);
1013	if (rc)
1014		goto out;
1015
1016	/* no more record */
1017	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
1018		rc = -EINVAL;
1019		goto out;
1020	}
1021
1022	len = erst_read(record_id, &rcd->hdr, rcd_len);
1023	/* The record may be cleared by others, try read next record */
1024	if (len == -ENOENT)
1025		goto skip;
1026	else if (len < sizeof(*rcd)) {
1027		rc = -EIO;
1028		goto out;
1029	}
1030	if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
1031		goto skip;
1032
1033	*buf = kmalloc(len, GFP_KERNEL);
1034	if (*buf == NULL) {
1035		rc = -ENOMEM;
1036		goto out;
1037	}
1038	memcpy(*buf, rcd->data, len - sizeof(*rcd));
1039	*id = record_id;
1040	*compressed = false;
1041	if (uuid_le_cmp(rcd->sec_hdr.section_type,
1042			CPER_SECTION_TYPE_DMESG_Z) == 0) {
1043		*type = PSTORE_TYPE_DMESG;
1044		*compressed = true;
1045	} else if (uuid_le_cmp(rcd->sec_hdr.section_type,
1046			CPER_SECTION_TYPE_DMESG) == 0)
1047		*type = PSTORE_TYPE_DMESG;
1048	else if (uuid_le_cmp(rcd->sec_hdr.section_type,
1049			     CPER_SECTION_TYPE_MCE) == 0)
1050		*type = PSTORE_TYPE_MCE;
1051	else
1052		*type = PSTORE_TYPE_UNKNOWN;
1053
1054	if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
1055		time->tv_sec = rcd->hdr.timestamp;
1056	else
1057		time->tv_sec = 0;
1058	time->tv_nsec = 0;
1059
1060out:
1061	kfree(rcd);
1062	return (rc < 0) ? rc : (len - sizeof(*rcd));
1063}
1064
1065static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
1066		       u64 *id, unsigned int part, int count, bool compressed,
1067		       size_t size, struct pstore_info *psi)
1068{
1069	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
1070					(erst_info.buf - sizeof(*rcd));
1071	int ret;
1072
1073	memset(rcd, 0, sizeof(*rcd));
1074	memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
1075	rcd->hdr.revision = CPER_RECORD_REV;
1076	rcd->hdr.signature_end = CPER_SIG_END;
1077	rcd->hdr.section_count = 1;
1078	rcd->hdr.error_severity = CPER_SEV_FATAL;
1079	/* timestamp valid. platform_id, partition_id are invalid */
1080	rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP;
1081	rcd->hdr.timestamp = get_seconds();
1082	rcd->hdr.record_length = sizeof(*rcd) + size;
1083	rcd->hdr.creator_id = CPER_CREATOR_PSTORE;
1084	rcd->hdr.notification_type = CPER_NOTIFY_MCE;
1085	rcd->hdr.record_id = cper_next_record_id();
1086	rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
1087
1088	rcd->sec_hdr.section_offset = sizeof(*rcd);
1089	rcd->sec_hdr.section_length = size;
1090	rcd->sec_hdr.revision = CPER_SEC_REV;
1091	/* fru_id and fru_text is invalid */
1092	rcd->sec_hdr.validation_bits = 0;
1093	rcd->sec_hdr.flags = CPER_SEC_PRIMARY;
1094	switch (type) {
1095	case PSTORE_TYPE_DMESG:
1096		if (compressed)
1097			rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG_Z;
1098		else
1099			rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
1100		break;
1101	case PSTORE_TYPE_MCE:
1102		rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
1103		break;
1104	default:
1105		return -EINVAL;
1106	}
1107	rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
1108
1109	ret = erst_write(&rcd->hdr);
1110	*id = rcd->hdr.record_id;
1111
1112	return ret;
1113}
1114
1115static int erst_clearer(enum pstore_type_id type, u64 id, int count,
1116			struct timespec time, struct pstore_info *psi)
1117{
1118	return erst_clear(id);
1119}
1120
1121static int __init erst_init(void)
1122{
1123	int rc = 0;
1124	acpi_status status;
1125	struct apei_exec_context ctx;
1126	struct apei_resources erst_resources;
1127	struct resource *r;
1128	char *buf;
1129
1130	if (acpi_disabled)
1131		goto err;
1132
1133	if (erst_disable) {
1134		pr_info(
1135	"Error Record Serialization Table (ERST) support is disabled.\n");
1136		goto err;
1137	}
1138
1139	status = acpi_get_table(ACPI_SIG_ERST, 0,
1140				(struct acpi_table_header **)&erst_tab);
1141	if (status == AE_NOT_FOUND)
1142		goto err;
1143	else if (ACPI_FAILURE(status)) {
1144		const char *msg = acpi_format_exception(status);
1145		pr_err("Failed to get table, %s\n", msg);
1146		rc = -EINVAL;
1147		goto err;
1148	}
1149
1150	rc = erst_check_table(erst_tab);
1151	if (rc) {
1152		pr_err(FW_BUG "ERST table is invalid.\n");
1153		goto err;
1154	}
1155
1156	apei_resources_init(&erst_resources);
1157	erst_exec_ctx_init(&ctx);
1158	rc = apei_exec_collect_resources(&ctx, &erst_resources);
1159	if (rc)
1160		goto err_fini;
1161	rc = apei_resources_request(&erst_resources, "APEI ERST");
1162	if (rc)
1163		goto err_fini;
1164	rc = apei_exec_pre_map_gars(&ctx);
1165	if (rc)
1166		goto err_release;
1167	rc = erst_get_erange(&erst_erange);
1168	if (rc) {
1169		if (rc == -ENODEV)
1170			pr_info(
1171	"The corresponding hardware device or firmware implementation "
1172	"is not available.\n");
1173		else
1174			pr_err("Failed to get Error Log Address Range.\n");
1175		goto err_unmap_reg;
1176	}
1177
1178	r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
1179	if (!r) {
1180		pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
1181		       (unsigned long long)erst_erange.base,
1182		       (unsigned long long)erst_erange.base + erst_erange.size - 1);
1183		rc = -EIO;
1184		goto err_unmap_reg;
1185	}
1186	rc = -ENOMEM;
1187	erst_erange.vaddr = ioremap_cache(erst_erange.base,
1188					  erst_erange.size);
1189	if (!erst_erange.vaddr)
1190		goto err_release_erange;
1191
1192	pr_info(
1193	"Error Record Serialization Table (ERST) support is initialized.\n");
1194
1195	buf = kmalloc(erst_erange.size, GFP_KERNEL);
1196	spin_lock_init(&erst_info.buf_lock);
1197	if (buf) {
1198		erst_info.buf = buf + sizeof(struct cper_pstore_record);
1199		erst_info.bufsize = erst_erange.size -
1200				    sizeof(struct cper_pstore_record);
1201		rc = pstore_register(&erst_info);
1202		if (rc) {
1203			if (rc != -EPERM)
1204				pr_info(
1205				"Could not register with persistent store.\n");
1206			erst_info.buf = NULL;
1207			erst_info.bufsize = 0;
1208			kfree(buf);
1209		}
1210	} else
1211		pr_err(
1212		"Failed to allocate %lld bytes for persistent store error log.\n",
1213		erst_erange.size);
1214
1215	return 0;
1216
1217err_release_erange:
1218	release_mem_region(erst_erange.base, erst_erange.size);
1219err_unmap_reg:
1220	apei_exec_post_unmap_gars(&ctx);
1221err_release:
1222	apei_resources_release(&erst_resources);
1223err_fini:
1224	apei_resources_fini(&erst_resources);
1225err:
1226	erst_disable = 1;
1227	return rc;
1228}
1229
1230device_initcall(erst_init);
1231