2 * c 2001 PPC 64 Team, IBM Corp
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * /dev/nvram driver for PPC64
11 * This perhaps should live in drivers/char
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/init.h>
18 #include <linux/spinlock.h>
19 #include <linux/slab.h>
20 #include <linux/kmsg_dump.h>
21 #include <linux/ctype.h>
22 #include <linux/zlib.h>
23 #include <asm/uaccess.h>
24 #include <asm/nvram.h>
27 #include <asm/machdep.h>
29 /* Max bytes to read/write in one go */
/* Size of NVRAM in bytes; assigned in pSeries_nvram_init() from the "#bytes" property. */
32 static unsigned int nvram_size
;
/* RTAS tokens obtained with rtas_token("nvram-fetch") and rtas_token("nvram-store"). */
33 static int nvram_fetch
, nvram_store
;
/* Staging buffer; its physical address (__pa) is what gets passed to rtas_call(). */
34 static char nvram_buf
[NVRW_CNT
]; /* assume this is in the first 4GB */
/* Taken with spin_lock_irqsave() around the rtas_call() loops that use nvram_buf. */
35 static DEFINE_SPINLOCK(nvram_lock
);
/*
 * Bookkeeping for one OS-signature NVRAM partition used by this driver.
 * NOTE(review): the member initialized as .name elsewhere in this file, and
 * the closing brace, are not visible in this excerpt.
 */
42 struct nvram_os_partition
{
44 int req_size
; /* desired size, in bytes */
45 int min_size
; /* minimum acceptable size (0 means req_size) */
46 long size
; /* size of data portion (excluding err_log_info) */
47 long index
; /* offset of data portion of partition */
/*
 * Partition used by nvram_write_error_log(), nvram_read_error_log() and
 * nvram_clear_error_log().
 * NOTE(review): the rest of the initializer is not visible in this excerpt.
 */
50 static struct nvram_os_partition rtas_log_partition
= {
51 .name
= "ibm,rtas-log",
/*
 * Partition that oops_to_nvram() writes oops/panic reports into.
 * NOTE(review): the rest of the initializer is not visible in this excerpt.
 */
57 static struct nvram_os_partition oops_log_partition
= {
58 .name
= "lnx,oops-log",
/*
 * Name list handed to nvram_remove_partition() when obsolete OS partitions
 * are deleted to make room. The entries are not visible in this excerpt.
 */
64 static const char *pseries_nvram_os_partitions
[] = {
/* Forward declaration: the definition appears later in the file. */
70 static void oops_to_nvram(struct kmsg_dumper
*dumper
,
71 enum kmsg_dump_reason reason
,
72 const char *old_msgs
, unsigned long old_len
,
73 const char *new_msgs
, unsigned long new_len
);
/*
 * Dumper object passed to kmsg_dump_register() in nvram_init_oops_partition().
 * NOTE(review): its initializer fields are not visible in this excerpt.
 */
75 static struct kmsg_dumper nvram_kmsg_dumper
= {
79 /* See clobbering_unread_rtas_event() */
80 #define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
/*
 * Set from get_seconds() in nvram_write_error_log() and reset to 0 in
 * nvram_clear_error_log().
 */
81 static unsigned long last_unread_rtas_event
; /* timestamp */
84 * For capturing and compressing an oops or panic report...
86 * big_oops_buf[] holds the uncompressed text we're capturing.
88 * oops_buf[] holds the compressed text, preceded by a prefix.
89 * The prefix is just a u16 holding the length of the compressed* text.
90 * (*Or uncompressed, if compression fails.) oops_buf[] gets written
93 * oops_len points to the prefix. oops_data points to the compressed text.
98 * +------------+-----------------------------------------------+
100 * | (2 bytes) | (oops_data_sz bytes) |
101 * +------------+-----------------------------------------------+
105 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
/* Size of big_oops_buf; computed as (oops_data_sz * 100) / 45 during init. */
107 static size_t big_oops_buf_sz
;
/* Both buffers are kmalloc'ed in nvram_init_oops_partition(). */
108 static char *big_oops_buf
, *oops_buf
;
/* Points at the start of oops_buf (the u16 length prefix). */
109 static u16
*oops_len
;
/* Points just past the u16 prefix inside oops_buf. */
110 static char *oops_data
;
/* oops_log_partition.size minus the size of the u16 prefix. */
111 static size_t oops_data_sz
;
113 /* Compression parameters */
/* Passed to zlib_deflateInit2() in nvram_compress(). */
114 #define COMPR_LEVEL 6
/* Passed to zlib_deflateInit2() and to zlib_deflate_workspacesize(). */
115 #define WINDOW_BITS 12
/* Shared deflate state; stream.workspace is allocated in nvram_init_oops_partition(). */
117 static struct z_stream_s stream
;
/*
 * Read up to count bytes of NVRAM, starting at offset *index, into buf
 * using the RTAS nvram-fetch call.
 * NOTE(review): the local declarations, the return statements and the
 * per-iteration computation of len are not visible in this excerpt.
 */
119 static ssize_t
pSeries_nvram_read(char *buf
, size_t count
, loff_t
*index
)
/* Nothing can be read if the size is unknown or firmware lacks nvram-fetch. */
128 if (nvram_size
== 0 || nvram_fetch
== RTAS_UNKNOWN_SERVICE
)
/* Starting offset is at or beyond the end of NVRAM. */
131 if (*index
>= nvram_size
)
/* Clamp the request so it does not run past the end of NVRAM. */
135 if (i
+ count
> nvram_size
)
136 count
= nvram_size
- i
;
/* nvram_buf is shared, so hold the lock for the whole transfer. */
138 spin_lock_irqsave(&nvram_lock
, flags
);
140 for (; count
!= 0; count
-= len
) {
/*
 * Fetch len bytes at offset i into nvram_buf. A non-zero status or a
 * transfer count (done) different from len takes the failure path.
 */
145 if ((rtas_call(nvram_fetch
, 3, 2, &done
, i
, __pa(nvram_buf
),
146 len
) != 0) || len
!= done
) {
/* Release the lock on the failure path. */
147 spin_unlock_irqrestore(&nvram_lock
, flags
);
/* Copy the fetched chunk out of the staging buffer. */
151 memcpy(p
, nvram_buf
, len
);
157 spin_unlock_irqrestore(&nvram_lock
, flags
);
/*
 * Write up to count bytes from buf into NVRAM, starting at offset *index,
 * using the RTAS nvram-store call.
 * NOTE(review): the local declarations, the return statements and the
 * per-iteration computation of len are not visible in this excerpt.
 */
163 static ssize_t
pSeries_nvram_write(char *buf
, size_t count
, loff_t
*index
)
/* Nothing can be written if the size is unknown or firmware lacks nvram-store. */
171 if (nvram_size
== 0 || nvram_store
== RTAS_UNKNOWN_SERVICE
)
/* Starting offset is at or beyond the end of NVRAM. */
174 if (*index
>= nvram_size
)
/* Clamp the request so it does not run past the end of NVRAM. */
178 if (i
+ count
> nvram_size
)
179 count
= nvram_size
- i
;
/* nvram_buf is shared, so hold the lock for the whole transfer. */
181 spin_lock_irqsave(&nvram_lock
, flags
);
183 for (; count
!= 0; count
-= len
) {
/* Stage the next chunk in nvram_buf before handing it to firmware. */
188 memcpy(nvram_buf
, p
, len
);
/*
 * Store len bytes from nvram_buf at offset i. A non-zero status or a
 * transfer count (done) different from len takes the failure path.
 */
190 if ((rtas_call(nvram_store
, 3, 2, &done
, i
, __pa(nvram_buf
),
191 len
) != 0) || len
!= done
) {
/* Release the lock on the failure path. */
192 spin_unlock_irqrestore(&nvram_lock
, flags
);
199 spin_unlock_irqrestore(&nvram_lock
, flags
);
205 static ssize_t
pSeries_nvram_get_size(void)
207 return nvram_size
? nvram_size
: -ENODEV
;
211 /* nvram_write_os_partition, nvram_write_error_log
213 * We need to buffer the error logs into nvram to ensure that we have
214 * the failure information to decode. If we have a severe error there
215 * is no way to guarantee that the OS or the machine is in a state to
216 * get back to user land and write the error to disk. For example if
217 * the SCSI device driver causes a Machine Check by writing to a bad
218 * IO address, there is no way of guaranteeing that the device driver
219 * is in any state that it would also be able to write the error data
220 * captured to disk, thus we buffer it in NVRAM for analysis on the
223 * In NVRAM the partition containing the error log buffer will look like:
225 * +-----------+----------+--------+------------+------------------+
226 * | signature | checksum | length | name | data |
227 * |0 |1 |2 3|4 15|16 length-1|
228 * +-----------+----------+--------+------------+------------------+
230 * The 'data' section would look like (in bytes):
231 * +--------------+------------+-----------------------------------+
232 * | event_logged | sequence # | error log |
233 * |0 3|4 7|8 error_log_size-1|
234 * +--------------+------------+-----------------------------------+
236 * event_logged: 0 if event has not been logged to syslog, 1 if it has
237 * sequence #: The unique sequence # for each event. (until it wraps)
238 * error log: The error log from event_scan
/*
 * Write an err_log_info header followed by length bytes from buff into
 * the given partition, starting at part->index.
 * NOTE(review): the remaining local declarations, the bodies of the two
 * early checks, the conditions guarding the pr_err() calls and the return
 * statements are not visible in this excerpt.
 */
240 int nvram_write_os_partition(struct nvram_os_partition
*part
, char * buff
,
241 int length
, unsigned int err_type
, unsigned int error_log_cnt
)
245 struct err_log_info info
;
/* index == -1: presumably the partition was never set up -- confirm. */
247 if (part
->index
== -1) {
/* Payload larger than the partition's data area. */
251 if (length
> part
->size
) {
/* Fill in the header that precedes the payload in NVRAM. */
255 info
.error_type
= err_type
;
256 info
.seq_num
= error_log_cnt
;
258 tmp_index
= part
->index
;
/* Header first ... */
260 rc
= ppc_md
.nvram_write((char *)&info
, sizeof(struct err_log_info
), &tmp_index
);
262 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__
, rc
);
/*
 * ... then the payload, using the same tmp_index (presumably advanced
 * by the previous write -- confirm against the nvram_write hook).
 */
266 rc
= ppc_md
.nvram_write(buff
, length
, &tmp_index
);
268 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__
, rc
);
/*
 * Write an error log into rtas_log_partition via nvram_write_os_partition()
 * and record the current time in last_unread_rtas_event.
 * NOTE(review): any condition guarding the timestamp update and the
 * return statement are not visible in this excerpt.
 */
275 int nvram_write_error_log(char * buff
, int length
,
276 unsigned int err_type
, unsigned int error_log_cnt
)
278 int rc
= nvram_write_os_partition(&rtas_log_partition
, buff
, length
,
279 err_type
, error_log_cnt
);
/* Timestamp consulted by clobbering_unread_rtas_event(). */
281 last_unread_rtas_event
= get_seconds();
285 /* nvram_read_error_log
287 * Reads nvram for error log for at most 'length'
/*
 * Read the err_log_info header and up to length bytes of log data from
 * rtas_log_partition, reporting the header's seq_num and error_type
 * through error_log_cnt and err_type.
 * NOTE(review): the remaining local declarations, the conditions guarding
 * the printk() calls and the return statements are not visible here.
 */
289 int nvram_read_error_log(char * buff
, int length
,
290 unsigned int * err_type
, unsigned int * error_log_cnt
)
294 struct err_log_info info
;
/* index == -1: presumably the partition was never set up -- confirm. */
296 if (rtas_log_partition
.index
== -1)
/* Never read more than the partition's data area holds. */
299 if (length
> rtas_log_partition
.size
)
300 length
= rtas_log_partition
.size
;
302 tmp_index
= rtas_log_partition
.index
;
/* Header first ... */
304 rc
= ppc_md
.nvram_read((char *)&info
, sizeof(struct err_log_info
), &tmp_index
);
306 printk(KERN_ERR
"nvram_read_error_log: Failed nvram_read (%d)\n", rc
);
/* ... then the log data, using the same tmp_index. */
310 rc
= ppc_md
.nvram_read(buff
, length
, &tmp_index
);
312 printk(KERN_ERR
"nvram_read_error_log: Failed nvram_read (%d)\n", rc
);
/* Hand the header fields back to the caller. */
316 *error_log_cnt
= info
.seq_num
;
317 *err_type
= info
.error_type
;
322 /* This doesn't actually zero anything, but it sets the event_logged
323 * word to tell that this event is safely in syslog.
/*
 * Overwrite the first int at rtas_log_partition.index with
 * ERR_FLAG_ALREADY_LOGGED and reset last_unread_rtas_event to 0.
 * NOTE(review): the remaining local declarations, the condition guarding
 * the printk() and the return statements are not visible in this excerpt.
 */
325 int nvram_clear_error_log(void)
328 int clear_word
= ERR_FLAG_ALREADY_LOGGED
;
/* index == -1: presumably the partition was never set up -- confirm. */
331 if (rtas_log_partition
.index
== -1)
334 tmp_index
= rtas_log_partition
.index
;
/* Only sizeof(int) bytes are written; the log data itself is left alone. */
336 rc
= ppc_md
.nvram_write((char *)&clear_word
, sizeof(int), &tmp_index
);
338 printk(KERN_ERR
"nvram_clear_error_log: Failed nvram_write (%d)\n", rc
);
/* A zero timestamp makes clobbering_unread_rtas_event() return 0. */
341 last_unread_rtas_event
= 0;
346 /* pseries_nvram_init_os_partition
348 * This sets up a partition with an "OS" signature.
350 * The general strategy is the following:
351 * 1.) If a partition with the indicated name already exists...
352 * - If it's large enough, use it.
353 * - Otherwise, recycle it and keep going.
354 * 2.) Search for a free partition that is large enough.
355 * 3.) If there's not a free partition large enough, recycle any obsolete
356 * OS partitions and try again.
357 * 4.) Will first try getting a chunk that will satisfy the requested size.
358 * 5.) If a chunk of the requested size cannot be allocated, then try finding
359 * a chunk that will satisfy the minimum needed.
361 * Returns 0 on success, else -1.
/*
 * Find or create the OS-signature partition named part->name and record
 * its usable data size in part->size.
 * NOTE(review): the end of the signature, the local declarations, several
 * conditions and all return statements are not visible in this excerpt;
 * the comments added here describe only the statements that are.
 */
363 static int __init
pseries_nvram_init_os_partition(struct nvram_os_partition
369 /* Scan nvram for partitions */
370 nvram_scan_partitions();
373 p
= nvram_find_partition(part
->name
, NVRAM_SIG_OS
, &size
);
375 /* Found one but too small, remove it */
376 if (p
&& size
< part
->min_size
) {
377 pr_info("nvram: Found too small %s partition,"
378 " removing it...\n", part
->name
);
379 nvram_remove_partition(part
->name
, NVRAM_SIG_OS
, NULL
);
383 /* Create one if we didn't find */
385 p
= nvram_create_partition(part
->name
, NVRAM_SIG_OS
,
386 part
->req_size
, part
->min_size
);
388 pr_info("nvram: No room to create %s partition, "
389 "deleting any obsolete OS partitions...\n",
/* Second attempt: remove OS partitions via pseries_nvram_os_partitions, then retry. */
391 nvram_remove_partition(NULL
, NVRAM_SIG_OS
,
392 pseries_nvram_os_partitions
);
393 p
= nvram_create_partition(part
->name
, NVRAM_SIG_OS
,
394 part
->req_size
, part
->min_size
);
/* NOTE(review): p is narrowed to int for the %d below; confirm p's type fits. */
399 pr_err("nvram: Failed to find or create %s"
400 " partition, err %d\n", part
->name
, (int)p
);
/* Usable data size excludes the err_log_info header. */
405 part
->size
= nvram_get_partition_size(p
) - sizeof(struct err_log_info
);
/*
 * Set up the partition and buffers used to log oops/panic reports, then
 * register nvram_kmsg_dumper.
 * NOTE(review): the local declarations, most conditions, the early
 * returns and the start/end of the inner comment block are not visible in
 * this excerpt; the comments added here describe only the visible
 * statements.
 */
410 static void __init
nvram_init_oops_partition(int rtas_partition_exists
)
414 rc
= pseries_nvram_init_os_partition(&oops_log_partition
);
416 if (!rtas_partition_exists
)
418 pr_notice("nvram: Using %s partition to log both"
419 " RTAS errors and oops/panic reports\n",
420 rtas_log_partition
.name
);
/* Make the oops partition descriptor a copy of the RTAS log descriptor. */
421 memcpy(&oops_log_partition
, &rtas_log_partition
,
422 sizeof(rtas_log_partition
));
/* Buffer sized to the partition's data area. */
424 oops_buf
= kmalloc(oops_log_partition
.size
, GFP_KERNEL
);
426 pr_err("nvram: No memory for %s partition\n",
427 oops_log_partition
.name
);
/* Carve oops_buf into a u16 length prefix followed by the data area. */
430 oops_len
= (u16
*) oops_buf
;
431 oops_data
= oops_buf
+ sizeof(u16
);
432 oops_data_sz
= oops_log_partition
.size
- sizeof(u16
);
435 * Figure compression (preceded by elimination of each line's <n>
436 * severity prefix) will reduce the oops/panic report to at most
437 * 45% of its original size.
439 big_oops_buf_sz
= (oops_data_sz
* 100) / 45;
440 big_oops_buf
= kmalloc(big_oops_buf_sz
, GFP_KERNEL
);
/* Workspace for zlib deflate, sized for WINDOW_BITS and MEM_LEVEL. */
442 stream
.workspace
= kmalloc(zlib_deflate_workspacesize(
443 WINDOW_BITS
, MEM_LEVEL
), GFP_KERNEL
);
444 if (!stream
.workspace
) {
445 pr_err("nvram: No memory for compression workspace; "
446 "skipping compression of %s partition data\n",
447 oops_log_partition
.name
);
452 pr_err("No memory for uncompressed %s data; "
453 "skipping compression\n", oops_log_partition
.name
);
454 stream
.workspace
= NULL
;
457 rc
= kmsg_dump_register(&nvram_kmsg_dumper
);
459 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc
);
462 kfree(stream
.workspace
);
/*
 * Initialize the RTAS log partition, then the oops partition, telling the
 * latter whether the former succeeded (rc == 0).
 * NOTE(review): the declaration of rc and the return statement are not
 * visible in this excerpt.
 */
466 static int __init
pseries_nvram_init_log_partitions(void)
470 rc
= pseries_nvram_init_os_partition(&rtas_log_partition
);
471 nvram_init_oops_partition(rc
== 0);
/* Registers the function above through machine_arch_initcall for "pseries". */
474 machine_arch_initcall(pseries
, pseries_nvram_init_log_partitions
);
/*
 * Find the "nvram" device-tree node, read its "#bytes" property into
 * nvram_size, look up the RTAS nvram-fetch/nvram-store tokens and install
 * this file's read/write/size functions in ppc_md.
 * NOTE(review): the error-handling bodies and the return statements are
 * not visible in this excerpt.
 */
476 int __init
pSeries_nvram_init(void)
478 struct device_node
*nvram
;
479 const unsigned int *nbytes_p
;
480 unsigned int proplen
;
482 nvram
= of_find_node_by_type(NULL
, "nvram");
486 nbytes_p
= of_get_property(nvram
, "#bytes", &proplen
);
/* The property must exist and be exactly one unsigned int long. */
487 if (nbytes_p
== NULL
|| proplen
!= sizeof(unsigned int)) {
492 nvram_size
= *nbytes_p
;
494 nvram_fetch
= rtas_token("nvram-fetch");
495 nvram_store
= rtas_token("nvram-store");
/* NOTE(review): nvram_size is unsigned int but is printed with %d; %u would match. */
496 printk(KERN_INFO
"PPC64 nvram contains %d bytes\n", nvram_size
);
/* Install the pSeries implementations as the platform NVRAM hooks. */
499 ppc_md
.nvram_read
= pSeries_nvram_read
;
500 ppc_md
.nvram_write
= pSeries_nvram_write
;
501 ppc_md
.nvram_size
= pSeries_nvram_get_size
;
507 * Try to capture the last capture_len bytes of the printk buffer. Return
508 * the amount actually captured.
/*
 * Copy the tail of the message text (old_msgs followed by new_msgs) into
 * captured, limited to capture_len bytes.
 * NOTE(review): the final arguments of two memcpy() calls, the return of
 * the first branch and the else/brace lines are not visible in this
 * excerpt.
 */
510 static size_t capture_last_msgs(const char *old_msgs
, size_t old_len
,
511 const char *new_msgs
, size_t new_len
,
512 char *captured
, size_t capture_len
)
/* new_msgs alone fills the capture buffer: take its last capture_len bytes. */
514 if (new_len
>= capture_len
) {
515 memcpy(captured
, new_msgs
+ (new_len
- capture_len
),
519 /* Grab the end of old_msgs. */
520 size_t old_tail_len
= min(old_len
, capture_len
- new_len
);
521 memcpy(captured
, old_msgs
+ (old_len
- old_tail_len
),
/* All of new_msgs follows the tail of old_msgs. */
523 memcpy(captured
+ old_tail_len
, new_msgs
, new_len
);
524 return old_tail_len
+ new_len
;
529 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
530 * would logging this oops/panic overwrite an RTAS event that rtas_errd
531 * hasn't had a chance to read and process? Return 1 if so, else 0.
533 * We assume that if rtas_errd hasn't read the RTAS event in
534 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
536 static int clobbering_unread_rtas_event(void)
538 return (oops_log_partition
.index
== rtas_log_partition
.index
539 && last_unread_rtas_event
540 && get_seconds() - last_unread_rtas_event
<=
541 NVRAM_RTAS_READ_TIMEOUT
);
544 /* Squeeze out each line's <n> severity prefix. */
/*
 * Scan buf[0..len) for "<digit>" prefixes that appear at the start of a
 * line.
 * NOTE(review): the initialization of in/out/newline, the bodies that
 * skip or copy characters and the return statement are not visible in
 * this excerpt.
 */
545 static size_t elide_severities(char *buf
, size_t len
)
547 char *in
, *out
, *buf_end
= buf
+ len
;
548 /* Assume a <n> at the very beginning marks the start of a line. */
552 while (in
< buf_end
) {
/*
 * Match only at a line start and only when three bytes remain, so that
 * in[1] and in[2] stay inside the buffer.
 * NOTE(review): in[1] is a plain char passed to isdigit(); confirm the
 * kernel's isdigit() tolerates negative char values.
 */
553 if (newline
&& in
+3 <= buf_end
&&
554 *in
== '<' && isdigit(in
[1]) && in
[2] == '>') {
/* Remember whether the character just examined ended a line. */
558 newline
= (*in
== '\n');
565 /* Derived from logfs_compress() */
/*
 * Deflate inlen bytes from in into out using the file-scope zlib stream,
 * initialized with COMPR_LEVEL, WINDOW_BITS and MEM_LEVEL.
 * NOTE(review): the end of the signature, the local declarations, the
 * stream.next_in assignment, the error exits and the return statement are
 * not visible in this excerpt.
 */
566 static int nvram_compress(const void *in
, void *out
, size_t inlen
,
572 err
= zlib_deflateInit2(&stream
, COMPR_LEVEL
, Z_DEFLATED
, WINDOW_BITS
,
573 MEM_LEVEL
, Z_DEFAULT_STRATEGY
);
578 stream
.avail_in
= inlen
;
580 stream
.next_out
= out
;
581 stream
.avail_out
= outlen
;
582 stream
.total_out
= 0;
/* One Z_FINISH call; anything other than Z_STREAM_END takes the failure path. */
584 err
= zlib_deflate(&stream
, Z_FINISH
);
585 if (err
!= Z_STREAM_END
)
588 err
= zlib_deflateEnd(&stream
);
/* Output that is not smaller than the input is checked separately. */
592 if (stream
.total_out
>= stream
.total_in
)
595 ret
= stream
.total_out
;
600 /* Compress the text from big_oops_buf into oops_buf. */
/*
 * Compress text_len bytes of big_oops_buf into oops_data and store the
 * compressed length in *oops_len.
 * NOTE(review): the last argument of nvram_compress() and the return
 * statements are not visible in this excerpt.
 */
601 static int zip_oops(size_t text_len
)
603 int zipped_len
= nvram_compress(big_oops_buf
, oops_data
, text_len
,
/* A negative result from nvram_compress() means compression failed. */
605 if (zipped_len
< 0) {
606 pr_err("nvram: compression failed; returned %d\n", zipped_len
);
607 pr_err("nvram: logging uncompressed oops/panic report\n");
/* NOTE(review): the length is narrowed to u16; confirm it cannot exceed 65535. */
610 *oops_len
= (u16
) zipped_len
;
615 * This is our kmsg_dump callback, called after an oops or panic report
616 * has been written to the printk buffer. We want to capture as much
617 * of the printk buffer as possible. First, capture as much as we can
618 * that we think will compress sufficiently to fit in the lnx,oops-log
619 * partition. If that's too much, go back and capture uncompressed text.
/*
 * kmsg_dump callback: capture the tail of the printk text, try to store
 * it compressed, and write the result to oops_log_partition.
 * NOTE(review): the switch header, the statements under each case, the
 * condition selecting the uncompressed fallback, the remaining local
 * declarations and the early returns are not visible in this excerpt.
 */
621 static void oops_to_nvram(struct kmsg_dumper
*dumper
,
622 enum kmsg_dump_reason reason
,
623 const char *old_msgs
, unsigned long old_len
,
624 const char *new_msgs
, unsigned long new_len
)
/* State kept across calls. */
626 static unsigned int oops_count
= 0;
627 static bool panicking
= false;
628 static DEFINE_SPINLOCK(lock
);
/* Default to the compressed type; changed to ERR_TYPE_KERNEL_PANIC on the uncompressed path. */
631 unsigned int err_type
= ERR_TYPE_KERNEL_PANIC_GZ
;
635 case KMSG_DUMP_RESTART
:
637 case KMSG_DUMP_POWEROFF
:
638 /* These are almost always orderly shutdowns. */
642 case KMSG_DUMP_PANIC
:
645 case KMSG_DUMP_EMERG
:
647 /* Panic report already captured. */
651 pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
652 __FUNCTION__
, (int) reason
);
/* Check first whether this write would overwrite a recent RTAS event. */
656 if (clobbering_unread_rtas_event())
/* trylock rather than lock: this path does not wait if the lock is already held. */
659 if (!spin_trylock_irqsave(&lock
, flags
))
/* First attempt: capture into big_oops_buf, strip <n> prefixes, compress. */
663 text_len
= capture_last_msgs(old_msgs
, old_len
,
664 new_msgs
, new_len
, big_oops_buf
, big_oops_buf_sz
);
665 text_len
= elide_severities(big_oops_buf
, text_len
);
666 rc
= zip_oops(text_len
);
/* Uncompressed path: capture straight into oops_data. */
669 text_len
= capture_last_msgs(old_msgs
, old_len
,
670 new_msgs
, new_len
, oops_data
, oops_data_sz
);
671 err_type
= ERR_TYPE_KERNEL_PANIC
;
672 *oops_len
= (u16
) text_len
;
/* Write the u16 prefix plus payload; the result is deliberately ignored. */
675 (void) nvram_write_os_partition(&oops_log_partition
, oops_buf
,
676 (int) (sizeof(*oops_len
) + *oops_len
), err_type
, ++oops_count
);
678 spin_unlock_irqrestore(&lock
, flags
);