[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20111121101420.GB10194@in.ibm.com>
Date:	Mon, 21 Nov 2011 15:44:20 +0530
From:	"K.Prasad" <prasad@...ux.vnet.ibm.com>
To:	linux-kernel@...r.kernel.org
Cc:	Vivek Goyal <vgoyal@...hat.com>, Borislav Petkov <bp@...en8.de>,
	"Luck, Tony" <tony.luck@...el.com>,
	"Eric W. Biederman" <ebiederm@...ssion.com>, anderson@...hat.com,
	tachibana@....nes.nec.co.jp, oomichi@....nes.nec.co.jp,
	Valdis.Kletnieks@...edu, Nick Bowler <nbowler@...iptictech.com>
Subject: [RFC Patch 2/2][slimdump][makedumpfile] Recognise PANIC_MCE crashes
 to generate slimdump
Given that the kernel indicates the cause of crash through a new field
CRASH_REASON in the VMCOREINFO elf-note, recognise the same. For crashes
caused by PANIC_MCE, avoid capture of kernel memory, instead generate
only a slimdump.
Since 'slimdump' will be of very small size (containing only elf-headers and
elf-notes section), the resultant coredump will be of ELF type (and not
kdump-compressed format).
Signed-off-by: K.Prasad <prasad@...ux.vnet.ibm.com>
---
 elf_info.c     |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 elf_info.h     |    2 +
 makedumpfile.c |   13 ++++++++++-
 makedumpfile.h |    1 +
 4 files changed, 82 insertions(+), 1 deletions(-)
diff --git a/elf_info.c b/elf_info.c
index 114dd05..a925484 100644
--- a/elf_info.c
+++ b/elf_info.c
@@ -287,6 +287,73 @@ offset_note_desc(void *note)
 	return offset;
 }
 
+#define CRASH_REASON_PANIC_MCE	"CRASH_REASON=PANIC_MCE"
+
+/*
+ * Check whether the VMCOREINFO note has CRASH_REASON=PANIC_MCE, i.e. the
+ * crash is due to a hardware error and reading/storing the crashing
+ * kernel's memory makes no sense, so only a 'slimdump' is captured.
+ * Returns TRUE if the marker is found; FALSE otherwise or on any error.
+ */
+int
+is_crash_by_mce(void)
+{
+	int desc_off, desc_size, ret = FALSE;
+	off_t offset;
+	char buf[VMCOREINFO_XEN_NOTE_NAME_BYTES];
+	char note[MAX_SIZE_NHDR];
+	char *vmcoreinfo_desc = NULL;
+
+	offset = offset_pt_note_memory;
+	while (offset < offset_pt_note_memory + size_pt_note_memory) {
+		if (lseek(fd_memory, offset, SEEK_SET) < 0) {
+			ERRMSG("Can't seek the dump memory(%s). %s\n",
+			    name_memory, strerror(errno));
+			return FALSE;
+		}
+		if (read(fd_memory, note, sizeof(note)) != sizeof(note) ||
+		    read(fd_memory, buf, sizeof(buf)) != sizeof(buf)) {
+			ERRMSG("Can't read the dump memory(%s). %s\n",
+			    name_memory, strerror(errno));
+			return FALSE;
+		}
+		if (strncmp(VMCOREINFO_NOTE_NAME, buf,
+				VMCOREINFO_NOTE_NAME_BYTES)) {
+			offset += offset_next_note(note);
+			continue;
+		}
+
+		/*
+		 * Read only the note's descriptor (the VMCOREINFO text) and
+		 * NUL-terminate it so that strstr() stays within the buffer.
+		 * NOTE(review): assumes offset_note_desc() is the descriptor's
+		 * offset from the start of the note header - confirm.
+		 */
+		desc_off = offset_note_desc(note);
+		desc_size = offset_next_note(note) - desc_off;
+		if (desc_size <= 0)
+			break;
+		vmcoreinfo_desc = calloc(1, desc_size + 1);
+		if (!vmcoreinfo_desc) {
+			ERRMSG("Can't allocate memory for the vmcoreinfo note. "
+				"%s\n", strerror(errno));
+			return FALSE;
+		}
+		if (lseek(fd_memory, offset + desc_off, SEEK_SET) < 0 ||
+		    read(fd_memory, vmcoreinfo_desc, desc_size) != desc_size) {
+			ERRMSG("Can't read the dump memory(%s). %s\n",
+			    name_memory, strerror(errno));
+			goto exit;
+		}
+		if (strstr(vmcoreinfo_desc, CRASH_REASON_PANIC_MCE))
+			ret = TRUE;
+		break;	/* the VMCOREINFO note was found; stop scanning */
+	}
+exit:
+	free(vmcoreinfo_desc);
+	return ret;
+}
+
 static int
 get_pt_note_info(void)
 {
diff --git a/elf_info.h b/elf_info.h
index 4dff9c1..0437481 100644
--- a/elf_info.h
+++ b/elf_info.h
@@ -34,6 +34,8 @@ unsigned long long get_max_paddr(void);
 int get_elf64_ehdr(int fd, char *filename, Elf64_Ehdr *ehdr);
 int get_elf32_ehdr(int fd, char *filename, Elf32_Ehdr *ehdr);
 int get_elf_info(int fd, char *filename);
+int is_crash_by_mce(void);
+
 void free_elf_info(void);
 
 int is_elf64_memory(void);
diff --git a/makedumpfile.c b/makedumpfile.c
index 7b7c266..15efa90 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -4173,7 +4173,11 @@ write_elf_pages(struct cache_data *cd_header, struct cache_data *cd_page)
 		if (!get_phdr_memory(i, &load))
 			return FALSE;
 
-		if (load.p_type != PT_LOAD)
+		/*
+		 * Do not capture the kernel's memory if flag_nocoredump is
+		 * turned on: reading it may endanger system stability.
+		 */
+		if ((load.p_type != PT_LOAD) || (info->flag_nocoredump))
 			continue;
 
 		off_memory= load.p_offset;
@@ -5760,6 +5764,13 @@ create_dumpfile(void)
 		if (!get_elf_info(info->fd_memory, info->name_memory))
 			return FALSE;
 	}
+	/*
+	 * If the VMCOREINFO note reports CRASH_REASON=PANIC_MCE, indicate it
+	 * through the 'flag_nocoredump' flag. The resultant slimdump will
+	 * always be in ELF format, irrespective of the user options.
+	 */
+	info->flag_nocoredump = info->flag_elf_dumpfile = is_crash_by_mce();
+
 	if (is_xen_memory()) {
 		if (!initial_xen())
 			return FALSE;
diff --git a/makedumpfile.h b/makedumpfile.h
index f0e5da8..faf1c65 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -778,6 +778,7 @@ struct DumpInfo {
 	int		flag_exclude_xen_dom;/* exclude Domain-U from xen-kdump */
 	int             flag_dmesg;          /* dump the dmesg log out of the vmcore file */
 	int		flag_nospace;	     /* the flag of "No space on device" error */
+	int		flag_nocoredump;	/* coredump not collected */
 	unsigned long	vaddr_for_vtop;      /* virtual address for debugging */
 	long		page_size;           /* size of page */
 	long		page_shift;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Powered by blists - more mailing lists
 
