lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Pine.LNX.4.64.0612281730550.4473@woody.osdl.org>
Date:	Thu, 28 Dec 2006 17:38:38 -0800 (PST)
From:	Linus Torvalds <torvalds@...l.org>
To:	David Miller <davem@...emloft.net>
cc:	akpm@...l.org, guichaz@...oo.fr, ranma@...edrich.de,
	gordonfarquharson@...il.com, mh+linux-kernel@...schlus.de,
	nickpiggin@...oo.com.au, andrei.popa@...eo.ro,
	linux-kernel@...r.kernel.org, a.p.zijlstra@...llo.nl,
	hugh@...itas.com, fw@...eb.enyo.de, tbm@...ius.com,
	arjan@...radead.org, kenneth.w.chen@...el.com
Subject: Re: 2.6.19 file content corruption on ext3


Btw, 
 much cleaned-up page tracing patch here, in case anybody cares (and 
"test.c" attached, although I don't think it changed since last time). 

The test.c output is a bit hard to read at times, since it will give 
offsets in bytes as hex (ie "00a77664" means page frame 00000a77, and byte 
664h within that page), while the kernel output is obvioiusly the page 
indexes (but the page fault _addresses_ can contain information about the 
exact byte in a page, so you can match them up when some kernel event is 
related to a page fault).

So both forms are necessary/logical, but it means that to match things up, 
you often need to ignore the last three hex digits of the address that 
"test.c" outputs.

This one also adds traces for the tags and the writeback activity, but 
since I'm going out for birthday dinner, I won't have time to try to 
actually analyse the trace I have.. Which is why I'm sending it out, in 
the hope that somebody else is working on this corruption issue and is 
interested..

		Linus

----
diff --git a/fs/buffer.c b/fs/buffer.c
index 263f88e..f5e132a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -722,6 +722,7 @@ int __set_page_dirty_buffers(struct page *page)
 			set_buffer_dirty(bh);
 			bh = bh->b_this_page;
 		} while (bh != head);
+		PAGE_TRACE(page, "dirtied buffers");
 	}
 	spin_unlock(&mapping->private_lock);
 
@@ -734,6 +735,7 @@ int __set_page_dirty_buffers(struct page *page)
 			__inc_zone_page_state(page, NR_FILE_DIRTY);
 			task_io_account_write(PAGE_CACHE_SIZE);
 		}
+		PAGE_TRACE(page, "setting TAG_DIRTY");
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 350878a..0cf3dce 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -91,6 +91,14 @@
 #define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
+#define SetPageInteresting(page) set_bit(PG_arch_1, &(page)->flags)
+#define PageInteresting(page)	test_bit(PG_arch_1, &(page)->flags)
+
+#define PAGE_TRACE(page, msg, arg...) do {	 				\
+	if (PageInteresting(page))	 					\
+		printk(KERN_DEBUG "PG %08lx: %s:%d " msg "\n", 			\
+			(page)->index, __FILE__, __LINE__ ,##arg );		\
+} while (0)
 
 #if (BITS_PER_LONG > 32)
 /*
@@ -183,32 +191,38 @@ static inline void SetPageUptodate(struct page *page)
 #define PageWriteback(page)	test_bit(PG_writeback, &(page)->flags)
 #define SetPageWriteback(page)						\
 	do {								\
-		if (!test_and_set_bit(PG_writeback,			\
-				&(page)->flags))			\
+		if (!test_and_set_bit(PG_writeback, &(page)->flags)) {	\
+			PAGE_TRACE(page, "set writeback");		\
 			inc_zone_page_state(page, NR_WRITEBACK);	\
+		}							\
 	} while (0)
 #define TestSetPageWriteback(page)					\
 	({								\
 		int ret;						\
 		ret = test_and_set_bit(PG_writeback,			\
 					&(page)->flags);		\
-		if (!ret)						\
+		if (!ret) {						\
+			PAGE_TRACE(page, "set writeback");		\
 			inc_zone_page_state(page, NR_WRITEBACK);	\
+		}							\
 		ret;							\
 	})
 #define ClearPageWriteback(page)					\
 	do {								\
-		if (test_and_clear_bit(PG_writeback,			\
-				&(page)->flags))			\
+		if (test_and_clear_bit(PG_writeback, &(page)->flags)) {	\
+			PAGE_TRACE(page, "end writeback");		\
 			dec_zone_page_state(page, NR_WRITEBACK);	\
+		}							\
 	} while (0)
 #define TestClearPageWriteback(page)					\
 	({								\
 		int ret;						\
 		ret = test_and_clear_bit(PG_writeback,			\
 				&(page)->flags);			\
-		if (ret)						\
+		if (ret) {						\
+			PAGE_TRACE(page, "end writeback");		\
 			dec_zone_page_state(page, NR_WRITEBACK);	\
+		}							\
 		ret;							\
 	})
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5c26818..7735b83 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -79,7 +79,7 @@ config DEBUG_KERNEL
 
 config LOG_BUF_SHIFT
 	int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
-	range 12 21
+	range 12 24
 	default 17 if S390 || LOCKDEP
 	default 16 if X86_NUMAQ || IA64
 	default 15 if SMP
diff --git a/mm/filemap.c b/mm/filemap.c
index 8332c77..4df7d35 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -116,6 +116,7 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
+	PAGE_TRACE(page, "Removing page cache");
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
@@ -421,6 +422,23 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 	return err;
 }
 
+static noinline int is_interesting(struct address_space *mapping)
+{
+	struct inode *inode = mapping->host;
+	struct dentry *dentry;
+	int retval = 0;
+
+	spin_lock(&dcache_lock);
+	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+		if (strcmp(dentry->d_name.name, "mapfile"))
+			continue;
+		retval = 1;
+		break;
+	}
+	spin_unlock(&dcache_lock);
+	return retval;
+}
+
 /**
  * add_to_page_cache - add newly allocated pagecache pages
  * @page:	page to add
@@ -439,6 +457,9 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 {
 	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 
+	if (is_interesting(mapping))
+		SetPageInteresting(page);
+
 	if (error == 0) {
 		write_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
diff --git a/mm/memory.c b/mm/memory.c
index 563792f..20af32f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -667,6 +667,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			tlb_remove_tlb_entry(tlb, pte, addr);
 			if (unlikely(!page))
 				continue;
+			PAGE_TRACE(page, "unmapped at %08lx", addr);
 			if (unlikely(details) && details->nonlinear_vma
 			    && linear_page_index(details->nonlinear_vma,
 						addr) != page->index)
@@ -1605,6 +1606,7 @@ gotten:
 		 */
 		ptep_clear_flush(vma, address, page_table);
 		set_pte_at(mm, address, page_table, entry);
+		PAGE_TRACE(new_page, "write fault at %08lx", address);
 		update_mmu_cache(vma, address, entry);
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
@@ -2249,6 +2251,7 @@ retry:
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		PAGE_TRACE(new_page, "mapping at %08lx (%s)", address, write_access ? "write" : "read");
 		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b3a198c..15f3aaf 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -773,6 +773,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 				__inc_zone_page_state(page, NR_FILE_DIRTY);
 				task_io_account_write(PAGE_CACHE_SIZE);
 			}
+			PAGE_TRACE(page, "setting TAG_DIRTY");
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
@@ -813,6 +814,7 @@ int fastcall set_page_dirty(struct page *page)
 		if (!spd)
 			spd = __set_page_dirty_buffers;
 #endif
+		PAGE_TRACE(page, "setting dirty");
 		return (*spd)(page);
 	}
 	if (!PageDirty(page)) {
@@ -867,6 +869,7 @@ int clear_page_dirty_for_io(struct page *page)
 
 	if (TestClearPageDirty(page)) {
 		if (mapping_cap_account_dirty(mapping)) {
+			PAGE_TRACE(page, "clean_for_io");
 			page_mkclean(page);
 			dec_zone_page_state(page, NR_FILE_DIRTY);
 		}
@@ -886,10 +889,12 @@ int test_clear_page_writeback(struct page *page)
 
 		write_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestClearPageWriteback(page);
-		if (ret)
+		if (ret) {
+			PAGE_TRACE(page, "clearing TAG_WRITEBACK");
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
+		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestClearPageWriteback(page);
@@ -907,14 +912,18 @@ int test_set_page_writeback(struct page *page)
 
 		write_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestSetPageWriteback(page);
-		if (!ret)
+		if (!ret) {
+			PAGE_TRACE(page, "setting TAG_WRITEBACK");
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
-		if (!PageDirty(page))
+		}
+		if (!PageDirty(page)) {
+			PAGE_TRACE(page, "clearing TAG_DIRTY");
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
+		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
diff --git a/mm/rmap.c b/mm/rmap.c
index 57306fa..e6b4676 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -448,6 +448,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (pte_dirty(*pte) || pte_write(*pte)) {
 		pte_t entry;
 
+		PAGE_TRACE(page, "cleaning PTE %08lx", address);
 		flush_cache_page(vma, address, pte_pfn(*pte));
 		entry = ptep_clear_flush(vma, address, pte);
 		entry = pte_wrprotect(entry);
@@ -637,6 +638,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		goto out_unmap;
 	}
 
+	PAGE_TRACE(page, "unmapping from %08lx", address);
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
@@ -767,6 +769,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		if (ptep_clear_flush_young(vma, address, pte))
 			continue;
 
+		PAGE_TRACE(page, "unmapping from %08lx", address);
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pte));
 		pteval = ptep_clear_flush(vma, address, pte);
View attachment "test.c" of type "TEXT/PLAIN" (2975 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ