Message-ID: <2812963.1675725632@warthog.procyon.org.uk>
Date: Mon, 06 Feb 2023 23:20:32 +0000
From: David Howells <dhowells@...hat.com>
To: Jens Axboe <axboe@...nel.dk>, Christoph Hellwig <hch@....de>
Cc: dhowells@...hat.com, David Hildenbrand <david@...hat.com>,
John Hubbard <jhubbard@...dia.com>, linux-mm@...ck.org,
linux-block@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: Need help tracking down a bug in the bio-FOLL_PIN patches
For reference, here's the debugging code I'm using.
Enable the following:
CONFIG_DEBUG_PAGE_REF
CONFIG_DEBUG_PAGE_MARK
CONFIG_DEBUG_PAGE_REF_ONLY_MARKED
and then enable the page_ref and bio tracepoints:
echo 1 >/sys/kernel/debug/tracing/events/page_ref/enable
echo 1 >/sys/kernel/debug/tracing/events/block/bio/enable
echo 1 >/sys/kernel/debug/tracing/events/block/bio_endio/enable
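The resulting trace can then be pulled out of the ftrace buffer in the usual
way, for example (assuming tracefs is mounted under /sys/kernel/debug):
echo > /sys/kernel/debug/tracing/trace
cat /sys/kernel/debug/tracing/trace_pipe > /tmp/trace.log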
David
---
block/bio.c | 50 ++++++++++++++++++++-
fs/iomap/buffered-io.c | 10 ++++
fs/pipe.c | 1
fs/splice.c | 23 +++++++++
include/linux/bio.h | 4 -
include/linux/blk_types.h | 2
include/linux/page-flags.h | 12 +++++
include/linux/page_ref.h | 34 ++++++++------
include/linux/uio.h | 1
include/trace/events/block.h | 95 +++++++++++++++++++++++++++++++++++++++++
include/trace/events/mmflags.h | 9 +++
lib/iov_iter.c | 28 ++++++++++++
mm/Kconfig.debug | 17 +++++++
mm/page_alloc.c | 3 +
mm/readahead.c | 8 +++
15 files changed, 276 insertions(+), 21 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index fc57f0aa098e..ae0997688e08 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -20,6 +20,7 @@
#include <linux/blk-crypto.h>
#include <linux/xarray.h>
+#include <trace/events/page_ref.h>
#include <trace/events/block.h>
#include "blk.h"
#include "blk-rq-qos.h"
@@ -214,6 +215,8 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
void bio_uninit(struct bio *bio)
{
+ if (bio_flagged(bio, BIO_TRACE))
+ trace_bio(bio, bio_trace_where_uninit, 0);
#ifdef CONFIG_BLK_CGROUP
if (bio->bi_blkg) {
blkg_put(bio->bi_blkg);
@@ -232,6 +235,9 @@ static void bio_free(struct bio *bio)
struct bio_set *bs = bio->bi_pool;
void *p = bio;
+ if (bio_flagged(bio, BIO_TRACE))
+ trace_bio(bio, bio_trace_where_free, 0);
+
WARN_ON_ONCE(!bs);
bio_uninit(bio);
@@ -247,6 +253,9 @@ static void bio_free(struct bio *bio)
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, blk_opf_t opf)
{
+ static atomic_t bio_debug_ids;
+
+ bio->bi_debug_id = atomic_inc_return(&bio_debug_ids);
bio->bi_next = NULL;
bio->bi_bdev = bdev;
bio->bi_opf = opf;
@@ -1110,6 +1119,13 @@ void __bio_add_page(struct bio *bio, struct page *page,
{
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
+ if (PageDebugMark(page)) {
+ trace_page_ref_set(page, 666);
+ bio_set_flag(bio, BIO_TRACE);
+ trace_bio(bio, bio_trace_where_add_page,
+ page_to_pfn(page));
+ }
+
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
WARN_ON_ONCE(bio_full(bio, len));
@@ -1172,12 +1188,23 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
struct bvec_iter_all iter_all;
struct bio_vec *bvec;
+ unsigned int i = 0;
+
+ if (bio_flagged(bio, BIO_TRACE))
+ trace_bio(bio, bio_trace_where_rel_pages, 0);
bio_for_each_segment_all(bvec, bio, iter_all) {
- if (mark_dirty && !PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
- bio_release_page(bio, bvec->bv_page);
+ if (PageDebugMark(bvec->bv_page))
+ trace_page_ref_set(bvec->bv_page, 980 + i++);
}
+
+ if (bio_flagged(bio, BIO_PAGE_REFFED) ||
+ bio_flagged(bio, BIO_PAGE_PINNED))
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ if (mark_dirty && !PageCompound(bvec->bv_page))
+ set_page_dirty_lock(bvec->bv_page);
+ bio_release_page(bio, bvec->bv_page);
+ }
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1445,6 +1472,9 @@ void bio_free_pages(struct bio *bio)
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
+ if (bio_flagged(bio, BIO_TRACE))
+ trace_bio(bio, bio_trace_where_free_pages, 0);
+
bio_for_each_segment_all(bvec, bio, iter_all)
__free_page(bvec->bv_page);
}
@@ -1534,6 +1564,8 @@ void bio_check_pages_dirty(struct bio *bio)
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bvec, bio, iter_all) {
+ if (PageDebugMark(bvec->bv_page))
+ trace_page_ref_set(bvec->bv_page, 654);
if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
goto defer;
}
@@ -1583,6 +1615,8 @@ static inline bool bio_remaining_done(struct bio *bio)
**/
void bio_endio(struct bio *bio)
{
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
again:
if (!bio_remaining_done(bio))
return;
@@ -1591,6 +1625,14 @@ void bio_endio(struct bio *bio)
rq_qos_done_bio(bio);
+ if (bio_flagged(bio, BIO_TRACE))
+ trace_bio(bio, bio_trace_where_endio, 0);
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ if (PageDebugMark(bvec->bv_page))
+ trace_page_ref_set(bvec->bv_page, 623);
+ }
+
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio);
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
@@ -1612,6 +1654,8 @@ void bio_endio(struct bio *bio)
blk_throtl_bio_endio(bio);
/* release cgroup info */
bio_uninit(bio);
+ if (bio_flagged(bio, BIO_TRACE))
+ trace_bio_endio(bio);
if (bio->bi_end_io)
bio->bi_end_io(bio);
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 356193e44cf0..21790ce471d3 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -18,6 +18,7 @@
#include <linux/sched/signal.h>
#include <linux/migrate.h>
#include "trace.h"
+#include <trace/events/block.h>
#include "../internal.h"
@@ -619,6 +620,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
goto out_no_page;
}
+#if 0
+#define XFS_SUPER_MAGIC 0x58465342 /* "XFSB" */
+ if (folio->mapping->host->i_sb->s_magic == XFS_SUPER_MAGIC)
+ folio_set_debug_mark(folio);
+#endif
+
/*
* Now we have a locked folio, before we do anything with it we need to
* check that the iomap we have cached is not stale. The inode extent
@@ -1311,6 +1318,9 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
next = bio->bi_private;
/* walk all folios in bio, ending page IO on them */
+ if (bio_flagged(bio, BIO_TRACE))
+ trace_bio(bio, bio_trace_where_ioend, 0);
+
bio_for_each_folio_all(fi, bio) {
iomap_finish_folio_write(inode, fi.folio, fi.length,
error);
diff --git a/fs/pipe.c b/fs/pipe.c
index 42c7ff41c2db..56293d706ef3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -208,6 +208,7 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
put_page(buf->page);
+ buf->page = (void *)0xaa55aa55aa55aa55UL;
}
EXPORT_SYMBOL(generic_pipe_buf_release);
diff --git a/fs/splice.c b/fs/splice.c
index 5969b7a1d353..fc59b5038f2e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -34,6 +34,7 @@
#include <linux/gfp.h>
#include <linux/socket.h>
#include <linux/sched/signal.h>
+#include <trace/events/page_ref.h>
#include "internal.h"
@@ -304,6 +305,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
int ret;
iov_iter_pipe(&to, ITER_DEST, pipe, len);
+ to.debug = true;
init_sync_kiocb(&kiocb, in);
kiocb.ki_pos = *ppos;
ret = call_read_iter(in, &kiocb, &to);
@@ -597,6 +599,9 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
return ret;
}
+static struct page *splice_tmp;
+static DEFINE_MUTEX(splice_tmp_lock);
+
/**
* iter_file_splice_write - splice data from a pipe to a file
* @pipe: pipe info
@@ -626,6 +631,19 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
GFP_KERNEL);
ssize_t ret;
+ mutex_lock(&splice_tmp_lock);
+ if (!splice_tmp) {
+ pr_notice("alloc splice_tmp\n");
+ splice_tmp = alloc_page(GFP_USER);
+ if (splice_tmp) {
+ SetPageDebugMark(splice_tmp);
+ page_ref_add(splice_tmp, 100);
+ }
+ }
+ mutex_unlock(&splice_tmp_lock);
+ if (!splice_tmp)
+ return -ENOMEM;
+
if (unlikely(!array))
return -ENOMEM;
@@ -675,7 +693,12 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
goto done;
}
+ if (PageDebugMark(buf->page))
+ trace_page_ref_set(buf->page, 888);
+
array[n].bv_page = buf->page;
+ //array[n].bv_page = splice_tmp;
+ //trace_page_ref_set(splice_tmp, 887);
array[n].bv_len = this_len;
array[n].bv_offset = buf->offset;
left -= this_len;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b2c09997d79c..cafa26637067 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -484,8 +484,8 @@ void zero_fill_bio(struct bio *bio);
static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
{
- if (bio_flagged(bio, BIO_PAGE_REFFED) ||
- bio_flagged(bio, BIO_PAGE_PINNED))
+ //if (bio_flagged(bio, BIO_PAGE_REFFED) ||
+ // bio_flagged(bio, BIO_PAGE_PINNED))
__bio_release_pages(bio, mark_dirty);
}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a0e339ff3d09..b4e563595a5a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -302,6 +302,7 @@ struct bio {
struct bio_vec *bi_io_vec; /* the actual vec list */
struct bio_set *bi_pool;
+ unsigned int bi_debug_id; /* Tracing debug ID */
/*
* We can inline a number of vecs at the end of the bio, to avoid
@@ -334,6 +335,7 @@ enum {
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
+ BIO_TRACE, /* Trace bio lifetime */
BIO_FLAG_LAST
};
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 69e93a0c1277..80cbf784239e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -138,6 +138,9 @@ enum pageflags {
#endif
#ifdef CONFIG_KASAN_HW_TAGS
PG_skip_kasan_poison,
+#endif
+#ifdef CONFIG_DEBUG_PAGE_MARK
+ PG_debug_mark,
#endif
__NR_PAGEFLAGS,
@@ -694,6 +697,15 @@ static __always_inline bool PageKsm(struct page *page)
TESTPAGEFLAG_FALSE(Ksm, ksm)
#endif
+#ifdef CONFIG_DEBUG_PAGE_MARK
+/*
+ * Debug marks are just used for page_ref tracepoint control and display.
+ */
+PAGEFLAG(DebugMark, debug_mark, PF_ANY)
+#else
+TESTPAGEFLAG_FALSE(DebugMark, debug_mark)
+#endif
+
u64 stable_page_flags(struct page *page);
/**
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index d7c2d33baa7f..7bc1a94d9cbb 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -24,7 +24,11 @@ DECLARE_TRACEPOINT(page_ref_unfreeze);
*
* See trace_##name##_enabled(void) in include/linux/tracepoint.h
*/
-#define page_ref_tracepoint_active(t) tracepoint_enabled(t)
+#ifndef CONFIG_DEBUG_PAGE_REF_ONLY_MARKED
+#define page_ref_tracepoint_active(p, t) tracepoint_enabled(t)
+#else
+#define page_ref_tracepoint_active(p, t) (tracepoint_enabled(t) && PageDebugMark(p))
+#endif
extern void __page_ref_set(struct page *page, int v);
extern void __page_ref_mod(struct page *page, int v);
@@ -36,7 +40,7 @@ extern void __page_ref_unfreeze(struct page *page, int v);
#else
-#define page_ref_tracepoint_active(t) false
+#define page_ref_tracepoint_active(page, t) false
static inline void __page_ref_set(struct page *page, int v)
{
@@ -97,7 +101,7 @@ static inline int page_count(const struct page *page)
static inline void set_page_count(struct page *page, int v)
{
atomic_set(&page->_refcount, v);
- if (page_ref_tracepoint_active(page_ref_set))
+ if (page_ref_tracepoint_active(page, page_ref_set))
__page_ref_set(page, v);
}
@@ -118,7 +122,7 @@ static inline void init_page_count(struct page *page)
static inline void page_ref_add(struct page *page, int nr)
{
atomic_add(nr, &page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod))
+ if (page_ref_tracepoint_active(page, page_ref_mod))
__page_ref_mod(page, nr);
}
@@ -130,7 +134,7 @@ static inline void folio_ref_add(struct folio *folio, int nr)
static inline void page_ref_sub(struct page *page, int nr)
{
atomic_sub(nr, &page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod))
+ if (page_ref_tracepoint_active(page, page_ref_mod))
__page_ref_mod(page, -nr);
}
@@ -143,7 +147,7 @@ static inline int page_ref_sub_return(struct page *page, int nr)
{
int ret = atomic_sub_return(nr, &page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod_and_return))
+ if (page_ref_tracepoint_active(page, page_ref_mod_and_return))
__page_ref_mod_and_return(page, -nr, ret);
return ret;
}
@@ -156,7 +160,7 @@ static inline int folio_ref_sub_return(struct folio *folio, int nr)
static inline void page_ref_inc(struct page *page)
{
atomic_inc(&page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod))
+ if (page_ref_tracepoint_active(page, page_ref_mod))
__page_ref_mod(page, 1);
}
@@ -168,7 +172,7 @@ static inline void folio_ref_inc(struct folio *folio)
static inline void page_ref_dec(struct page *page)
{
atomic_dec(&page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod))
+ if (page_ref_tracepoint_active(page, page_ref_mod))
__page_ref_mod(page, -1);
}
@@ -181,7 +185,7 @@ static inline int page_ref_sub_and_test(struct page *page, int nr)
{
int ret = atomic_sub_and_test(nr, &page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod_and_test))
+ if (page_ref_tracepoint_active(page, page_ref_mod_and_test))
__page_ref_mod_and_test(page, -nr, ret);
return ret;
}
@@ -195,7 +199,7 @@ static inline int page_ref_inc_return(struct page *page)
{
int ret = atomic_inc_return(&page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod_and_return))
+ if (page_ref_tracepoint_active(page, page_ref_mod_and_return))
__page_ref_mod_and_return(page, 1, ret);
return ret;
}
@@ -209,7 +213,7 @@ static inline int page_ref_dec_and_test(struct page *page)
{
int ret = atomic_dec_and_test(&page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod_and_test))
+ if (page_ref_tracepoint_active(page, page_ref_mod_and_test))
__page_ref_mod_and_test(page, -1, ret);
return ret;
}
@@ -223,7 +227,7 @@ static inline int page_ref_dec_return(struct page *page)
{
int ret = atomic_dec_return(&page->_refcount);
- if (page_ref_tracepoint_active(page_ref_mod_and_return))
+ if (page_ref_tracepoint_active(page, page_ref_mod_and_return))
__page_ref_mod_and_return(page, -1, ret);
return ret;
}
@@ -237,7 +241,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
bool ret = atomic_add_unless(&page->_refcount, nr, u);
- if (page_ref_tracepoint_active(page_ref_mod_unless))
+ if (page_ref_tracepoint_active(page, page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
return ret;
}
@@ -317,7 +321,7 @@ static inline int page_ref_freeze(struct page *page, int count)
{
int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
- if (page_ref_tracepoint_active(page_ref_freeze))
+ if (page_ref_tracepoint_active(page, page_ref_freeze))
__page_ref_freeze(page, count, ret);
return ret;
}
@@ -333,7 +337,7 @@ static inline void page_ref_unfreeze(struct page *page, int count)
VM_BUG_ON(count == 0);
atomic_set_release(&page->_refcount, count);
- if (page_ref_tracepoint_active(page_ref_unfreeze))
+ if (page_ref_tracepoint_active(page, page_ref_unfreeze))
__page_ref_unfreeze(page, count);
}
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 514e3b7b06b8..89272c05d74d 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -45,6 +45,7 @@ struct iov_iter {
bool nofault;
bool data_source;
bool user_backed;
+ bool debug;
union {
size_t iov_offset;
int last_offset;
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 7f4dfbdf12a6..7eabf99b4317 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -12,6 +12,56 @@
#define RWBS_LEN 8
+/*
+ * Declare tracing information enums and their string mappings for display.
+ */
+#define bio_trace_wheres \
+ EM(bio_trace_where_add_page, "ADD-PG") \
+ EM(bio_trace_where_endio, "END-IO") \
+ EM(bio_trace_where_free, "FREE ") \
+ EM(bio_trace_where_free_pages, "FREEPG") \
+ EM(bio_trace_where_init, "INIT ") \
+ EM(bio_trace_where_ioend, "IOEND ") \
+ EM(bio_trace_where_rel_pages, "REL-PG") \
+ E_(bio_trace_where_uninit, "UNINIT")
+
+/*
+ * Generate enums for tracing information.
+ */
+#ifndef __BIO_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __BIO_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+#undef EM
+#undef E_
+#define EM(a, b) a,
+#define E_(a, b) a
+
+enum bio_trace_where { bio_trace_wheres } __mode(byte);
+
+#endif /* end __BIO_DECLARE_TRACE_ENUMS_ONCE_ONLY */
+
+/*
+ * Export enum symbols via userspace.
+ */
+#undef EM
+#undef E_
+
+#ifndef BIO_TRACE_ONLY_DEFINE_ENUMS
+
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+bio_trace_wheres;
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#undef EM
+#undef E_
+#define EM(a, b) { a, b },
+#define E_(a, b) { a, b }
+
DECLARE_EVENT_CLASS(block_buffer,
TP_PROTO(struct buffer_head *bh),
@@ -552,6 +602,51 @@ TRACE_EVENT(block_rq_remap,
(unsigned long long)__entry->old_sector, __entry->nr_bios)
);
+TRACE_EVENT(bio,
+ TP_PROTO(struct bio *bio, enum bio_trace_where where,
+ unsigned long info),
+
+ TP_ARGS(bio, where, info),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, bi_debug_id )
+ __field(enum bio_trace_where, where )
+ __field(unsigned long, info )
+ ),
+
+ TP_fast_assign(
+ __entry->bi_debug_id = bio->bi_debug_id;
+ __entry->where = where;
+ __entry->info = info;
+ ),
+
+ TP_printk("bio=%08x %s I=%lx",
+ __entry->bi_debug_id,
+ __print_symbolic(__entry->where, bio_trace_wheres),
+ __entry->info)
+);
+
+TRACE_EVENT(bio_endio,
+ TP_PROTO(struct bio *bio),
+
+ TP_ARGS(bio),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, bi_debug_id )
+ __field(const void *, bi_end_io )
+ ),
+
+ TP_fast_assign(
+ __entry->bi_debug_id = bio->bi_debug_id;
+ __entry->bi_end_io = bio->bi_end_io;
+ ),
+
+ TP_printk("bio=%08x %pSR",
+ __entry->bi_debug_id,
+ __entry->bi_end_io)
+);
+
+#endif /* BIO_TRACE_ONLY_DEFINE_ENUMS */
#endif /* _TRACE_BLOCK_H */
/* This part must be outside protection */
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 412b5a46374c..5f3b9b0e4b53 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -103,6 +103,12 @@
#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string)
#endif
+#ifdef CONFIG_DEBUG_PAGE_MARK
+#define IF_HAVE_PG_DEBUG_MARK(flag,string) ,{1UL << flag, string}
+#else
+#define IF_HAVE_PG_DEBUG_MARK(flag,string)
+#endif
+
#define __def_pageflag_names \
{1UL << PG_locked, "locked" }, \
{1UL << PG_waiters, "waiters" }, \
@@ -132,7 +138,8 @@ IF_HAVE_PG_IDLE(PG_young, "young" ) \
IF_HAVE_PG_IDLE(PG_idle, "idle" ) \
IF_HAVE_PG_ARCH_X(PG_arch_2, "arch_2" ) \
IF_HAVE_PG_ARCH_X(PG_arch_3, "arch_3" ) \
-IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
+IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") \
+IF_HAVE_PG_DEBUG_MARK(PG_debug_mark, "debug_mark" )
#define show_page_flags(flags) \
(flags) ? __print_flags(flags, "|", \
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index d69a05950555..b3b2f1e6dc1b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -10,9 +10,11 @@
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
+#include <linux/page-flags.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>
+#include <trace/events/page_ref.h>
#define PIPE_PARANOIA /* for now */
@@ -1331,6 +1333,10 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
struct page *page = append_pipe(i, left, &off);
if (!page)
break;
+ if (i->debug && !PageDebugMark(page)) {
+ //SetPageDebugMark(page);
+ //get_page(page);
+ }
chunk = min_t(size_t, left, PAGE_SIZE - off);
get_page(*p++ = page);
}
@@ -1917,6 +1923,9 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
i->nr_segs = state->nr_segs;
}
+static struct page *extract_tmp;
+static DEFINE_MUTEX(extract_tmp_lock);
+
/*
* Extract a list of contiguous pages from an ITER_PIPE iterator. This does
* not get references of its own on the pages, nor does it get a pin on them.
@@ -1936,6 +1945,19 @@ static ssize_t iov_iter_extract_pipe_pages(struct iov_iter *i,
struct page **p;
size_t left;
+ mutex_lock(&extract_tmp_lock);
+ if (!extract_tmp) {
+ pr_notice("alloc extract_tmp\n");
+ extract_tmp = alloc_page(GFP_USER);
+ if (extract_tmp) {
+ SetPageDebugMark(extract_tmp);
+ page_ref_add(extract_tmp, 200);
+ }
+ }
+ mutex_unlock(&extract_tmp_lock);
+ if (!extract_tmp)
+ return -ENOMEM;
+
if (!sanity(i))
return -EFAULT;
@@ -1955,9 +1977,15 @@ static ssize_t iov_iter_extract_pipe_pages(struct iov_iter *i,
struct page *page = append_pipe(i, left, &offset);
if (!page)
break;
+ if (i->debug && !PageDebugMark(page)) {
+ SetPageDebugMark(page);
+ trace_page_ref_set(page, 777);
+ //get_page(page);
+ }
chunk = min_t(size_t, left, PAGE_SIZE - offset);
left -= chunk;
*p++ = page;
+ //*p++ = extract_tmp;
}
if (!j)
return -EFAULT;
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index fca699ad1fb0..111a946a676f 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -149,6 +149,23 @@ config DEBUG_PAGE_REF
kernel code. However the runtime performance overhead is virtually
nil until the tracepoints are actually enabled.
+config DEBUG_PAGE_MARK
+ bool "Reserve a page bit to mark pages to be debugged"
+ depends on DEBUG_PAGE_REF
+ help
+ This option adds an extra page flag that can be used to mark pages
+ for debugging. The mark can be observed in the page_ref tracepoints.
+ The mark isn't set on any pages without alteration of the code. This
+ is intended for filesystem debugging and code to set the mark must be
+ added manually into the source.
+
+config DEBUG_PAGE_REF_ONLY_MARKED
+ bool "Only trace marked pages"
+ depends on DEBUG_PAGE_REF && DEBUG_PAGE_MARK
+ help
+ This option restricts the page_ref tracepoints to only track marked
+ pages.
+
config DEBUG_RODATA_TEST
bool "Testcase for the marking rodata read-only"
depends on STRICT_KERNEL_RWX
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0745aedebb37..37f146e5b2eb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1102,6 +1102,9 @@ static inline void __free_one_page(struct page *page,
VM_BUG_ON(!zone_is_initialized(zone));
VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
+#ifdef CONFIG_DEBUG_PAGE_MARK
+ ClearPageDebugMark(page);
+#endif
VM_BUG_ON(migratetype == -1);
if (likely(!is_migrate_isolate(migratetype)))
diff --git a/mm/readahead.c b/mm/readahead.c
index b10f0cf81d80..458559fd0e67 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -248,6 +248,12 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
folio = filemap_alloc_folio(gfp_mask, 0);
if (!folio)
break;
+#if 0
+#define XFS_SUPER_MAGIC 0x58465342 /* "XFSB" */
+ if (mapping->host->i_sb->s_magic == XFS_SUPER_MAGIC)
+ folio_set_debug_mark(folio);
+#endif
+
if (filemap_add_folio(mapping, folio, index + i,
gfp_mask) < 0) {
folio_put(folio);
@@ -809,6 +815,7 @@ void readahead_expand(struct readahead_control *ractl,
page = __page_cache_alloc(gfp_mask);
if (!page)
return;
+ //SetPageDebugMark(page);
if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
put_page(page);
return;
@@ -832,6 +839,7 @@ void readahead_expand(struct readahead_control *ractl,
page = __page_cache_alloc(gfp_mask);
if (!page)
return;
+ //SetPageDebugMark(page);
if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
put_page(page);
return;
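Note that the arbitrary values passed to trace_page_ref_set() in the patch
(623, 654, 666, 777, 888, 980+i, ...) just encode where a marked page was
seen; they appear as the val= field of the page_ref_set trace lines, so the
capture suggested above can be filtered with something like:
grep 'page_ref_set' /tmp/trace.log | grep -E 'val=(666|777|888)'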