[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87k30ic0lw.fsf@openvz.org>
Date: Mon, 19 Jan 2015 18:21:31 +0400
From: Dmitry Monakhov <dmonlist@...il.com>
To: Theodore Ts'o <tytso@....edu>,
Linux Filesystem Development List
<linux-fsdevel@...r.kernel.org>
Cc: Ext4 Developers List <linux-ext4@...r.kernel.org>,
Theodore Ts'o <tytso@....edu>
Subject: Re: [PATCH-v7 1/3] vfs: add support for a lazytime mount option
Theodore Ts'o <tytso@....edu> writes:
> Add a new mount option which enables a new "lazytime" mode. This mode
> causes atime, mtime, and ctime updates to only be made to the
> in-memory version of the inode. The on-disk times will only get
> updated (a) if the inode needs to be updated for some non-time
> related change, (b) if userspace calls fsync(), syncfs() or sync(), or
> (c) just before an undeleted inode is evicted from memory.
>
> This is OK according to POSIX because there are no guarantees after a
> crash unless userspace explicitly requests via a fsync(2) call.
>
> For workloads which feature a large number of random writes to a
> preallocated file, the lazytime mount option significantly reduces
> writes to the inode table. The repeated 4k writes to a single block
> will result in undesirable stress on flash devices and SMR disk
> drives. Even on conventional HDD's, the repeated writes to the inode
> table block will trigger Adjacent Track Interference (ATI) remediation
> latencies, which very negatively impact long tail latencies --- which
> is a very big deal for web serving tiers (for example).
>
> Google-Bug-Id: 18297052
>
> Signed-off-by: Theodore Ts'o <tytso@....edu>
> ---
> fs/ext4/inode.c | 6 ++++
> fs/fs-writeback.c | 64 ++++++++++++++++++++++++++++++++--------
> fs/gfs2/file.c | 4 +--
> fs/inode.c | 56 +++++++++++++++++++++++++----------
> fs/jfs/file.c | 2 +-
> fs/libfs.c | 2 +-
> fs/proc_namespace.c | 1 +
> fs/sync.c | 8 +++++
> include/linux/backing-dev.h | 1 +
> include/linux/fs.h | 5 ++++
> include/trace/events/writeback.h | 60 ++++++++++++++++++++++++++++++++++++-
> include/uapi/linux/fs.h | 4 ++-
> mm/backing-dev.c | 10 +++++--
> 13 files changed, 187 insertions(+), 36 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 5653fa4..628df5b 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4840,11 +4840,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
> * If the inode is marked synchronous, we don't honour that here - doing
> * so would cause a commit on atime updates, which we don't bother doing.
> * We handle synchronous inodes at the highest possible level.
> + *
> + * If only the I_DIRTY_TIME flag is set, we can skip everything. If
> + * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need
> + * to copy into the on-disk inode structure are the timestamp fields.
> */
> void ext4_dirty_inode(struct inode *inode, int flags)
> {
> handle_t *handle;
>
> + if (flags == I_DIRTY_TIME)
> + return;
> handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
> if (IS_ERR(handle))
> goto out;
> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index ef9bef1..d5e02b8 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -247,14 +247,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
> return ret;
> }
>
> +#define EXPIRE_DIRTY_ATIME 0x0001
> +
> /*
> * Move expired (dirtied before work->older_than_this) dirty inodes from
> * @delaying_queue to @dispatch_queue.
> */
> static int move_expired_inodes(struct list_head *delaying_queue,
> struct list_head *dispatch_queue,
> + int flags,
> struct wb_writeback_work *work)
> {
> + unsigned long *older_than_this = NULL;
> + unsigned long expire_time;
> LIST_HEAD(tmp);
> struct list_head *pos, *node;
> struct super_block *sb = NULL;
> @@ -262,13 +267,21 @@ static int move_expired_inodes(struct list_head *delaying_queue,
> int do_sb_sort = 0;
> int moved = 0;
>
> + if ((flags & EXPIRE_DIRTY_ATIME) == 0)
> + older_than_this = work->older_than_this;
> + else if ((work->reason == WB_REASON_SYNC) == 0) {
> + expire_time = jiffies - (HZ * 86400);
> + older_than_this = &expire_time;
> + }
> while (!list_empty(delaying_queue)) {
> inode = wb_inode(delaying_queue->prev);
> - if (work->older_than_this &&
> - inode_dirtied_after(inode, *work->older_than_this))
> + if (older_than_this &&
> + inode_dirtied_after(inode, *older_than_this))
> break;
> list_move(&inode->i_wb_list, &tmp);
> moved++;
> + if (flags & EXPIRE_DIRTY_ATIME)
> + set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
> if (sb_is_blkdev_sb(inode->i_sb))
> continue;
> if (sb && sb != inode->i_sb)
> @@ -309,9 +322,12 @@ out:
> static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
> {
> int moved;
> +
> assert_spin_locked(&wb->list_lock);
> list_splice_init(&wb->b_more_io, &wb->b_io);
> - moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
> + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
> + moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
> + EXPIRE_DIRTY_ATIME, work);
> trace_writeback_queue_io(wb, work, moved);
> }
>
> @@ -435,6 +451,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
> * updates after data IO completion.
> */
> redirty_tail(inode, wb);
> + } else if (inode->i_state & I_DIRTY_TIME) {
> + list_move(&inode->i_wb_list, &wb->b_dirty_time);
> } else {
> /* The inode is clean. Remove from writeback lists. */
> list_del_init(&inode->i_wb_list);
> @@ -482,11 +500,18 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
> /* Clear I_DIRTY_PAGES if we've written out all dirty pages */
> if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
> inode->i_state &= ~I_DIRTY_PAGES;
> - dirty = inode->i_state & I_DIRTY;
> - inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
> + dirty = inode->i_state & (I_DIRTY_SYNC | I_DIRTY_DATASYNC);
> + if ((dirty && (inode->i_state & I_DIRTY_TIME)) ||
> + (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
> + dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
> + trace_writeback_lazytime(inode);
> + }
> + inode->i_state &= ~dirty;
> spin_unlock(&inode->i_lock);
> + if (dirty & I_DIRTY_TIME)
> + mark_inode_dirty_sync(inode);
> /* Don't write the inode if only I_DIRTY_PAGES was set */
> - if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
> + if (dirty) {
> int err = write_inode(inode, wbc);
> if (ret == 0)
> ret = err;
> @@ -534,7 +559,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
> * make sure inode is on some writeback list and leave it there unless
> * we have completely cleaned the inode.
> */
> - if (!(inode->i_state & I_DIRTY) &&
> + if (!(inode->i_state & I_DIRTY_ALL) &&
> (wbc->sync_mode != WB_SYNC_ALL ||
> !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
> goto out;
> @@ -549,7 +574,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
> * If inode is clean, remove it from writeback lists. Otherwise don't
> * touch it. See comment above for explanation.
> */
> - if (!(inode->i_state & I_DIRTY))
> + if (!(inode->i_state & I_DIRTY_ALL))
> list_del_init(&inode->i_wb_list);
> spin_unlock(&wb->list_lock);
> inode_sync_complete(inode);
> @@ -691,7 +716,7 @@ static long writeback_sb_inodes(struct super_block *sb,
> wrote += write_chunk - wbc.nr_to_write;
> spin_lock(&wb->list_lock);
> spin_lock(&inode->i_lock);
> - if (!(inode->i_state & I_DIRTY))
> + if (!(inode->i_state & I_DIRTY_ALL))
> wrote++;
> requeue_inode(inode, wb, &wbc);
> inode_sync_complete(inode);
> @@ -1129,16 +1154,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
> * page->mapping->host, so the page-dirtying time is recorded in the internal
> * blockdev inode.
> */
> +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
> void __mark_inode_dirty(struct inode *inode, int flags)
> {
> struct super_block *sb = inode->i_sb;
> struct backing_dev_info *bdi = NULL;
> + int dirtytime;
> +
> + trace_writeback_mark_inode_dirty(inode, flags);
>
> /*
> * Don't do this for I_DIRTY_PAGES - that doesn't actually
> * dirty the inode itself
> */
> - if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
> + if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
> trace_writeback_dirty_inode_start(inode, flags);
>
> if (sb->s_op->dirty_inode)
> @@ -1146,6 +1175,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>
> trace_writeback_dirty_inode(inode, flags);
> }
> + if (flags & I_DIRTY_INODE)
> + flags &= ~I_DIRTY_TIME;
> + dirtytime = flags & I_DIRTY_TIME;
TYPO? 'dirtytime' is always false because you have already cleared that bit.
You probably want to do this instead:
dirtytime = flags & I_DIRTY_TIME;
if (flags & I_DIRTY_INODE)
flags &= ~I_DIRTY_TIME;
>
> /*
> * make sure that changes are seen by all cpus before we test i_state
> @@ -1154,16 +1186,22 @@ void __mark_inode_dirty(struct inode *inode, int flags)
> smp_mb();
>
> /* avoid the locking if we can */
> - if ((inode->i_state & flags) == flags)
> + if (((inode->i_state & flags) == flags) ||
> + (dirtytime && (inode->i_state & I_DIRTY_INODE)))
> return;
>
> if (unlikely(block_dump))
> block_dump___mark_inode_dirty(inode);
>
> spin_lock(&inode->i_lock);
> + if (dirtytime && (inode->i_state & I_DIRTY_INODE))
> + return;
> if ((inode->i_state & flags) != flags) {
> const int was_dirty = inode->i_state & I_DIRTY;
>
> + if (dirtytime && (inode->i_state & I_DIRTY_INODE))
> + inode->i_state &= ~I_DIRTY_TIME;
> +
> inode->i_state |= flags;
>
> /*
> @@ -1210,8 +1248,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
> }
>
> inode->dirtied_when = jiffies;
> - list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
> + list_move(&inode->i_wb_list, dirtytime ?
> + &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
> spin_unlock(&bdi->wb.list_lock);
> + trace_writeback_dirty_inode_enqueue(inode);
>
> if (wakeup_bdi)
> bdi_wakeup_thread_delayed(bdi);
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 80dd44d..e584bf9 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -654,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
> {
> struct address_space *mapping = file->f_mapping;
> struct inode *inode = mapping->host;
> - int sync_state = inode->i_state & I_DIRTY;
> + int sync_state = inode->i_state & I_DIRTY_ALL;
> struct gfs2_inode *ip = GFS2_I(inode);
> int ret = 0, ret1 = 0;
>
> @@ -667,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
> if (!gfs2_is_jdata(ip))
> sync_state &= ~I_DIRTY_PAGES;
> if (datasync)
> - sync_state &= ~I_DIRTY_SYNC;
> + sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
>
> if (sync_state) {
> ret = sync_inode_metadata(inode, 1);
> diff --git a/fs/inode.c b/fs/inode.c
> index 26753ba..dc48a23 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -18,6 +18,7 @@
> #include <linux/buffer_head.h> /* for inode_has_buffers */
> #include <linux/ratelimit.h>
> #include <linux/list_lru.h>
> +#include <trace/events/writeback.h>
> #include "internal.h"
>
> /*
> @@ -30,7 +31,7 @@
> * inode_sb_list_lock protects:
> * sb->s_inodes, inode->i_sb_list
> * bdi->wb.list_lock protects:
> - * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
> + * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
> * inode_hash_lock protects:
> * inode_hashtable, inode->i_hash
> *
> @@ -414,7 +415,8 @@ static void inode_lru_list_add(struct inode *inode)
> */
> void inode_add_lru(struct inode *inode)
> {
> - if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
> + if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
> + I_FREEING | I_WILL_FREE)) &&
> !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
> inode_lru_list_add(inode);
> }
> @@ -645,7 +647,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
> spin_unlock(&inode->i_lock);
> continue;
> }
> - if (inode->i_state & I_DIRTY && !kill_dirty) {
> + if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
> spin_unlock(&inode->i_lock);
> busy = 1;
> continue;
> @@ -1430,11 +1432,20 @@ static void iput_final(struct inode *inode)
> */
> void iput(struct inode *inode)
> {
> - if (inode) {
> - BUG_ON(inode->i_state & I_CLEAR);
> -
> - if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
> - iput_final(inode);
> + if (!inode)
> + return;
> + BUG_ON(inode->i_state & I_CLEAR);
> +retry:
> + if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
> + if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
> + atomic_inc(&inode->i_count);
> + inode->i_state &= ~I_DIRTY_TIME;
> + spin_unlock(&inode->i_lock);
> + trace_writeback_lazytime_iput(inode);
> + mark_inode_dirty_sync(inode);
> + goto retry;
> + }
> + iput_final(inode);
> }
> }
> EXPORT_SYMBOL(iput);
> @@ -1493,14 +1504,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
> return 0;
> }
>
> -/*
> - * This does the actual work of updating an inodes time or version. Must have
> - * had called mnt_want_write() before calling this.
> - */
> -static int update_time(struct inode *inode, struct timespec *time, int flags)
> +int generic_update_time(struct inode *inode, struct timespec *time, int flags)
> {
> - if (inode->i_op->update_time)
> - return inode->i_op->update_time(inode, time, flags);
> + int iflags = I_DIRTY_TIME;
>
> if (flags & S_ATIME)
> inode->i_atime = *time;
> @@ -1510,9 +1516,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
> inode->i_ctime = *time;
> if (flags & S_MTIME)
> inode->i_mtime = *time;
> - mark_inode_dirty_sync(inode);
> +
> + if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION))
> + iflags |= I_DIRTY_SYNC;
> + __mark_inode_dirty(inode, iflags);
> return 0;
> }
> +EXPORT_SYMBOL(generic_update_time);
> +
> +/*
> + * This does the actual work of updating an inodes time or version. Must have
> + * had called mnt_want_write() before calling this.
> + */
> +static int update_time(struct inode *inode, struct timespec *time, int flags)
> +{
> + int (*update_time)(struct inode *, struct timespec *, int);
> +
> + update_time = inode->i_op->update_time ? inode->i_op->update_time :
> + generic_update_time;
> +
> + return update_time(inode, time, flags);
> +}
>
> /**
> * touch_atime - update the access time
> diff --git a/fs/jfs/file.c b/fs/jfs/file.c
> index 33aa0cc..10815f8 100644
> --- a/fs/jfs/file.c
> +++ b/fs/jfs/file.c
> @@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> return rc;
>
> mutex_lock(&inode->i_mutex);
> - if (!(inode->i_state & I_DIRTY) ||
> + if (!(inode->i_state & I_DIRTY_ALL) ||
> (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
> /* Make sure committed changes hit the disk */
> jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 171d284..7cb9cef 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
>
> mutex_lock(&inode->i_mutex);
> ret = sync_mapping_buffers(inode->i_mapping);
> - if (!(inode->i_state & I_DIRTY))
> + if (!(inode->i_state & I_DIRTY_ALL))
> goto out;
> if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
> goto out;
> diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
> index 73ca174..f98234a 100644
> --- a/fs/proc_namespace.c
> +++ b/fs/proc_namespace.c
> @@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
> { MS_SYNCHRONOUS, ",sync" },
> { MS_DIRSYNC, ",dirsync" },
> { MS_MANDLOCK, ",mand" },
> + { MS_LAZYTIME, ",lazytime" },
> { 0, NULL }
> };
> const struct proc_fs_info *fs_infop;
> diff --git a/fs/sync.c b/fs/sync.c
> index bdc729d..6ac7bf0 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd)
> */
> int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
> {
> + struct inode *inode = file->f_mapping->host;
> +
> if (!file->f_op->fsync)
> return -EINVAL;
> + if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
> + spin_lock(&inode->i_lock);
> + inode->i_state &= ~I_DIRTY_TIME;
> + spin_unlock(&inode->i_lock);
> + mark_inode_dirty_sync(inode);
> + }
> return file->f_op->fsync(file, start, end, datasync);
> }
> EXPORT_SYMBOL(vfs_fsync_range);
> diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
> index 5da6012..4cdf733 100644
> --- a/include/linux/backing-dev.h
> +++ b/include/linux/backing-dev.h
> @@ -55,6 +55,7 @@ struct bdi_writeback {
> struct list_head b_dirty; /* dirty inodes */
> struct list_head b_io; /* parked for writeback */
> struct list_head b_more_io; /* parked for more writeback */
> + struct list_head b_dirty_time; /* time stamps are dirty */
> spinlock_t list_lock; /* protects the b_* lists */
> };
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 9ab779e..bf00e98 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1720,8 +1720,12 @@ struct super_operations {
> #define __I_DIO_WAKEUP 9
> #define I_DIO_WAKEUP (1 << I_DIO_WAKEUP)
> #define I_LINKABLE (1 << 10)
> +#define I_DIRTY_TIME (1 << 11)
> +#define __I_DIRTY_TIME_EXPIRED 12
> +#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
>
> #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
> +#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
>
> extern void __mark_inode_dirty(struct inode *, int);
> static inline void mark_inode_dirty(struct inode *inode)
> @@ -1884,6 +1888,7 @@ extern int current_umask(void);
>
> extern void ihold(struct inode * inode);
> extern void iput(struct inode *);
> +extern int generic_update_time(struct inode *, struct timespec *, int);
>
> static inline struct inode *file_inode(const struct file *f)
> {
> diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
> index cee02d6..5ecb4c2 100644
> --- a/include/trace/events/writeback.h
> +++ b/include/trace/events/writeback.h
> @@ -18,6 +18,8 @@
> {I_FREEING, "I_FREEING"}, \
> {I_CLEAR, "I_CLEAR"}, \
> {I_SYNC, "I_SYNC"}, \
> + {I_DIRTY_TIME, "I_DIRTY_TIME"}, \
> + {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \
> {I_REFERENCED, "I_REFERENCED"} \
> )
>
> @@ -68,6 +70,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
> TP_STRUCT__entry (
> __array(char, name, 32)
> __field(unsigned long, ino)
> + __field(unsigned long, state)
> __field(unsigned long, flags)
> ),
>
> @@ -78,16 +81,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
> strncpy(__entry->name,
> bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
> __entry->ino = inode->i_ino;
> + __entry->state = inode->i_state;
> __entry->flags = flags;
> ),
>
> - TP_printk("bdi %s: ino=%lu flags=%s",
> + TP_printk("bdi %s: ino=%lu state=%s flags=%s",
> __entry->name,
> __entry->ino,
> + show_inode_state(__entry->state),
> show_inode_state(__entry->flags)
> )
> );
>
> +DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty,
> +
> + TP_PROTO(struct inode *inode, int flags),
> +
> + TP_ARGS(inode, flags)
> +);
> +
> DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start,
>
> TP_PROTO(struct inode *inode, int flags),
> @@ -598,6 +610,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
> TP_ARGS(inode, wbc, nr_to_write)
> );
>
> +DECLARE_EVENT_CLASS(writeback_lazytime_template,
> + TP_PROTO(struct inode *inode),
> +
> + TP_ARGS(inode),
> +
> + TP_STRUCT__entry(
> + __field( dev_t, dev )
> + __field(unsigned long, ino )
> + __field(unsigned long, state )
> + __field( __u16, mode )
> + __field(unsigned long, dirtied_when )
> + ),
> +
> + TP_fast_assign(
> + __entry->dev = inode->i_sb->s_dev;
> + __entry->ino = inode->i_ino;
> + __entry->state = inode->i_state;
> + __entry->mode = inode->i_mode;
> + __entry->dirtied_when = inode->dirtied_when;
> + ),
> +
> + TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + __entry->ino, __entry->dirtied_when,
> + show_inode_state(__entry->state), __entry->mode)
> +);
> +
> +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime,
> + TP_PROTO(struct inode *inode),
> +
> + TP_ARGS(inode)
> +);
> +
> +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput,
> + TP_PROTO(struct inode *inode),
> +
> + TP_ARGS(inode)
> +);
> +
> +DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue,
> +
> + TP_PROTO(struct inode *inode),
> +
> + TP_ARGS(inode)
> +);
> +
> #endif /* _TRACE_WRITEBACK_H */
>
> /* This part must be outside protection */
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 3735fa0..9b964a5 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -90,6 +90,7 @@ struct inodes_stat_t {
> #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
> #define MS_I_VERSION (1<<23) /* Update inode I_version field */
> #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
> +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
>
> /* These sb flags are internal to the kernel */
> #define MS_NOSEC (1<<28)
> @@ -100,7 +101,8 @@ struct inodes_stat_t {
> /*
> * Superblock flags that can be altered by MS_REMOUNT
> */
> -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
> +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
> + MS_LAZYTIME)
>
> /*
> * Old magic mount flag and mask
> diff --git a/mm/backing-dev.c b/mm/backing-dev.c
> index 0ae0df5..915feea 100644
> --- a/mm/backing-dev.c
> +++ b/mm/backing-dev.c
> @@ -69,10 +69,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
> unsigned long background_thresh;
> unsigned long dirty_thresh;
> unsigned long bdi_thresh;
> - unsigned long nr_dirty, nr_io, nr_more_io;
> + unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
> struct inode *inode;
>
> - nr_dirty = nr_io = nr_more_io = 0;
> + nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
> spin_lock(&wb->list_lock);
> list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
> nr_dirty++;
> @@ -80,6 +80,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
> nr_io++;
> list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
> nr_more_io++;
> + list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list)
> + if (inode->i_state & I_DIRTY_TIME)
> + nr_dirty_time++;
> spin_unlock(&wb->list_lock);
>
> global_dirty_limits(&background_thresh, &dirty_thresh);
> @@ -98,6 +101,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
> "b_dirty: %10lu\n"
> "b_io: %10lu\n"
> "b_more_io: %10lu\n"
> + "b_dirty_time: %10lu\n"
> "bdi_list: %10u\n"
> "state: %10lx\n",
> (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
> @@ -111,6 +115,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
> nr_dirty,
> nr_io,
> nr_more_io,
> + nr_dirty_time,
> !list_empty(&bdi->bdi_list), bdi->state);
> #undef K
>
> @@ -418,6 +423,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
> INIT_LIST_HEAD(&wb->b_dirty);
> INIT_LIST_HEAD(&wb->b_io);
> INIT_LIST_HEAD(&wb->b_more_io);
> + INIT_LIST_HEAD(&wb->b_dirty_time);
> spin_lock_init(&wb->list_lock);
> INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
> }
> --
> 2.1.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists