lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 03 Jun 2014 00:26:04 -0700
From:	Daniel Phillips <daniel@...nq.net>
To:	Dave Chinner <david@...morbit.com>
CC:	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	OGAWA Hirofumi <hirofumi@...l.parknet.co.jp>
Subject: Re: [RFC][PATCH 1/2] Add a super operation for writeback

Hi Dave,

Here is a non-incremental patch. This implements your suggestion
from yesterday, except that the wb list lock is dropped before
calling ->writeback().

Regards,

Daniel

>From d030d328757b160b39b252e82811a94843513cfc Mon Sep 17 00:00:00 2001
From: Daniel Phillips <daniel@...3.org>
Date: Tue, 3 Jun 2014 00:19:11 -0700
Subject: [PATCH]     Add a super operation for writeback

Add a "writeback" super operation to be called in the
form:

   progress = s_op->writeback(sb, wb, work, wbc);

Where sb is (struct super_block *), wb is (struct
bdi_writeback *), work is (struct wb_writeback_work *),
and wbc is (struct writeback_control *).

The filesystem is expected to flush some inodes to disk
and return progress of at least 1, or if no inodes are
flushed, return progress of zero. The filesystem should
try to flush at least the number of pages specified in
work->nr_pages, or if that is not possible, leave
approximately the number of pages that were not flushed
in work->nr_pages on return.

Within the ->writeback callback, the filesystem should
call inode_writeback_done(inode) for each inode flushed
(and therefore set clean) or inode_writeback_touch(inode)
for any inode that will be retained dirty in cache.

Signed-off-by: Daniel Phillips <daniel@...3.org>
---
 fs/fs-writeback.c         | 107 +++++++++++++++++++++++++++++-----------------
 include/linux/fs.h        |   9 +++-
 include/linux/writeback.h |  19 ++++++++
 3 files changed, 95 insertions(+), 40 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be568b7..98810bd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -34,25 +34,6 @@
  */
 #define MIN_WRITEBACK_PAGES    (4096UL >> (PAGE_CACHE_SHIFT - 10))
 
-/*
- * Passed into wb_writeback(), essentially a subset of writeback_control
- */
-struct wb_writeback_work {
-    long nr_pages;
-    struct super_block *sb;
-    unsigned long *older_than_this;
-    enum writeback_sync_modes sync_mode;
-    unsigned int tagged_writepages:1;
-    unsigned int for_kupdate:1;
-    unsigned int range_cyclic:1;
-    unsigned int for_background:1;
-    unsigned int for_sync:1;    /* sync(2) WB_SYNC_ALL writeback */
-    enum wb_reason reason;        /* why was writeback initiated? */
-
-    struct list_head list;        /* pending work list */
-    struct completion *done;    /* set if the caller waits */
-};
-
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -192,6 +173,35 @@ void inode_wb_list_del(struct inode *inode)
 }
 
 /*
+ * Remove inode from writeback list if clean.
+ */
+void inode_writeback_done(struct inode *inode)
+{
+    struct backing_dev_info *bdi = inode_to_bdi(inode);
+
+    spin_lock(&bdi->wb.list_lock);
+    spin_lock(&inode->i_lock);
+    if (!(inode->i_state & I_DIRTY))
+        list_del_init(&inode->i_wb_list);
+    spin_unlock(&inode->i_lock);
+    spin_unlock(&bdi->wb.list_lock);
+}
+EXPORT_SYMBOL_GPL(inode_writeback_done);
+
+/*
+ * Add inode to writeback dirty list with current time.
+ */
+void inode_writeback_touch(struct inode *inode)
+{
+    struct backing_dev_info *bdi = inode->i_sb->s_bdi;
+    spin_lock(&bdi->wb.list_lock);
+    inode->dirtied_when = jiffies;
+    list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+    spin_unlock(&bdi->wb.list_lock);
+}
+EXPORT_SYMBOL_GPL(inode_writeback_touch);
+
+/*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
  *
@@ -593,20 +603,12 @@ static long writeback_chunk_size(struct backing_dev_info *bdi,
  *
  * Return the number of pages and/or inodes written.
  */
-static long writeback_sb_inodes(struct super_block *sb,
-                struct bdi_writeback *wb,
-                struct wb_writeback_work *work)
+long generic_writeback_sb_inodes(
+    struct super_block *sb,
+    struct bdi_writeback *wb,
+    struct wb_writeback_work *work,
+    struct writeback_control *wbc)
 {
-    struct writeback_control wbc = {
-        .sync_mode        = work->sync_mode,
-        .tagged_writepages    = work->tagged_writepages,
-        .for_kupdate        = work->for_kupdate,
-        .for_background        = work->for_background,
-        .for_sync        = work->for_sync,
-        .range_cyclic        = work->range_cyclic,
-        .range_start        = 0,
-        .range_end        = LLONG_MAX,
-    };
     unsigned long start_time = jiffies;
     long write_chunk;
     long wrote = 0;  /* count both pages and inodes */
@@ -644,7 +646,7 @@ static long writeback_sb_inodes(struct super_block *sb,
             redirty_tail(inode, wb);
             continue;
         }
-        if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
+        if ((inode->i_state & I_SYNC) && wbc->sync_mode != WB_SYNC_ALL) {
             /*
              * If this inode is locked for writeback and we are not
              * doing writeback-for-data-integrity, move it to
@@ -677,22 +679,22 @@ static long writeback_sb_inodes(struct super_block *sb,
         spin_unlock(&inode->i_lock);
 
         write_chunk = writeback_chunk_size(wb->bdi, work);
-        wbc.nr_to_write = write_chunk;
-        wbc.pages_skipped = 0;
+        wbc->nr_to_write = write_chunk;
+        wbc->pages_skipped = 0;
 
         /*
          * We use I_SYNC to pin the inode in memory. While it is set
          * evict_inode() will wait so the inode cannot be freed.
          */
-        __writeback_single_inode(inode, &wbc);
+        __writeback_single_inode(inode, wbc);
 
-        work->nr_pages -= write_chunk - wbc.nr_to_write;
-        wrote += write_chunk - wbc.nr_to_write;
+        work->nr_pages -= write_chunk - wbc->nr_to_write;
+        wrote += write_chunk - wbc->nr_to_write;
         spin_lock(&wb->list_lock);
         spin_lock(&inode->i_lock);
         if (!(inode->i_state & I_DIRTY))
             wrote++;
-        requeue_inode(inode, wb, &wbc);
+        requeue_inode(inode, wb, wbc);
         inode_sync_complete(inode);
         spin_unlock(&inode->i_lock);
         cond_resched_lock(&wb->list_lock);
@@ -710,6 +712,33 @@ static long writeback_sb_inodes(struct super_block *sb,
     return wrote;
 }
 
+static long writeback_sb_inodes(
+    struct super_block *sb,
+    struct bdi_writeback *wb,
+    struct wb_writeback_work *work)
+{
+    struct writeback_control wbc = {
+        .sync_mode        = work->sync_mode,
+        .tagged_writepages    = work->tagged_writepages,
+        .for_kupdate        = work->for_kupdate,
+        .for_background        = work->for_background,
+        .for_sync        = work->for_sync,
+        .range_cyclic        = work->range_cyclic,
+        .range_start        = 0,
+        .range_end        = LLONG_MAX,
+    };
+
+    if (sb->s_op->writeback) {
+        long ret;
+        spin_unlock(&wb->list_lock);
+        ret = sb->s_op->writeback(sb, wb, work, &wbc);
+        spin_lock(&wb->list_lock);
+        return ret;
+    }
+
+    return generic_writeback_sb_inodes(sb, wb, work, &wbc);
+}
+
 static long __writeback_inodes_wb(struct bdi_writeback *wb,
                   struct wb_writeback_work *work)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8780312..fc07d33 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -295,6 +295,8 @@ enum positive_aop_returns {
 struct page;
 struct address_space;
 struct writeback_control;
+struct wb_writeback_work;
+struct bdi_writeback;
 
 /*
  * "descriptor" for what we're up to with a read.
@@ -1542,7 +1544,10 @@ struct super_operations {
     int (*statfs) (struct dentry *, struct kstatfs *);
     int (*remount_fs) (struct super_block *, int *, char *);
     void (*umount_begin) (struct super_block *);
-
+    long (*writeback)(struct super_block *sb,
+                struct bdi_writeback *wb,
+                struct wb_writeback_work *work,
+                struct writeback_control *wbc);
     int (*show_options)(struct seq_file *, struct dentry *);
     int (*show_devname)(struct seq_file *, struct dentry *);
     int (*show_path)(struct seq_file *, struct dentry *);
@@ -1739,6 +1744,8 @@ static inline void file_accessed(struct file *file)
 
 int sync_inode(struct inode *inode, struct writeback_control *wbc);
 int sync_inode_metadata(struct inode *inode, int wait);
+void inode_writeback_done(struct inode *inode);
+void inode_writeback_touch(struct inode *inode);
 
 struct file_system_type {
     const char *name;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 5777c13..24e12be 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -87,6 +87,25 @@ struct writeback_control {
 };
 
 /*
+ * Passed into wb_writeback(), essentially a subset of writeback_control
+ */
+struct wb_writeback_work {
+    long nr_pages;
+    struct super_block *sb;
+    unsigned long *older_than_this;
+    enum writeback_sync_modes sync_mode;
+    unsigned int tagged_writepages:1;
+    unsigned int for_kupdate:1;
+    unsigned int range_cyclic:1;
+    unsigned int for_background:1;
+    unsigned int for_sync:1;    /* sync(2) WB_SYNC_ALL writeback */
+    enum wb_reason reason;        /* why was writeback initiated? */
+
+    struct list_head list;        /* pending work list */
+    struct completion *done;    /* set if the caller waits */
+};
+
+/*
  * fs/fs-writeback.c
  */   
 struct bdi_writeback;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ