[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1356335742-11793-8-git-send-email-wenqing.lz@taobao.com>
Date: Mon, 24 Dec 2012 15:55:40 +0800
From: Zheng Liu <gnehzuil.liu@...il.com>
To: linux-ext4@...r.kernel.org
Cc: Zheng Liu <wenqing.lz@...bao.com>
Subject: [RFC][PATCH 7/9 v1] ext4: add a new convert function to convert an unwritten extent in extent status tree
From: Zheng Liu <wenqing.lz@...bao.com>
A new function, ext4_es_convert_unwritten_extents(), is defined to convert
a range of unwritten extents to written in the extent status tree.
This function aims to improve the unwritten extent conversion in DIO end_io.
Meanwhile, all i_es_lock operations are changed to the irqsave/irqrestore
variants because DIO end_io runs in irq context.
Signed-off-by: Zheng Liu <wenqing.lz@...bao.com>
---
fs/ext4/extents_status.c | 161 ++++++++++++++++++++++++++++++++++++++++++++---
fs/ext4/extents_status.h | 2 +
2 files changed, 155 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ccd940c..9db9e05 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -239,10 +239,11 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
struct extent_status *es1 = NULL;
struct rb_node *node;
ext4_lblk_t ret = EXT_MAX_BLOCKS;
+ unsigned long flags;
trace_ext4_es_find_extent_enter(inode, es->es_lblk);
- read_lock(&EXT4_I(inode)->i_es_lock);
+ read_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
tree = &EXT4_I(inode)->i_es_tree;
/* find delay extent in cache firstly */
@@ -273,7 +274,7 @@ out:
}
}
- read_unlock(&EXT4_I(inode)->i_es_lock);
+ read_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
trace_ext4_es_find_extent_exit(inode, es, ret);
return ret;
@@ -426,6 +427,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
struct ext4_es_tree *tree;
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
+ unsigned long flags;
int err = 0;
es_debug("add [%u/%u) %llu %d to extent status tree of inode %lu\n",
@@ -439,7 +441,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
newes.es_status = status;
trace_ext4_es_insert_extent(inode, &newes);
- write_lock(&EXT4_I(inode)->i_es_lock);
+ write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
tree = &EXT4_I(inode)->i_es_tree;
err = __es_remove_extent(tree, lblk, end);
if (err != 0)
@@ -447,7 +449,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
err = __es_insert_extent(tree, &newes);
error:
- write_unlock(&EXT4_I(inode)->i_es_lock);
+ write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
ext4_es_print_tree(inode);
@@ -466,12 +468,13 @@ int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es)
struct ext4_es_tree *tree;
struct extent_status *es1;
struct rb_node *node;
+ unsigned long flags;
int found = 0;
es_debug("lookup extent in block %u\n", es->es_lblk);
tree = &EXT4_I(inode)->i_es_tree;
- read_lock(&EXT4_I(inode)->i_es_lock);
+ read_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
/* find delay extent in cache firstly */
if (tree->cache_es) {
@@ -506,7 +509,7 @@ out:
es->es_status = es1->es_status;
}
- read_unlock(&EXT4_I(inode)->i_es_lock);
+ read_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
return found;
}
@@ -605,6 +608,7 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
{
struct ext4_es_tree *tree;
ext4_lblk_t end;
+ unsigned long flags;
int err = 0;
trace_ext4_es_remove_extent(inode, lblk, len);
@@ -616,9 +620,150 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
tree = &EXT4_I(inode)->i_es_tree;
- write_lock(&EXT4_I(inode)->i_es_lock);
+ write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
err = __es_remove_extent(tree, lblk, end);
- write_unlock(&EXT4_I(inode)->i_es_lock);
+ write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
+ ext4_es_print_tree(inode);
+ return err;
+}
+
+int ext4_es_convert_unwritten_extents(struct inode *inode, loff_t offset,
+ size_t size)
+{
+ struct ext4_es_tree *tree;
+ struct rb_node *node;
+ struct extent_status *es, orig_es, conv_es;
+ ext4_lblk_t end, len1, len2;
+ ext4_lblk_t lblk = 0, len = 0;
+ unsigned long flags;
+ unsigned int blkbits;
+ int err = 0;
+
+ /* TODO: add trace point and debug output */
+ blkbits = inode->i_blkbits;
+ lblk = offset >> blkbits;
+ len = (EXT4_BLOCK_ALIGN(offset + size, blkbits) >> blkbits) - lblk;
+
+ end = lblk + len - 1;
+ BUG_ON(end < lblk);
+
+ tree = &EXT4_I(inode)->i_es_tree;
+
+ write_lock_irqsave(&EXT4_I(inode)->i_es_lock, flags);
+
+ es = __es_tree_search(&tree->root, lblk);
+ if (!es)
+ goto out;
+ if (es->es_lblk > end)
+ goto out;
+
+ tree->cache_es = NULL;
+
+ orig_es.es_lblk = es->es_lblk;
+ orig_es.es_len = es->es_len;
+ orig_es.es_pblk = es->es_pblk;
+ orig_es.es_status = es->es_status;
+
+ len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
+ len2 = extent_status_end(es) > end ?
+ extent_status_end(es) - end : 0;
+ if (len1 > 0)
+ es->es_len = len1;
+ if (len2 > 0) {
+ if (len1 > 0) {
+ struct extent_status newes;
+
+ newes.es_lblk = end + 1;
+ newes.es_len = len2;
+ newes.es_pblk = orig_es.es_pblk + orig_es.es_len - len2;
+ newes.es_status = orig_es.es_status;
+ /*BUG_ON(newes.es_status != EXTENT_STATUS_UNWRITTEN);*/
+ err = __es_insert_extent(tree, &newes);
+ if (err) {
+ es->es_lblk = orig_es.es_lblk;
+ es->es_len = orig_es.es_len;
+ goto out;
+ }
+
+ conv_es.es_lblk = orig_es.es_lblk + len1;
+ conv_es.es_len = orig_es.es_len - len1 - len2;
+ conv_es.es_pblk = orig_es.es_pblk + len1;
+ conv_es.es_status = EXTENT_STATUS_WRITTEN;
+ err = __es_insert_extent(tree, &conv_es);
+ if (err) {
+ int err2;
+ err2 = __es_remove_extent(tree, newes.es_lblk,
+ extent_status_end(&newes));
+ if (err2)
+ goto out;
+ es->es_lblk = orig_es.es_lblk;
+ es->es_len = orig_es.es_len;
+ goto out;
+ }
+ } else {
+ es->es_lblk = end + 1;
+ es->es_len = len2;
+ es->es_pblk = orig_es.es_pblk + orig_es.es_len - len2;
+ /*BUG_ON(newes.es_status != EXTENT_STATUS_UNWRITTEN);*/
+
+ conv_es.es_lblk = orig_es.es_lblk;
+ conv_es.es_len = orig_es.es_len - len2;
+ conv_es.es_pblk = orig_es.es_pblk;
+ conv_es.es_status = EXTENT_STATUS_WRITTEN;
+ err = __es_insert_extent(tree, &conv_es);
+ if (err) {
+ es->es_lblk = orig_es.es_lblk;
+ es->es_len = orig_es.es_len;
+ es->es_pblk = orig_es.es_pblk;
+ }
+ }
+
+ goto out;
+ }
+
+ if (len1 > 0) {
+ node = rb_next(&es->rb_node);
+ if (node)
+ es = rb_entry(node, struct extent_status, rb_node);
+ else
+ es = NULL;
+ }
+
+ while (es && extent_status_end(es) <= end) {
+ node = rb_next(&es->rb_node);
+ es->es_status = EXTENT_STATUS_WRITTEN;
+ if (!node) {
+ es = NULL;
+ break;
+ }
+ es = rb_entry(node, struct extent_status, rb_node);
+ }
+
+ if (es && es->es_lblk < end + 1) {
+ ext4_lblk_t orig_len = es->es_len;
+
+ /*
+ * Fill in conv_es first, before es is modified, to avoid copying
+ * the value of es to a temporary variable.
+ */
+ len1 = extent_status_end(es) - end;
+ conv_es.es_lblk = es->es_lblk;
+ conv_es.es_len = es->es_len - len1;
+ conv_es.es_pblk = es->es_pblk;
+ conv_es.es_status = EXTENT_STATUS_WRITTEN;
+
+ es->es_lblk = end + 1;
+ es->es_len = len1;
+ es->es_pblk = es->es_pblk + orig_len - len1;
+
+ err = __es_insert_extent(tree, &conv_es);
+ if (err)
+ goto out;
+ }
+
+out:
+ write_unlock_irqrestore(&EXT4_I(inode)->i_es_lock, flags);
+
ext4_es_print_tree(inode);
return err;
}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 1890f80..9069ecf 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -51,6 +51,8 @@ extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
extern ext4_lblk_t ext4_es_find_extent(struct inode *inode,
struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, struct extent_status *es);
+extern int ext4_es_convert_unwritten_extents(struct inode *inode,
+ loff_t offset, size_t size);
static inline int ext4_es_is_written(struct extent_status *es)
{
--
1.7.12.rc2.18.g61b472e
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists