lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-Id: <1365456754-29373-23-git-send-email-jack@suse.cz> Date: Mon, 8 Apr 2013 23:32:27 +0200 From: Jan Kara <jack@...e.cz> To: Ted Tso <tytso@....edu> Cc: linux-ext4@...r.kernel.org, Jan Kara <jack@...e.cz> Subject: [PATCH 22/29] ext4: Defer clearing of PageWriteback after extent conversion Currently PageWriteback bit gets cleared from put_io_page() called from ext4_end_bio(). This is somewhat inconvenient as extent tree is not fully updated at that time (unwritten extents are not marked as written) so we cannot read the data back yet. This design was dictated by lock ordering as we cannot start a transaction while PageWriteback bit is set (we could easily deadlock with ext4_da_writepages()). But now that we use transaction reservation for extent conversion, locking issues are solved and we can move PageWriteback bit clearing after extent conversion is done. As a result we can remove wait for unwritt en extent conversion from ext4_sync_file() because it already implicitely happe ns through wait_on_page_writeback(). We implement deferring of PageWriteback clearing by queueing completed bios to appropriate io_end and processing all the pages when io_end is going to be freed instead of at the moment ext4_io_end() is called. Signed-off-by: Jan Kara <jack@...e.cz> --- fs/ext4/ext4.h | 2 + fs/ext4/fsync.c | 4 -- fs/ext4/page-io.c | 132 +++++++++++++++++++++++++++++------------------------ 3 files changed, 74 insertions(+), 64 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a594a94..2b0dd9a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -190,6 +190,8 @@ typedef struct ext4_io_end { handle_t *handle; /* handle reserved for extent * conversion */ struct inode *inode; /* file being written to */ + struct bio *bio; /* Linked list of completed + * bios covering the extent */ unsigned int flag; /* unwritten or not */ loff_t offset; /* offset in the file */ ssize_t size; /* size of the extent */ diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 3278e64..e02ba1b 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -132,10 +132,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (inode->i_sb->s_flags & MS_RDONLY) goto out; - ret = ext4_flush_unwritten_io(inode); - if (ret < 0) - goto out; - if (!journal) { ret = __sync_inode(inode, datasync); if (!ret && !hlist_empty(&inode->i_dentry)) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 8bff3b3..2967794 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -51,14 +51,83 @@ void ext4_ioend_wait(struct inode *inode) wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); } +/* + * Print an buffer I/O error compatible with the fs/buffer.c. This + * provides compatibility with dmesg scrapers that look for a specific + * buffer I/O error message. We really need a unified error reporting + * structure to userspace ala Digital Unix's uerf system, but it's + * probably not going to happen in my lifetime, due to LKML politics... + */ +static void buffer_io_error(struct buffer_head *bh) +{ + char b[BDEVNAME_SIZE]; + printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", + bdevname(bh->b_bdev, b), + (unsigned long long)bh->b_blocknr); +} + +static void ext4_finish_bio(struct bio *bio) +{ + int i; + int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); + + for (i = 0; i < bio->bi_vcnt; i++) { + struct bio_vec *bvec = &bio->bi_io_vec[i]; + struct page *page = bvec->bv_page; + struct buffer_head *bh, *head; + unsigned bio_start = bvec->bv_offset; + unsigned bio_end = bio_start + bvec->bv_len; + unsigned under_io = 0; + unsigned long flags; + + if (!page) + continue; + + if (error) { + SetPageError(page); + set_bit(AS_EIO, &page->mapping->flags); + } + bh = head = page_buffers(page); + /* + * We check all buffers in the page under BH_Uptodate_Lock + * to avoid races with other end io clearing async_write flags + */ + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &head->b_state); + do { + if (bh_offset(bh) < bio_start || + bh_offset(bh) + bh->b_size > bio_end) { + if (buffer_async_write(bh)) + under_io++; + continue; + } + clear_buffer_async_write(bh); + if (error) + buffer_io_error(bh); + } while ((bh = bh->b_this_page) != head); + bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); + local_irq_restore(flags); + if (!under_io) + end_page_writeback(page); + } +} + static void ext4_release_io_end(ext4_io_end_t *io_end) { + struct bio *bio, *next_bio; + BUG_ON(!list_empty(&io_end->list)); BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); WARN_ON(io_end->handle); if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) wake_up_all(ext4_ioend_wq(io_end->inode)); + + for (bio = io_end->bio; bio; bio = next_bio) { + next_bio = bio->bi_private; + ext4_finish_bio(bio); + bio_put(bio); + } if (io_end->flag & EXT4_IO_END_DIRECT) inode_dio_done(io_end->inode); if (io_end->iocb) @@ -254,76 +323,20 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) return io_end; } -/* - * Print an buffer I/O error compatible with the fs/buffer.c. This - * provides compatibility with dmesg scrapers that look for a specific - * buffer I/O error message. We really need a unified error reporting - * structure to userspace ala Digital Unix's uerf system, but it's - * probably not going to happen in my lifetime, due to LKML politics... - */ -static void buffer_io_error(struct buffer_head *bh) -{ - char b[BDEVNAME_SIZE]; - printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", - bdevname(bh->b_bdev, b), - (unsigned long long)bh->b_blocknr); -} - static void ext4_end_bio(struct bio *bio, int error) { ext4_io_end_t *io_end = bio->bi_private; struct inode *inode; - int i; - int blocksize; sector_t bi_sector = bio->bi_sector; BUG_ON(!io_end); inode = io_end->inode; - blocksize = 1 << inode->i_blkbits; - bio->bi_private = NULL; bio->bi_end_io = NULL; + /* Link bio into list hanging from io_end */ + bio->bi_private = io_end->bio; + io_end->bio = bio; if (test_bit(BIO_UPTODATE, &bio->bi_flags)) error = 0; - for (i = 0; i < bio->bi_vcnt; i++) { - struct bio_vec *bvec = &bio->bi_io_vec[i]; - struct page *page = bvec->bv_page; - struct buffer_head *bh, *head; - unsigned bio_start = bvec->bv_offset; - unsigned bio_end = bio_start + bvec->bv_len; - unsigned under_io = 0; - unsigned long flags; - - if (!page) - continue; - - if (error) { - SetPageError(page); - set_bit(AS_EIO, &page->mapping->flags); - } - bh = head = page_buffers(page); - /* - * We check all buffers in the page under BH_Uptodate_Lock - * to avoid races with other end io clearing async_write flags - */ - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &head->b_state); - do { - if (bh_offset(bh) < bio_start || - bh_offset(bh) + blocksize > bio_end) { - if (buffer_async_write(bh)) - under_io++; - continue; - } - clear_buffer_async_write(bh); - if (error) - buffer_io_error(bh); - } while ((bh = bh->b_this_page) != head); - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); - local_irq_restore(flags); - if (!under_io) - end_page_writeback(page); - } - bio_put(bio); if (error) { io_end->flag |= EXT4_IO_END_ERROR; @@ -335,7 +348,6 @@ static void ext4_end_bio(struct bio *bio, int error) (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); } - ext4_put_io_end_defer(io_end); } -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists