[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Sun, 5 May 2013 20:40:45 +0800
From: Zheng Liu <gnehzuil.liu@...il.com>
To: Jan Kara <jack@...e.cz>
Cc: Ted Tso <tytso@....edu>, linux-ext4@...r.kernel.org
Subject: Re: [PATCH 14/29] ext4: Stop messing with nr_to_write in
ext4_da_writepages()
On Mon, Apr 08, 2013 at 11:32:19PM +0200, Jan Kara wrote:
> Writeback code got better in how it submits IO and now the number of
> pages requested to be written is usually higher than original 1024. The
> number is now dynamically computed based on observed throughput and is
> set to be about 0.5 s worth of writeback. E.g. on ordinary SATA drive
> this ends up somewhere around 10000 as my testing shows. So remove the
> unnecessary smarts from ext4_da_writepages().
>
> Signed-off-by: Jan Kara <jack@...e.cz>
This patch needs to be rebase against latest dev branch of ext4 tree.
Otherwise the patch looks good to me.
Reviewed-by: Zheng Liu <wenqing.lz@...bao.com>
Regards,
- Zheng
> ---
> fs/ext4/inode.c | 96 -------------------------------------------------------
> 1 files changed, 0 insertions(+), 96 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index ba07412..f4dc4a1 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -423,66 +423,6 @@ static int __check_block_validity(struct inode *inode, const char *func,
> __check_block_validity((inode), __func__, __LINE__, (map))
>
> /*
> - * Return the number of contiguous dirty pages in a given inode
> - * starting at page frame idx.
> - */
> -static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
> - unsigned int max_pages)
> -{
> - struct address_space *mapping = inode->i_mapping;
> - pgoff_t index;
> - struct pagevec pvec;
> - pgoff_t num = 0;
> - int i, nr_pages, done = 0;
> -
> - if (max_pages == 0)
> - return 0;
> - pagevec_init(&pvec, 0);
> - while (!done) {
> - index = idx;
> - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
> - PAGECACHE_TAG_DIRTY,
> - (pgoff_t)PAGEVEC_SIZE);
> - if (nr_pages == 0)
> - break;
> - for (i = 0; i < nr_pages; i++) {
> - struct page *page = pvec.pages[i];
> - struct buffer_head *bh, *head;
> -
> - lock_page(page);
> - if (unlikely(page->mapping != mapping) ||
> - !PageDirty(page) ||
> - PageWriteback(page) ||
> - page->index != idx) {
> - done = 1;
> - unlock_page(page);
> - break;
> - }
> - if (page_has_buffers(page)) {
> - bh = head = page_buffers(page);
> - do {
> - if (!buffer_delay(bh) &&
> - !buffer_unwritten(bh))
> - done = 1;
> - bh = bh->b_this_page;
> - } while (!done && (bh != head));
> - }
> - unlock_page(page);
> - if (done)
> - break;
> - idx++;
> - num++;
> - if (num >= max_pages) {
> - done = 1;
> - break;
> - }
> - }
> - pagevec_release(&pvec);
> - }
> - return num;
> -}
> -
> -/*
> * The ext4_map_blocks() function tries to look up the requested blocks,
> * and returns if the blocks are already mapped.
> *
> @@ -2334,10 +2274,8 @@ static int ext4_da_writepages(struct address_space *mapping,
> struct mpage_da_data mpd;
> struct inode *inode = mapping->host;
> int pages_written = 0;
> - unsigned int max_pages;
> int range_cyclic, cycled = 1, io_done = 0;
> int needed_blocks, ret = 0;
> - long desired_nr_to_write, nr_to_writebump = 0;
> loff_t range_start = wbc->range_start;
> struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
> pgoff_t done_index = 0;
> @@ -2384,39 +2322,6 @@ static int ext4_da_writepages(struct address_space *mapping,
> end = wbc->range_end >> PAGE_CACHE_SHIFT;
> }
>
> - /*
> - * This works around two forms of stupidity. The first is in
> - * the writeback code, which caps the maximum number of pages
> - * written to be 1024 pages. This is wrong on multiple
> - * levels; different architectues have a different page size,
> - * which changes the maximum amount of data which gets
> - * written. Secondly, 4 megabytes is way too small. XFS
> - * forces this value to be 16 megabytes by multiplying
> - * nr_to_write parameter by four, and then relies on its
> - * allocator to allocate larger extents to make them
> - * contiguous. Unfortunately this brings us to the second
> - * stupidity, which is that ext4's mballoc code only allocates
> - * at most 2048 blocks. So we force contiguous writes up to
> - * the number of dirty blocks in the inode, or
> - * sbi->max_writeback_mb_bump whichever is smaller.
> - */
> - max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
> - if (!range_cyclic && range_whole) {
> - if (wbc->nr_to_write == LONG_MAX)
> - desired_nr_to_write = wbc->nr_to_write;
> - else
> - desired_nr_to_write = wbc->nr_to_write * 8;
> - } else
> - desired_nr_to_write = ext4_num_dirty_pages(inode, index,
> - max_pages);
> - if (desired_nr_to_write > max_pages)
> - desired_nr_to_write = max_pages;
> -
> - if (wbc->nr_to_write < desired_nr_to_write) {
> - nr_to_writebump = desired_nr_to_write - wbc->nr_to_write;
> - wbc->nr_to_write = desired_nr_to_write;
> - }
> -
> retry:
> if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
> tag_pages_for_writeback(mapping, index, end);
> @@ -2509,7 +2414,6 @@ retry:
> mapping->writeback_index = done_index;
>
> out_writepages:
> - wbc->nr_to_write -= nr_to_writebump;
> wbc->range_start = range_start;
> trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
> return ret;
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists