[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20131108082354.GD4038@bbox>
Date: Fri, 8 Nov 2013 17:23:54 +0900
From: Minchan Kim <minchan@...nel.org>
To: Phillip Lougher <phillip@...ashfs.org.uk>
Cc: linux-kernel@...r.kernel.org
Subject: Re: [PATCH 6/6] Squashfs: Directly decompress into the page cache
for file data (V2)
On Thu, Nov 07, 2013 at 08:24:25PM +0000, Phillip Lougher wrote:
> This introduces an implementation of squashfs_readpage_block()
> that directly decompresses into the page cache.
>
> This uses the previously added page handler abstraction to push
> down the necessary kmap_atomic/kunmap_atomic operations on the
> page cache buffers into the decompressors. This enables
> direct copying into the page cache without using the slow
> kmap/kunmap calls.
>
> The code detects when multiple threads are racing in
> squashfs_readpage() to decompress the same block, and avoids
> this regression by falling back to using an intermediate
> buffer.
>
> This patch enhances the performance of Squashfs significantly
> when multiple processes are accessing the filesystem simultaneously
> because it not only reduces memcopying, but it more importantly
> eliminates the lock contention on the intermediate buffer.
>
> Using single-thread decompression.
>
> dd if=file1 of=/dev/null bs=4096 &
> dd if=file2 of=/dev/null bs=4096 &
> dd if=file3 of=/dev/null bs=4096 &
> dd if=file4 of=/dev/null bs=4096
>
> Before:
>
> 629145600 bytes (629 MB) copied, 45.8046 s, 13.7 MB/s
>
> After:
>
> 629145600 bytes (629 MB) copied, 9.29414 s, 67.7 MB/s
>
> V2:
> * update comment adding failure to grab pages could be
> because we've been VM reclaimed, but the other pages are
> still in the page cache and uptodate.
> * Make Kconfig option a choice, making the either-other nature of
> the option more explicit, and also tidying up the ifdef in the
> Makefile
>
> Signed-off-by: Phillip Lougher <phillip@...ashfs.org.uk>
> ---
> fs/squashfs/Kconfig | 28 +++++++
> fs/squashfs/Makefile | 4 +-
> fs/squashfs/file_direct.c | 178 +++++++++++++++++++++++++++++++++++++++++++++
> fs/squashfs/page_actor.c | 104 ++++++++++++++++++++++++++
> fs/squashfs/page_actor.h | 32 ++++++++
> 5 files changed, 345 insertions(+), 1 deletion(-)
> create mode 100644 fs/squashfs/file_direct.c
> create mode 100644 fs/squashfs/page_actor.c
>
> diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
> index c92c75f..3a21adf 100644
> --- a/fs/squashfs/Kconfig
> +++ b/fs/squashfs/Kconfig
> @@ -26,6 +26,34 @@ config SQUASHFS
> If unsure, say N.
>
> choice
> + prompt "File decompression options"
> + depends on SQUASHFS
> + help
> + Squashfs now supports two options for decompressing file
> + data. Traditionally Squashfs has decompressed into an
> + intermediate buffer and then memcopied it into the page cache.
> + Squashfs now supports the ability to decompress directly into
> + the page cache.
> +
> + If unsure, select "Decompress file data into an intermediate buffer"
> +
> +config SQUASHFS_FILE_CACHE
> + bool "Decompress file data into an intermediate buffer"
> + help
> + Decompress file data into an intermediate buffer and then
> + memcopy it into the page cache.
> +
> +config SQUASHFS_FILE_DIRECT
> + bool "Decompress files directly into the page cache"
> + help
> + Directly decompress file data into the page cache.
> + Doing so can significantly improve performance because
> + it eliminates a mempcpy and it also removes the lock contention
Typo: s/mempcpy/memcpy/
> + on the single buffer.
> +
> +endchoice
> +
> +choice
> prompt "Decompressor parallelisation options"
> depends on SQUASHFS
> help
> diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
> index 908c0d9..4132520 100644
> --- a/fs/squashfs/Makefile
> +++ b/fs/squashfs/Makefile
> @@ -4,7 +4,9 @@
>
> obj-$(CONFIG_SQUASHFS) += squashfs.o
> squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
> -squashfs-y += namei.o super.o symlink.o decompressor.o file_cache.c
> +squashfs-y += namei.o super.o symlink.o decompressor.o
> +squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
> +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
> squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
> squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
> squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
> diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
> new file mode 100644
> index 0000000..d020d94
> --- /dev/null
> +++ b/fs/squashfs/file_direct.c
> @@ -0,0 +1,178 @@
> +/*
> + * Copyright (c) 2013
> + * Phillip Lougher <phillip@...ashfs.org.uk>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + */
> +
> +#include <linux/fs.h>
> +#include <linux/vfs.h>
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/string.h>
> +#include <linux/pagemap.h>
> +#include <linux/mutex.h>
> +
> +#include "squashfs_fs.h"
> +#include "squashfs_fs_sb.h"
> +#include "squashfs_fs_i.h"
> +#include "squashfs.h"
> +#include "page_actor.h"
> +
> +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
> + int pages, struct page **page);
> +
> +/* Read separately compressed datablock directly into page cache */
> +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
> +
> +{
> + struct inode *inode = target_page->mapping->host;
> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
> +
> + int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
> + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
> + int start_index = target_page->index & ~mask;
> + int end_index = start_index | mask;
> + int i, n, pages, missing_pages, bytes, res = -ENOMEM;
> + struct page **page;
> + struct squashfs_page_actor *actor;
> + void *pageaddr;
> +
> + if (end_index > file_end)
> + end_index = file_end;
> +
> + pages = end_index - start_index + 1;
> +
> + page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
> + if (page == NULL)
> + goto error_out;
> +
> + /*
> + * Create a "page actor" which will kmap and kunmap the
> + * page cache pages appropriately within the decompressor
> + */
> + actor = squashfs_page_actor_init_special(page, pages, 0);
> + if (actor == NULL)
> + goto error_out2;
> +
> + /* Try to grab all the pages covered by the Squashfs block */
> + for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
> + page[i] = (n == target_page->index) ? target_page :
> + grab_cache_page_nowait(target_page->mapping, n);
> +
> + if (page[i] == NULL) {
> + missing_pages++;
> + continue;
> + }
> +
> + if (PageUptodate(page[i])) {
> + unlock_page(page[i]);
> + page_cache_release(page[i]);
> + page[i] = NULL;
> + missing_pages++;
> + }
> + }
> +
> + if (missing_pages) {
> + /*
> + * Couldn't get one or more pages, this page has either
> + * been VM reclaimed, but others are still in the page cache
> + * and uptodate, or we're racing with another thread in
> + * squashfs_readpage also trying to grab them. Fall back to
> + * using an intermediate buffer.
> + */
> + kfree(actor);
> + return squashfs_read_cache(target_page, block, bsize, pages,
> + page);
> + }
> +
> + /* Decompress directly into the page cache buffers */
> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
> + if (res < 0)
> + goto mark_errored;
> +
> + /* Last page may have trailing bytes not filled */
> + bytes = res % PAGE_CACHE_SIZE;
> + if (bytes) {
> + pageaddr = kmap_atomic(page[pages - 1]);
> + memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
> + kunmap_atomic(pageaddr);
> + }
> +
> + /* Mark pages as uptodate, unlock and release */
> + for (i = 0; i < pages; i++) {
> + flush_dcache_page(page[i]);
> + SetPageUptodate(page[i]);
> + unlock_page(page[i]);
> + if (page[i] != target_page)
> + page_cache_release(page[i]);
> + }
> +
> + kfree(actor);
> + kfree(page);
> +
> + return 0;
> +
> +mark_errored:
> + /* Decompression failed, mark pages as errored. Target_page is
> + * dealt with by the caller
> + */
> + for (i = 0; i < pages; i++) {
> + if (page[i] == target_page)
> + continue;
> + pageaddr = kmap_atomic(page[i]);
> + memset(pageaddr, 0, PAGE_CACHE_SIZE);
Do we need to zero the page here?
If other readers see !PG_uptodate, they will retry the read, so I guess we don't need it.
> + kunmap_atomic(pageaddr);
> + flush_dcache_page(page[i]);
> + SetPageError(page[i]);
> + unlock_page(page[i]);
> + page_cache_release(page[i]);
> + }
> +
> + kfree(actor);
> +error_out2:
> + kfree(page);
> +error_out:
> + return res;
> +}
> +
> +
> +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
> + int pages, struct page **page)
> +{
> + struct inode *i = target_page->mapping->host;
> + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
> + block, bsize);
> + int bytes = buffer->length, res = buffer->error, n, offset = 0;
> + void *pageaddr;
> +
> + if (res) {
> + ERROR("Unable to read page, block %llx, size %x\n", block,
> + bsize);
> + goto out;
> + }
> +
> + for (n = 0; n < pages && bytes > 0; n++,
> + bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
> + int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
> +
> + if (page[n] == NULL)
> + continue;
> +
> + pageaddr = kmap_atomic(page[n]);
> + squashfs_copy_data(pageaddr, buffer, offset, avail);
> + memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
> + kunmap_atomic(pageaddr);
> + flush_dcache_page(page[n]);
> + SetPageUptodate(page[n]);
> + unlock_page(page[n]);
> + if (page[n] != target_page)
> + page_cache_release(page[n]);
> + }
> +
> +out:
> + squashfs_cache_put(buffer);
Nitpick:
Wouldn't it be better to free the page array in the caller rather than in the
callee if the function returns an error?
> + kfree(page);
> + return res;
> +}
> diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
> new file mode 100644
> index 0000000..8e754ff
> --- /dev/null
> +++ b/fs/squashfs/page_actor.c
> @@ -0,0 +1,104 @@
> +/*
> + * Copyright (c) 2013
> + * Phillip Lougher <phillip@...ashfs.org.uk>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/pagemap.h>
> +#include "page_actor.h"
> +
> +/* Implementation of page_actor for decompressing into intermediate buffer */
> +static void *cache_first_page(struct squashfs_page_actor *actor)
> +{
> + actor->next_page = 1;
> + return actor->buffer[0];
> +}
> +
> +static void *cache_next_page(struct squashfs_page_actor *actor)
> +{
> + if (actor->next_page == actor->pages)
> + return NULL;
> +
> + return actor->buffer[actor->next_page++];
> +}
> +
> +static void cache_finish_page(struct squashfs_page_actor *actor)
> +{
> + /* empty */
> +}
> +
> +struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
> + int pages, int length)
> +{
> + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
> +
> + if (actor == NULL)
> + return NULL;
> +
> + if (length)
> + actor->length = length;
> + else
> + actor->length = pages * PAGE_CACHE_SIZE;
> + actor->buffer = buffer;
> + actor->pages = pages;
> + actor->next_page = 0;
> +
> + actor->squashfs_first_page = cache_first_page;
> + actor->squashfs_next_page = cache_next_page;
> + actor->squashfs_finish_page = cache_finish_page;
> + return actor;
> +}
> +
> +/* Implementation of page_actor for decompressing directly into page cache */
> +static void *direct_first_page(struct squashfs_page_actor *actor)
> +{
> + actor->next_page = 1;
> + return actor->pageaddr = kmap_atomic(actor->page[0]);
> +}
Just my two cents:
This introduces a new rule that we must not call blocking functions while
enumerating pages with page_actor. A comment about that somewhere would be helpful.
> +
> +static void *direct_next_page(struct squashfs_page_actor *actor)
> +{
> + if (actor->pageaddr)
> + kunmap_atomic(actor->pageaddr);
> +
> + if (actor->next_page == actor->pages) {
> + actor->pageaddr = NULL;
> + return NULL;
> + }
> +
> + return actor->pageaddr = kmap_atomic(actor->page[actor->next_page++]);
> +}
> +
> +static void direct_finish_page(struct squashfs_page_actor *actor)
> +{
> + if (actor->pageaddr)
> + kunmap_atomic(actor->pageaddr);
> +}
> +
> +
> +struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
> + int pages, int length)
> +{
> + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
> +
> + if (actor == NULL)
> + return NULL;
> +
> + if (length)
> + actor->length = length;
> + else
> + actor->length = pages * PAGE_CACHE_SIZE;
> + actor->page = page;
> + actor->pages = pages;
> + actor->next_page = 0;
> + actor->pageaddr = NULL;
> +
> + actor->squashfs_first_page = direct_first_page;
> + actor->squashfs_next_page = direct_next_page;
> + actor->squashfs_finish_page = direct_finish_page;
> + return actor;
> +}
> diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
> index 19a66a3..22731c7 100644
> --- a/fs/squashfs/page_actor.h
> +++ b/fs/squashfs/page_actor.h
> @@ -8,6 +8,7 @@
> * the COPYING file in the top-level directory.
> */
>
> +#ifndef CONFIG_SQUASHFS_FILE_DIRECT
> struct squashfs_page_actor {
> void **page;
> int pages;
> @@ -51,4 +52,35 @@ static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
> {
> /* empty */
> }
> +#else
> +struct squashfs_page_actor {
> + union {
> + void **buffer;
> + struct page **page;
> + };
> + void *pageaddr;
> + void *(*squashfs_first_page)(struct squashfs_page_actor *);
> + void *(*squashfs_next_page)(struct squashfs_page_actor *);
> + void (*squashfs_finish_page)(struct squashfs_page_actor *);
> + int pages;
> + int length;
> + int next_page;
> +};
> +
> +extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
> +extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
> + **, int, int);
> +static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
> +{
> + return actor->squashfs_first_page(actor);
> +}
> +static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
> +{
> + return actor->squashfs_next_page(actor);
> +}
> +static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
> +{
> + actor->squashfs_finish_page(actor);
> +}
> +#endif
> #endif
Most of my comments are just nitpicks.
Looks great to me.
Thanks, Phillip.
Reviewed-by: Minchan Kim <minchan@...nel.org>
--
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists