[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <9262696d-3ef0-c37f-3b07-0a1bb6f8cc19@infradead.org>
Date: Fri, 6 Apr 2018 10:34:19 -0700
From: Randy Dunlap <rdunlap@...radead.org>
To: Sayan Ghosh <sgdgp.2014@...il.com>, linux-ext4@...r.kernel.org
Cc: linux-fsdevel@...r.kernel.org,
"Bhattacharya, Suparna" <suparna.bhattacharya@....com>,
niloy ganguly <ganguly.niloy@...il.com>,
Madhumita Mallick <madhu.cse.ju@...il.com>,
"Bharde, Madhumita" <madhumita.bharde@....com>
Subject: Re: [Patch 4/4] Support for obtaining reduced view of a graded file
On 04/06/2018 04:42 AM, Sayan Ghosh wrote:
> The patch is on top of Linux Kernel 4.7.2.
>
> Signed-off-by: Sayan Ghosh <sgdgp.2014@...il.com>
> ---
> fs/dax.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/ext4/ext4.h | 1 +
> fs/ext4/file.c | 79 +++++++++++++++++++++++++-------
> 3 files changed, 203 insertions(+), 16 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index e207f8f..1930307 100755
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -793,6 +793,41 @@ int dax_writeback_mapping_range(struct
> address_space *mapping,
> }
> EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
>
> +/*
> + * This function is a copy of dax_insert_mapping.
> + * It is called in skip_dax_fault_handler.
> + */
> +static int skip_dax_insert_mapping(struct address_space *mapping,
> + struct buffer_head *bh, void **entryp,
> + struct vm_area_struct *vma, struct vm_fault *vmf, sector_t blknum)
> +{
> + unsigned long vaddr = (unsigned long)vmf->virtual_address;
> + struct inode *inode = mapping->host;
> + struct block_device *bdev = bh->b_bdev;
> + bdev->bd_inode->i_ino=mapping->host->i_ino;
> + struct blk_dax_ctl dax = {
> + .sector = to_sector(bh, mapping->host),
> + .size = bh->b_size,
> + };
> + int error;
> + sector_t block;
> + void *ret;
> + void *entry = *entryp;
> + block = (sector_t)vmf->pgoff << (PAGE_SHIFT - mapping->host->i_blkbits);
> + dax.sector = blknum << (mapping->host->i_blkbits - 9);
> + if (dax_map_atomic(bdev, &dax) < 0){
> + return PTR_ERR(dax.addr);
> + }
The indentation size is wrong here.
Use tabs instead of spaces for indentation.
> + dax_unmap_atomic(bdev, &dax);
> + ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector);
> + if (IS_ERR(ret)){
> + return PTR_ERR(ret);
> + }
> + *entryp = ret;
> +
> + vm_insert_mixed(vma, vaddr, dax.pfn);
> +}
> +
> static int dax_insert_mapping(struct address_space *mapping,
> struct buffer_head *bh, void **entryp,
> struct vm_area_struct *vma, struct vm_fault *vmf)
> @@ -915,6 +950,110 @@ int __dax_fault(struct vm_area_struct *vma,
> struct vm_fault *vmf,
> }
> EXPORT_SYMBOL(__dax_fault);
>
> +/*
> + * This is the modified __dax_fault handler.
> + * Most of the code is copied from __dax_fault function.
> + * One more parameter is passed here, namely skip_dax.
> + */
> +int __skip_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> + get_block_t get_block,long skip_dax)
> +{
> + struct file *file = vma->vm_file;
> + struct address_space *mapping = file->f_mapping;
> + struct inode *inode = mapping->host;
> + void *entry;
> + struct buffer_head bh;
> + unsigned long vaddr = (unsigned long)vmf->virtual_address;
> + unsigned blkbits = inode->i_blkbits;
> + sector_t block;
> + sector_t corrected_sector,corrected_new_block;
> + pgoff_t size;
> + int error;
> + int new_error;
> + int major = 0;
> +
> + /*
> + * Check whether offset isn't beyond end of file now. Caller is supposed
> + * to hold locks serializing us with truncate / punch hole so this is
> + * a reliable test.
> + */
> + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
> + if (vmf->pgoff >= size)
> + return VM_FAULT_SIGBUS;
> +
> + memset(&bh, 0, sizeof(bh));
> + block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
> + bh.b_bdev = inode->i_sb->s_bdev;
> + bh.b_size = PAGE_SIZE;
> +
> + entry = grab_mapping_entry(mapping, vmf->pgoff);
> + if (IS_ERR(entry)) {
> + error = PTR_ERR(entry);
> + goto out;
> + }
> +
> + error = get_block(inode, block, &bh, 0);
> + if (!error && (bh.b_size < PAGE_SIZE))
> + error = -EIO; /* fs corruption? */
> + if (error){
> + goto unlock_entry;
> + }
> +
> + if (vmf->cow_page) {
> + struct page *new_page = vmf->cow_page;
> + if (buffer_written(&bh))
> + error = copy_user_bh(new_page, inode, &bh, vaddr);
> + else
> + clear_user_highpage(new_page, vaddr);
> + if (error){
> + goto unlock_entry;
> + }
> + if (!radix_tree_exceptional_entry(entry)) {
> + vmf->page = entry;
> + return VM_FAULT_LOCKED;
> + }
> + vmf->entry = entry;
> + return VM_FAULT_DAX_LOCKED;
> + }
> +
> + if (!buffer_mapped(&bh)) {
> + if (vmf->flags & FAULT_FLAG_WRITE) {
> + error = get_block(inode, block, &bh, 1);
> + count_vm_event(PGMAJFAULT);
> + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> + major = VM_FAULT_MAJOR;
> + if (!error && (bh.b_size < PAGE_SIZE))
> + error = -EIO;
> + if (error)
> + goto unlock_entry;
> + } else {
> + goto out2;
> + }
> + }
> +
> + /* Filesystem should not return unwritten buffers to us! */
> + WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
> +out2:
> + /* We take the new block here, the next higher
> + * graded block
> + */
> + corrected_sector = skip_dax;
> + new_error = get_block(inode, corrected_sector, &bh, 0);
> + corrected_new_block = bh.b_blocknr;
> + error = get_block(inode, block, &bh, 0);
> + error = skip_dax_insert_mapping(mapping, &bh, &entry, vma, vmf,
> corrected_new_block);
> + unlock_entry:
> + put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> + out:
> + if (error == -ENOMEM)
> + return VM_FAULT_OOM | major;
> + /* -EBUSY is fine, somebody else faulted on the same PTE */
> + if ((error < 0) && (error != -EBUSY))
> + return VM_FAULT_SIGBUS | major;
> + return VM_FAULT_NOPAGE | major;
> +}
> +EXPORT_SYMBOL(__skip_dax_fault);
> +
> /**
> * dax_fault - handle a page fault on a DAX file
> * @vma: The virtual memory area where the fault occurred
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 368cf53..5dafd52 100755
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -32,6 +32,20 @@
> #include "acl.h"
>
> /*
> + * read_high() returns 0 or 1 depending whether we want to read all the file
> + * blocks or only high graded, respectively.
> + * It gets this information from the extended attribute set by user beforehand.
> + */
> +int read_high(struct inode *inode)
> +{
> + const char *xattr_name = "read_high";
> + int read_high = 0;
> + int xattr_size = sizeof(int);
> + xattr_size = ext4_xattr_get(inode,
> EXT4_XATTR_INDEX_USER,xattr_name, (void *)&read_high,xattr_size);
The line above is too long; please wrap it to fit within 80 columns.
> + return read_high;
> +}
> +
> +/*
> * Called when an inode is released. Note that this is different
> * from ext4_file_open: open gets called at every open, but release
> * gets called only when /all/ the files are closed.
> @@ -349,22 +363,55 @@ static int graded_ext4_fault(struct
> vm_area_struct *vma, struct vm_fault *vmf){
> }
> }
> else{
> - /*
> - * Here the higher graded blocks are redirected via DAX path
> - * since we consider Persistent Memory as higher tier.
> - *
> - * ** TODO **
> - * To take care of the case when the higher tier is not
> - * persistent memory (can be HDD-SSD combination), a check
> - * of the same needs to be provided before re-direction.
> - */
> - unsigned long long temp;
> - if(find_grade(grade_array,total,block,&temp) == 1){
> - result = __dax_fault(vma, vmf, ext4_dax_get_block);
> - }
> - else if(find_grade(grade_array,total,block,&temp) == 0){
> - result = ext4_filemap_fault(vma,vmf);
> - }
> + /*
> + * If read_high is enabled then read the higher
> + * grade blocks only.
> + * It uses a modified dax_fault handler with
> + * the assumption that high grade blocks are
> + * in Persistent Memory.
> + *
> + * ** TODO 1**
> + * To take care when high grade blocks are allocated elsewhere.
> + * Checking of allocated space of each high graded block needs
> + * to be done.
> + *
> + * ** TODO 2**
> + * Modifying vmf according to the target_block in order to
> + * use the existing dax_fault handler needs to be done.
> + */
> + if(read_high(inode) == 1)
> + {
Kernel style puts a space after "if" and the brace on the same line:
if (read_high(inode) == 1) {
> + ext4_lblk_t target_block;
> + if(block >= total)
Same here:
if (block >= total) {
> + {
> + goto out;
> + }
> + else{
> + target_block = block;
> + goto pm_fault_handler;
> + }
> + pm_fault_handler:
> + result = __skip_dax_fault(vma, vmf,
> ext4_dax_get_block,target_block);
> + }
> + else
> + {
> + /*
> + * Here the higher graded blocks are redirected via DAX path
> + * since we consider Persistent Memory as higher tier.
> + *
> + * ** TODO **
> + * To take care of the case when the higher tier is not
> + * persistent memory (can be HDD-SSD combination), a check
> + * of the same needs to be provided before re-direction.
> + */
> + unsigned long long temp;
> + if(find_grade(grade_array,total,block,&temp) == 1){
> + result = __dax_fault(vma, vmf, ext4_dax_get_block);
> + }
> + else if(find_grade(grade_array,total,block,&temp) == 0){
> + result = ext4_filemap_fault(vma,vmf);
> + }
> + }
> }
> }
> out:
>
>
--
~Randy
Powered by blists - more mailing lists