[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <058d01d149dd$5b436ac0$11ca4040$@alibaba-inc.com>
Date:	Fri, 08 Jan 2016 14:25:23 +0800
From:	"Hillf Danton" <hillf.zj@...baba-inc.com>
To:	"'Mike Kravetz'" <mike.kravetz@...cle.com>,
	<linux-kernel@...r.kernel.org>, <linux-mm@...ck.org>
Cc:	"'Hugh Dickins'" <hughd@...gle.com>,
	"'Naoya Horiguchi'" <n-horiguchi@...jp.nec.com>,
	"'Davidlohr Bueso'" <dave@...olabs.net>,
	"'Dave Hansen'" <dave.hansen@...ux.intel.com>,
	"'Andrew Morton'" <akpm@...ux-foundation.org>,
	"'Michel Lespinasse'" <walken@...gle.com>
Subject: Re: [PATCH] mm/hugetlbfs: Unmap pages if page fault raced with hole punch
> On 01/07/2016 12:06 AM, Hillf Danton wrote:
> >> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> >> index 0444760..0871d70 100644
> >> --- a/fs/hugetlbfs/inode.c
> >> +++ b/fs/hugetlbfs/inode.c
> >> @@ -324,11 +324,46 @@ static void remove_huge_page(struct page *page)
> >>  	delete_from_page_cache(page);
> >>  }
> >>
> >> +static inline void
> >> +hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
> >> +{
> >> +	struct vm_area_struct *vma;
> >> +
> >> +	/*
> >> +	 * end == 0 indicates that the entire range after
> >> +	 * start should be unmapped.
> >> +	 */
> >> +	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
> >
> > [1] perhaps end can be reused.
> >
> >> +		unsigned long v_offset;
> >> +
> >> +		/*
> >> +		 * Can the expression below overflow on 32-bit arches?
> >> +		 * No, because the interval tree returns us only those vmas
> >> +		 * which overlap the truncated area starting at pgoff,
> >> +		 * and no vma on a 32-bit arch can span beyond the 4GB.
> >> +		 */
> >> +		if (vma->vm_pgoff < start)
> >> +			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
> >> +		else
> >> +			v_offset = 0;
> >> +
> >> +		if (end) {
> >> +			end = ((end - start) << PAGE_SHIFT) +
> >> +			       vma->vm_start + v_offset;
> >
> > [2] end is input to be pgoff_t, but changed to be the type of v_offset.
> > Further we cannot handle the case that end is input to be zero.
> > See the diff below please.
> >
> <snip>
> >
> > --- a/fs/hugetlbfs/inode.c	Thu Jan  7 15:04:35 2016
> > +++ b/fs/hugetlbfs/inode.c	Thu Jan  7 15:31:03 2016
> > @@ -461,8 +461,11 @@ hugetlb_vmdelete_list(struct rb_root *ro
> >  	 * end == 0 indicates that the entire range after
> >  	 * start should be unmapped.
> >  	 */
> > -	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
> > +	if (!end)
> > +		end = ULONG_MAX;
> > +	vma_interval_tree_foreach(vma, root, start, end) {
> >  		unsigned long v_offset;
> > +		unsigned long v_end;
> >
> >  		/*
> >  		 * Can the expression below overflow on 32-bit arches?
> > @@ -475,15 +478,12 @@ hugetlb_vmdelete_list(struct rb_root *ro
> >  		else
> >  			v_offset = 0;
> >
> > -		if (end) {
> > -			end = ((end - start) << PAGE_SHIFT) +
> > +		v_end = ((end - start) << PAGE_SHIFT) +
> >  			       vma->vm_start + v_offset;
> > -			if (end > vma->vm_end)
> > -				end = vma->vm_end;
> > -		} else
> > -			end = vma->vm_end;
> > +		if (v_end > vma->vm_end)
> > +			v_end = vma->vm_end;
> >
> > -		unmap_hugepage_range(vma, vma->vm_start + v_offset, end, NULL);
> > +		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end, NULL);
> >  	}
> >  }
> >
> > --
> 
> Unfortunately, that calculation of v_end is not correct.  I know it
> is based on the existing code, but the existing code it not correct.
> 
> I attempted to fix in a patch earlier today, but that was not correct
> either.  Below is a proposed new version of hugetlb_vmdelete_list.
Thanks Mike.
> Let me know what you think.
> 
> static inline void
> hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
> {
> 	struct vm_area_struct *vma;
> 
> 	/*
> 	 * end == 0 indicates that the entire range after
> 	 * start should be unmapped.
> 	 */
> 	vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
> 		unsigned long v_offset;
> 		unsigned long v_end;
> 
> 		/*
> 		 * Can the expression below overflow on 32-bit arches?
> 		 * No, because the interval tree returns us only those vmas
> 		 * which overlap the truncated area starting at pgoff,
> 		 * and no vma on a 32-bit arch can span beyond the 4GB.
> 		 */
> 		if (vma->vm_pgoff < start)
> 			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
> 		else
> 			v_offset = 0;
> 
> 		if (!end)
> 			v_end = vma->vm_end;
> 		else {
> 			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
> 							+ vma->vm_start;
> 			if (v_end > vma->vm_end)
> 				v_end = vma->vm_end;
> 		}
> 
> 		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
> 									NULL);
> 	}
> }
> 
Looks good to me.
Hillf
Powered by blists - more mailing lists
 
