lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 23 Apr 2015 15:13:13 -0700
From:	Mike Kravetz <mike.kravetz@...cle.com>
To:	linux-mm@...ck.org, linux-kernel@...r.kernel.org
Cc:	Dave Hansen <dave.hansen@...ux.intel.com>,
	Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
	David Rientjes <rientjes@...gle.com>,
	Hugh Dickins <hughd@...gle.com>,
	Davidlohr Bueso <dave@...olabs.net>,
	Aneesh Kumar <aneesh.kumar@...ux.vnet.ibm.com>,
	Hillf Danton <hillf.zj@...baba-inc.com>,
	Christoph Hellwig <hch@...radead.org>,
	Mike Kravetz <mike.kravetz@...cle.com>
Subject: [RFC v2 PATCH 1/5] hugetlbfs: truncate_hugepages() takes a range of pages

Modify truncate_hugepages() to take a range of pages (start, end)
instead of simply start.  If the value of end is -1, this indicates
the end of the range is the end of the file.  This functionality
will be used for fallocate hole punching.

Downstream of truncate_hugepages, the routines hugetlb_unreserve_pages
must also be modified to accept a range of pages.

A new region tracking/resv_map routine region_del() is added to delete
a range of regions within the reserve maps.  As in truncate_hugepages,
a range end value of -1 indicates all regions after the starting value
should be deleted.

Based-on code-by: Dave Hansen <dave.hansen@...ux.intel.com>
Signed-off-by: Mike Kravetz <mike.kravetz@...cle.com>
---
 fs/hugetlbfs/inode.c    | 31 +++++++++++++++-----
 include/linux/hugetlb.h |  3 +-
 mm/hugetlb.c            | 76 +++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c274aca..2faf2c4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -324,19 +324,32 @@ static void truncate_huge_page(struct page *page)
 	delete_from_page_cache(page);
 }
 
-static void truncate_hugepages(struct inode *inode, loff_t lstart)
+static void truncate_hugepages(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	struct hstate *h = hstate_inode(inode);
 	struct address_space *mapping = &inode->i_data;
 	const pgoff_t start = lstart >> huge_page_shift(h);
+	const pgoff_t end = lend >> huge_page_shift(h);
 	struct pagevec pvec;
 	pgoff_t next;
 	int i, freed = 0;
+	long lookup_nr = PAGEVEC_SIZE;
 
 	pagevec_init(&pvec, 0);
 	next = start;
-	while (1) {
-		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+	while (next < end) {
+		/*
+		 * Make sure to never grab more pages that we
+		 * might possibly need.
+		 */
+		if (end - next < lookup_nr)
+			lookup_nr = end - next;
+
+		/*
+		 * This pagevec_lookup() may return pages past 'end',
+		 * so we must check for page->index > end.
+		 */
+		if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) {
 			if (next == start)
 				break;
 			next = start;
@@ -347,6 +360,11 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 			struct page *page = pvec.pages[i];
 
 			lock_page(page);
+			if (page->index >= end) {
+				unlock_page(page);
+				next = end;	/* we are done */
+				break;
+			}
 			if (page->index > next)
 				next = page->index;
 			++next;
@@ -356,15 +374,14 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 		}
 		huge_pagevec_release(&pvec);
 	}
-	BUG_ON(!lstart && mapping->nrpages);
-	hugetlb_unreserve_pages(inode, start, freed);
+	hugetlb_unreserve_pages(inode, start, end, freed);
 }
 
 static void hugetlbfs_evict_inode(struct inode *inode)
 {
 	struct resv_map *resv_map;
 
-	truncate_hugepages(inode, 0);
+	truncate_hugepages(inode, 0, -1);
 	resv_map = (struct resv_map *)inode->i_mapping->private_data;
 	/* root inode doesn't have the resv_map, so we should check it */
 	if (resv_map)
@@ -410,7 +427,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 	if (!RB_EMPTY_ROOT(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
 	i_mmap_unlock_write(mapping);
-	truncate_hugepages(inode, offset);
+	truncate_hugepages(inode, offset, -1);
 	return 0;
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7b57850..de39705 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -75,7 +75,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						struct vm_area_struct *vma,
 						vm_flags_t vm_flags);
-void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
+void hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+						long freed);
 int dequeue_hwpoisoned_huge_page(struct page *page);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c41b2a0..31e36cd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -257,6 +257,77 @@ out_nrg:
 	return chg;
 }
 
+static long region_del(struct resv_map *resv, long f, long t)
+{
+	struct list_head *head = &resv->regions;
+	struct file_region *rg, *trg;
+	struct file_region *nrg = NULL;
+	long chg = 0;
+
+	/*
+	 * Locate segments we overlap and etiher split, remove or
+	 * trim the existing regions.  The end of region (t) == -1
+	 * indicates all remaining regions.  Special case t == -1 as
+	 * all comparisons are signed.
+	 */
+	if (t == -1)
+		t = LONG_MAX;
+retry:
+	spin_lock(&resv->lock);
+	list_for_each_entry_safe(rg, trg, head, link) {
+		if (rg->to <= f)
+			continue;
+		if (rg->from >= t)
+			break;
+
+		if (f > rg->from && t < rg->to) { /* must split region */
+			if (!nrg) {
+				spin_unlock(&resv->lock);
+				nrg = kmalloc(sizeof(*nrg),
+						GFP_KERNEL |  __GFP_REPEAT);
+				if (!nrg) {
+					/* FIXME FIXME FIXME FIXME */
+					return -ENOMEM;
+				}
+				goto retry;
+			}
+
+			chg += t - f;
+
+			/* new entry for end of split region */
+			nrg->from = t;
+			nrg->to = rg->to;
+			INIT_LIST_HEAD(&nrg->link);
+
+			/* original entry is trimmed */
+			rg->to = f;
+
+			list_add(&nrg->link, &rg->link);
+			nrg = NULL;
+			break;
+		}
+
+		if (f <= rg->from && t >= rg->to) { /* remove entire region */
+			chg += rg->to - rg->from;
+			list_del(&rg->link);
+			kfree(rg);
+			continue;
+		}
+
+		if (f <= rg->from) {	/* trim beginning of region */
+			chg += t - rg->from;
+			rg->from = t;
+		} else {		/* trim end of region */
+			chg += rg->to - f;
+			rg->to = f;
+		}
+	}
+
+	spin_unlock(&resv->lock);
+	kfree(nrg);
+	return chg;
+}
+
 static long region_truncate(struct resv_map *resv, long end)
 {
 	struct list_head *head = &resv->regions;
@@ -3510,7 +3581,8 @@ out_err:
 	return ret;
 }
 
-void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
+void hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+								long freed)
 {
 	struct hstate *h = hstate_inode(inode);
 	struct resv_map *resv_map = inode_resv_map(inode);
@@ -3518,7 +3590,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	struct hugepage_subpool *spool = subpool_inode(inode);
 
 	if (resv_map)
-		chg = region_truncate(resv_map, offset);
+		chg = region_del(resv_map, start, end);
 	spin_lock(&inode->i_lock);
 	inode->i_blocks -= (blocks_per_huge_page(h) * freed);
 	spin_unlock(&inode->i_lock);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ