[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1432239792-5002-9-git-send-email-jglisse@redhat.com>
Date: Thu, 21 May 2015 16:23:04 -0400
From: jglisse@...hat.com
To: akpm@...ux-foundation.org
Cc: <linux-kernel@...r.kernel.org>, linux-mm@...ck.org,
Linus Torvalds <torvalds@...ux-foundation.org>,
<joro@...tes.org>, Mel Gorman <mgorman@...e.de>,
"H. Peter Anvin" <hpa@...or.com>,
Peter Zijlstra <peterz@...radead.org>,
Andrea Arcangeli <aarcange@...hat.com>,
Johannes Weiner <jweiner@...hat.com>,
Larry Woodman <lwoodman@...hat.com>,
Rik van Riel <riel@...hat.com>,
Dave Airlie <airlied@...hat.com>,
Brendan Conoboy <blc@...hat.com>,
Joe Donohue <jdonohue@...hat.com>,
Duncan Poole <dpoole@...dia.com>,
Sherry Cheung <SCheung@...dia.com>,
Subhash Gutti <sgutti@...dia.com>,
John Hubbard <jhubbard@...dia.com>,
Mark Hairgrove <mhairgrove@...dia.com>,
Lucien Dunning <ldunning@...dia.com>,
Cameron Buschardt <cabuschardt@...dia.com>,
Arvind Gopalakrishnan <arvindg@...dia.com>,
Haggai Eran <haggaie@...lanox.com>,
Shachar Raindel <raindel@...lanox.com>,
Liran Liss <liranl@...lanox.com>,
Roland Dreier <roland@...estorage.com>,
Ben Sander <ben.sander@....com>,
Greg Stoner <Greg.Stoner@....com>,
John Bridgman <John.Bridgman@....com>,
Michael Mantor <Michael.Mantor@....com>,
Paul Blinzer <Paul.Blinzer@....com>,
Laurent Morichetti <Laurent.Morichetti@....com>,
Alexander Deucher <Alexander.Deucher@....com>,
Oded Gabbay <Oded.Gabbay@....com>,
Jérôme Glisse <jglisse@...hat.com>,
Jatin Kumar <jakumar@...dia.com>
Subject: [PATCH 28/36] HMM: add mirror fault support for system to device memory migration.
From: Jérôme Glisse <jglisse@...hat.com>
Migration to device memory is done as a special kind of device mirror
fault. Memory migration being initiated by device driver and never by
HMM (unless it is a migration back to system memory).
Signed-off-by: Jérôme Glisse <jglisse@...hat.com>
Signed-off-by: Sherry Cheung <SCheung@...dia.com>
Signed-off-by: Subhash Gutti <sgutti@...dia.com>
Signed-off-by: Mark Hairgrove <mhairgrove@...dia.com>
Signed-off-by: John Hubbard <jhubbard@...dia.com>
Signed-off-by: Jatin Kumar <jakumar@...dia.com>
---
mm/hmm.c | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 181 insertions(+)
diff --git a/mm/hmm.c b/mm/hmm.c
index 1a7554d..7c044f0 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -56,6 +56,10 @@ static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
dma_addr_t *dst,
unsigned long start,
unsigned long end);
+static int hmm_mirror_migrate(struct hmm_mirror *mirror,
+ struct hmm_event *event,
+ struct vm_area_struct *vma,
+ struct hmm_pt_iter *iter);
static inline int hmm_mirror_update(struct hmm_mirror *mirror,
struct hmm_event *event,
struct page *page);
@@ -110,6 +114,12 @@ static inline int hmm_event_init(struct hmm_event *event,
return 0;
}
+static inline unsigned long hmm_event_npages(const struct hmm_event *event)
+{
+ return (PAGE_ALIGN(event->end) - (event->start & PAGE_MASK)) >>
+ PAGE_SHIFT;
+}
+
/* hmm - core HMM functions.
*
@@ -1198,6 +1208,9 @@ retry:
}
switch (event->etype) {
+ case HMM_COPY_TO_DEVICE:
+ ret = hmm_mirror_migrate(mirror, event, vma, &iter);
+ break;
case HMM_DEVICE_RFAULT:
case HMM_DEVICE_WFAULT:
ret = hmm_mirror_handle_fault(mirror, event, vma, &iter);
@@ -1330,6 +1343,174 @@ static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
return ret ? ret : r;
}
+static int hmm_mirror_migrate(struct hmm_mirror *mirror,
+ struct hmm_event *event,
+ struct vm_area_struct *vma,
+ struct hmm_pt_iter *iter)
+{
+ struct hmm_device *device = mirror->device;
+ struct hmm *hmm = mirror->hmm;
+ struct hmm_event invalidate;
+ unsigned long addr, npages;
+ struct hmm_mirror *tmp;
+ dma_addr_t *dst;
+ pte_t *save_pte;
+ int r = 0, ret;
+
+ /* Only allow migration of private anonymous memory. */
+ if (vma->vm_ops || unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)))
+ return -EINVAL;
+
+ /*
+ * TODO More advance loop for splitting migration into several chunk.
+ * For now limit the amount that can be migrated in one shot. Also we
+ * would need to see if we need rescheduling if this is happening as
+ * part of system call to the device driver.
+ */
+ npages = hmm_event_npages(event);
+ if (npages * max(sizeof(*dst), sizeof(*save_pte)) > PAGE_SIZE)
+ return -EINVAL;
+ dst = kzalloc(npages * sizeof(*dst), GFP_KERNEL);
+ if (dst == NULL)
+ return -ENOMEM;
+ save_pte = kzalloc(npages * sizeof(*save_pte), GFP_KERNEL);
+ if (save_pte == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mm_hmm_migrate(hmm->mm, vma, save_pte, &event->backoff,
+ &hmm->mmu_notifier, event->start, event->end);
+ if (ret == -EAGAIN)
+ goto out;
+ if (ret)
+ goto out_cleanup;
+
+ /*
+ * Now invalidate for all other device, note that they can not race
+ * with us as the CPU page table is full of special entry.
+ */
+ hmm_event_init(&invalidate, mirror->hmm, event->start,
+ event->end, HMM_MIGRATE);
+again:
+ down_read(&hmm->rwsem);
+ hlist_for_each_entry(tmp, &hmm->mirrors, mlist) {
+ if (tmp == mirror)
+ continue;
+ if (hmm_mirror_update(tmp, &invalidate, NULL)) {
+ hmm_mirror_ref(tmp);
+ up_read(&hmm->rwsem);
+ hmm_mirror_kill(tmp);
+ hmm_mirror_unref(&tmp);
+ goto again;
+ }
+ }
+ up_read(&hmm->rwsem);
+
+ /*
+ * Populate the mirror page table with saved entry and also mark entry
+ * that can be migrated.
+ */
+ for (addr = event->start; addr < event->end;) {
+ unsigned long i, idx, next, npages;
+ dma_addr_t *hmm_pte;
+
+ hmm_pte = hmm_pt_iter_fault(iter, &mirror->pt, addr);
+ if (!hmm_pte) {
+ ret = -ENOMEM;
+ goto out_cleanup;
+ }
+
+ next = hmm_pt_level_next(&mirror->pt, addr, event->end,
+ mirror->pt.llevel - 1);
+
+ npages = (next - addr) >> PAGE_SHIFT;
+ idx = (addr - event->start) >> PAGE_SHIFT;
+ hmm_pt_iter_directory_lock(iter, &mirror->pt);
+ for (i = 0; i < npages; i++, idx++) {
+ hmm_pte_clear_select(&hmm_pte[i]);
+ if (!pte_present(save_pte[idx]))
+ continue;
+ hmm_pte_set_select(&hmm_pte[i]);
+ /* This can not be a valid device entry here. */
+ VM_BUG_ON(hmm_pte_test_valid_dev(&hmm_pte[i]));
+ if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+ continue;
+
+ if (hmm_pte_test_valid_pfn(&hmm_pte[i]))
+ continue;
+
+ hmm_pt_iter_directory_ref(iter, mirror->pt.llevel);
+ hmm_pte[i] = hmm_pte_from_pfn(pte_pfn(save_pte[idx]));
+ if (pte_write(save_pte[idx]))
+ hmm_pte_set_write(&hmm_pte[i]);
+ hmm_pte_set_select(&hmm_pte[i]);
+ }
+ hmm_pt_iter_directory_unlock(iter, &mirror->pt);
+
+ if (device->dev) {
+ spinlock_t *lock;
+
+ lock = hmm_pt_iter_directory_lock_ptr(iter,
+ &mirror->pt);
+ ret = hmm_mirror_dma_map_range(mirror, hmm_pte,
+ lock, npages);
+ /* Keep going only for entry that have been mapped. */
+ if (ret) {
+ for (i = 0; i < npages; ++i) {
+ if (!hmm_pte_test_select(&dst[i]))
+ continue;
+ if (!hmm_pte_test_valid_dma(&dst[i]))
+ continue;
+ hmm_pte_clear_select(&hmm_pte[i]);
+ }
+ }
+ }
+ addr = next;
+ }
+
+ /* Now Waldo we can do the copy. */
+ r = device->ops->copy_to_device(mirror, event, dst,
+ event->start, event->end);
+
+ /* Update mirror page table with successfully migrated entry. */
+ for (addr = event->start; addr < event->end;) {
+ unsigned long i, idx, next, npages;
+ dma_addr_t *hmm_pte;
+
+ hmm_pte = hmm_pt_iter_update(iter, &mirror->pt, addr);
+ if (!hmm_pte) {
+ addr = hmm_pt_iter_next(iter, &mirror->pt,
+ addr, event->end);
+ continue;
+ }
+
+ next = hmm_pt_level_next(&mirror->pt, addr, event->end,
+ mirror->pt.llevel - 1);
+
+ npages = (next - addr) >> PAGE_SHIFT;
+ idx = (addr - event->start) >> PAGE_SHIFT;
+ hmm_pt_iter_directory_lock(iter, &mirror->pt);
+ for (i = 0; i < npages; i++, idx++) {
+ if (!hmm_pte_test_valid_dev(&dst[idx]))
+ continue;
+
+ VM_BUG_ON(!hmm_pte_test_select(&hmm_pte[i]));
+ hmm_pte[i] = dst[idx];
+ }
+ hmm_pt_iter_directory_unlock(iter, &mirror->pt);
+ addr = next;
+ }
+
+out_cleanup:
+ mm_hmm_migrate_cleanup(hmm->mm, vma, save_pte, dst,
+ event->start, event->end);
+out:
+ kfree(save_pte);
+ kfree(dst);
+ return ret ? ret : r;
+}
+
/* hmm_mirror_range_discard() - discard a range of address.
*
* @mirror: The mirror struct.
--
1.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists