[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1312893028.26263.129.camel@zakaz.uk.xensource.com>
Date: Tue, 9 Aug 2011 13:30:28 +0100
From: Ian Campbell <Ian.Campbell@...citrix.com>
To: Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
CC: Stefano Stabellini <Stefano.Stabellini@...citrix.com>,
"xen-devel@...ts.xensource.com" <xen-devel@...ts.xensource.com>,
"Jeremy Fitzhardinge" <Jeremy.Fitzhardinge@...rix.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] xen: modify kernel mappings corresponding to granted
pages
On Tue, 2011-08-09 at 03:34 +0100, Konrad Rzeszutek Wilk wrote:
> On Tue, Jul 26, 2011 at 05:55:45PM +0100, stefano.stabellini@...citrix.com wrote:
> > From: Stefano Stabellini <stefano.stabellini@...citrix.com>
> >
> > If we want to use granted pages for AIO, changing the mappings of a user
> > vma and the corresponding p2m is not enough, we also need to update the
> > kernel mappings accordingly.
> > On x86_64 it is easy, we can issue another HYPERVISOR_grant_table_op
> > right away in m2p_add_override. We can remove the mappings using another
> > HYPERVISOR_grant_table_op in m2p_remove_override.
> > On x86_32 it is more difficult because the pages are highmem pages and
> > therefore we need to catch the set_pte that tries to map a granted page
> > and issue an HYPERVISOR_grant_table_op instead.
> > Same thing for unmapping them: we need to catch the pte clear or the
> > set_pte that try to unmap a granted page and issue an
> > HYPERVISOR_grant_table_op.
>
> So I hadn't looked at this in detail, but I wonder if we can use the
> MULTIcall for this? It looks like we need to do two hypercalls so why
> not batch it?
That was going to be my next question. We should definitely batch these
if possible.
> And while we are it - we could change the MMU ops to only do this on
> initial domain and for the domU case do the old mechanism?
We need this in domU for driver domains and the like, don't we?
> In essence
> adding two variants of the xen_set_pte - domU and dom0? .. And we
> should get rid of the debug one (xen_set_pte_debug) - it has helped a bit
> but I don't think anybody is using it except me.
>
> >
> > Signed-off-by: Stefano Stabellini <stefano.stabellini@...citrix.com>
> > ---
> > arch/x86/include/asm/xen/page.h | 5 ++-
> > arch/x86/xen/mmu.c | 68 +++++++++++++++++++++++++++++++++++++++
> > arch/x86/xen/p2m.c | 47 ++++++++++++++++++++------
> > drivers/xen/gntdev.c | 27 +++++++++++++++-
> > drivers/xen/grant-table.c | 4 +-
> > include/xen/grant_table.h | 1 +
> > 6 files changed, 137 insertions(+), 15 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> > index 64a619d..ec7bbfb 100644
> > --- a/arch/x86/include/asm/xen/page.h
> > +++ b/arch/x86/include/asm/xen/page.h
> > @@ -12,6 +12,7 @@
> > #include <asm/pgtable.h>
> >
> > #include <xen/interface/xen.h>
> > +#include <xen/grant_table.h>
> > #include <xen/features.h>
> >
> > /* Xen machine address */
> > @@ -31,8 +32,10 @@ typedef struct xpaddr {
> > #define INVALID_P2M_ENTRY (~0UL)
> > #define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
> > #define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
> > +#define GRANT_FRAME_BIT (1UL<<(BITS_PER_LONG-3))
> > #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
> > #define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
> > +#define GRANT_FRAME(m) ((m) | GRANT_FRAME_BIT)
> >
> > /* Maximum amount of memory we can handle in a domain in pages */
> > #define MAX_DOMAIN_PAGES \
> > @@ -48,7 +51,7 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
> > unsigned long pfn_e);
> >
> > extern int m2p_add_override(unsigned long mfn, struct page *page,
> > - bool clear_pte);
> > + struct gnttab_map_grant_ref *kmap_op);
> > extern int m2p_remove_override(struct page *page, bool clear_pte);
> > extern struct page *m2p_find_override(unsigned long mfn);
> > extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
> > diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> > index 4e37a7c0..13f20d8 100644
> > --- a/arch/x86/xen/mmu.c
> > +++ b/arch/x86/xen/mmu.c
> > @@ -282,8 +282,70 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
> > return true;
> > }
> >
> > +#ifdef CONFIG_HIGHMEM
> > +static int xen_unmap_granted_page(pte_t *ptep)
> > +{
> > + unsigned long mfn;
> > + struct page *page;
> > +
> > + if (pte_flags(*ptep) & _PAGE_USER)
> > + return 1;
> > + mfn = (ptep->pte & PTE_PFN_MASK) >> PAGE_SHIFT;
> > + page = m2p_find_override(mfn);
> > + if (page != NULL && (page->private & GRANT_FRAME_BIT)) {
> > + int ret;
> > + struct gnttab_unmap_grant_ref kunmap_op;
> > + struct gnttab_map_grant_ref *kmap_op =
> > + (struct gnttab_map_grant_ref *) page->index;
> > + kunmap_op.host_addr = kmap_op->host_addr;
> > + kunmap_op.handle = kmap_op->handle;
> > + kunmap_op.dev_bus_addr = 0;
> > + ret = HYPERVISOR_grant_table_op(
> > + GNTTABOP_unmap_grant_ref, &kunmap_op, 1);
> > + WARN(ret, "m2p_remove_override: pfn %lx mfn %lx, failed to "
> > + "modify kernel mappings", page_to_pfn(page), mfn);
> > + return ret;
> > + }
> > + return 1;
> > +}
> > +#endif
> > +
> > static void xen_set_pte(pte_t *ptep, pte_t pteval)
> > {
> > +#ifdef CONFIG_HIGHMEM
> > + if (!(pte_flags(pteval) & _PAGE_USER)) {
> > + int ret;
> > + struct page *page;
> > + unsigned long mfn = (pteval.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
> > + page = m2p_find_override(mfn);
> > + /*
> > + * if this is a granted page we need to use a grant table
> > + * hypercall to map it instead
> > + */
> > + if (page != NULL && (page->private & GRANT_FRAME_BIT)) {
> > + struct gnttab_map_grant_ref *kmap_op =
> > + (struct gnttab_map_grant_ref *) page->index;
> > + unsigned long old_mfn = kmap_op->dev_bus_addr;
> > + kmap_op->host_addr =
> > + arbitrary_virt_to_machine(ptep).maddr;
> > + kmap_op->dev_bus_addr = 0;
> > + ret = HYPERVISOR_grant_table_op(
> > + GNTTABOP_map_grant_ref, kmap_op, 1);
> > + WARN(ret, "xen_set_pte: pfn %lx mfn %lx, failed to "
> > + "modify kernel mappings",
> > + page_to_pfn(page), mfn);
> > + kmap_op->dev_bus_addr = old_mfn;
> > + return;
> > + }
> > + /*
> > + * even if it is not a granted page, the old page we are about
> > + * to overwrite could be a granted page and in that case we need
> > + * to unmap it using a grant table hypercall
> > + */
> > + xen_unmap_granted_page(ptep);
> > + }
> > +#endif
> > +
> > if (!xen_batched_set_pte(ptep, pteval))
> > native_set_pte(ptep, pteval);
> > }
> > @@ -548,6 +610,12 @@ static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
> >
> > static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
> > {
> > + /*
> > + * check if this is a granted page and unmap it using a grant table
> > + * hypercall in that case
> > + */
> > + if (!xen_unmap_granted_page(ptep))
> > + return;
> > if (!xen_batched_set_pte(ptep, native_make_pte(0)))
> > native_pte_clear(mm, addr, ptep);
> > }
> > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> > index 58efeb9..1c4d2b5 100644
> > --- a/arch/x86/xen/p2m.c
> > +++ b/arch/x86/xen/p2m.c
> > @@ -161,6 +161,7 @@
> > #include <asm/xen/page.h>
> > #include <asm/xen/hypercall.h>
> > #include <asm/xen/hypervisor.h>
> > +#include <xen/grant_table.h>
> >
> > #include "xen-ops.h"
> >
> > @@ -676,7 +677,8 @@ static unsigned long mfn_hash(unsigned long mfn)
> > }
> >
> > /* Add an MFN override for a particular page */
> > -int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
> > +int m2p_add_override(unsigned long mfn, struct page *page,
> > + struct gnttab_map_grant_ref *kmap_op)
> > {
> > unsigned long flags;
> > unsigned long pfn;
> > @@ -699,9 +701,18 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
> > if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
> > return -ENOMEM;
> >
> > - if (clear_pte && !PageHighMem(page))
> > - /* Just zap old mapping for now */
> > - pte_clear(&init_mm, address, ptep);
> > + if (kmap_op != NULL) {
> > + if (!PageHighMem(page)) {
> > + int ret = HYPERVISOR_grant_table_op(
> > + GNTTABOP_map_grant_ref, kmap_op, 1);
> > + WARN(ret, "m2p_add_override: pfn %lx mfn %lx, "
> > + "failed to modify kernel mappings", pfn, mfn);
> > + }
> > + page->private |= GRANT_FRAME_BIT;
> > + /* let's use dev_bus_addr to record the old mfn instead */
> > + kmap_op->dev_bus_addr = page->index;
> > + page->index = (unsigned long) kmap_op;
> > + }
> > spin_lock_irqsave(&m2p_override_lock, flags);
> > list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
> > spin_unlock_irqrestore(&m2p_override_lock, flags);
> > @@ -735,13 +746,27 @@ int m2p_remove_override(struct page *page, bool clear_pte)
> > spin_lock_irqsave(&m2p_override_lock, flags);
> > list_del(&page->lru);
> > spin_unlock_irqrestore(&m2p_override_lock, flags);
> > - set_phys_to_machine(pfn, page->index);
> >
> > - if (clear_pte && !PageHighMem(page))
> > - set_pte_at(&init_mm, address, ptep,
> > - pfn_pte(pfn, PAGE_KERNEL));
> > - /* No tlb flush necessary because the caller already
> > - * left the pte unmapped. */
> > + if (clear_pte) {
> > + struct gnttab_map_grant_ref *map_op =
> > + (struct gnttab_map_grant_ref *) page->index;
> > + set_phys_to_machine(pfn, map_op->dev_bus_addr);
> > + if (!PageHighMem(page)) {
> > + int ret;
> > + struct gnttab_unmap_grant_ref unmap_op;
> > + unmap_op.host_addr = map_op->host_addr;
> > + unmap_op.handle = map_op->handle;
> > + unmap_op.dev_bus_addr = 0;
> > + ret = HYPERVISOR_grant_table_op(
> > + GNTTABOP_unmap_grant_ref, &unmap_op, 1);
> > + WARN(ret, "m2p_remove_override: pfn %lx mfn %lx, "
> > + "failed to modify kernel mappings", pfn, mfn);
> > + set_pte_at(&init_mm, address, ptep,
> > + pfn_pte(pfn, PAGE_KERNEL));
> > + __flush_tlb_single(address);
> > + }
> > + } else
> > + set_phys_to_machine(pfn, page->index);
> >
> > return 0;
> > }
> > @@ -758,7 +783,7 @@ struct page *m2p_find_override(unsigned long mfn)
> > spin_lock_irqsave(&m2p_override_lock, flags);
> >
> > list_for_each_entry(p, bucket, lru) {
> > - if (p->private == mfn) {
> > + if ((p->private & (~GRANT_FRAME_BIT)) == mfn) {
> > ret = p;
> > break;
> > }
> > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> > index f914b26..ca41772 100644
> > --- a/drivers/xen/gntdev.c
> > +++ b/drivers/xen/gntdev.c
> > @@ -83,6 +83,7 @@ struct grant_map {
> > struct ioctl_gntdev_grant_ref *grants;
> > struct gnttab_map_grant_ref *map_ops;
> > struct gnttab_unmap_grant_ref *unmap_ops;
> > + struct gnttab_map_grant_ref *kmap_ops;
> > struct page **pages;
> > };
> >
> > @@ -116,10 +117,12 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
> > add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
> > add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
> > add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
> > + add->kmap_ops = kzalloc(sizeof(add->kmap_ops[0]) * count, GFP_KERNEL);
> > add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL);
> > if (NULL == add->grants ||
> > NULL == add->map_ops ||
> > NULL == add->unmap_ops ||
> > + NULL == add->kmap_ops ||
> > NULL == add->pages)
> > goto err;
> >
> > @@ -129,6 +132,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
> > for (i = 0; i < count; i++) {
> > add->map_ops[i].handle = -1;
> > add->unmap_ops[i].handle = -1;
> > + add->kmap_ops[i].handle = -1;
> > }
> >
> > add->index = 0;
> > @@ -142,6 +146,7 @@ err:
> > kfree(add->grants);
> > kfree(add->map_ops);
> > kfree(add->unmap_ops);
> > + kfree(add->kmap_ops);
> > kfree(add);
> > return NULL;
> > }
> > @@ -243,10 +248,30 @@ static int map_grant_pages(struct grant_map *map)
> > gnttab_set_unmap_op(&map->unmap_ops[i], addr,
> > map->flags, -1 /* handle */);
> > }
> > + } else {
> > + for (i = 0; i < map->count; i++) {
> > + unsigned level;
> > + unsigned long address = (unsigned long)
> > + pfn_to_kaddr(page_to_pfn(map->pages[i]));
> > + pte_t *ptep;
> > + u64 pte_maddr = 0;
> > + if (!PageHighMem(map->pages[i])) {
> > + ptep = lookup_address(address, &level);
> > + pte_maddr =
> > + arbitrary_virt_to_machine(ptep).maddr;
> > + }
> > + gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
> > + map->flags |
> > + GNTMAP_host_map |
> > + GNTMAP_contains_pte,
> > + map->grants[i].ref,
> > + map->grants[i].domid);
> > + }
> > }
> >
> > pr_debug("map %d+%d\n", map->index, map->count);
> > - err = gnttab_map_refs(map->map_ops, map->pages, map->count);
> > + err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
> > + map->pages, map->count);
> > if (err)
> > return err;
> >
> > diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> > index fd725cd..1e8669a 100644
> > --- a/drivers/xen/grant-table.c
> > +++ b/drivers/xen/grant-table.c
> > @@ -448,6 +448,7 @@ unsigned int gnttab_max_grant_frames(void)
> > EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
> >
> > int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
> > + struct gnttab_map_grant_ref *kmap_ops,
> > struct page **pages, unsigned int count)
> > {
> > int i, ret;
> > @@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
> > */
> > return -EOPNOTSUPP;
> > }
> > - ret = m2p_add_override(mfn, pages[i],
> > - map_ops[i].flags & GNTMAP_contains_pte);
> > + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
> > if (ret)
> > return ret;
> > }
> > diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
> > index b1fab6b..6b99bfb 100644
> > --- a/include/xen/grant_table.h
> > +++ b/include/xen/grant_table.h
> > @@ -156,6 +156,7 @@ unsigned int gnttab_max_grant_frames(void);
> > #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
> >
> > int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
> > + struct gnttab_map_grant_ref *kmap_ops,
> > struct page **pages, unsigned int count);
> > int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
> > struct page **pages, unsigned int count);
> > --
> > 1.7.2.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists