[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20080429110919.dc7b8565.akpm@linux-foundation.org>
Date: Tue, 29 Apr 2008 11:09:19 -0700
From: Andrew Morton <akpm@...ux-foundation.org>
To: Rik van Riel <riel@...hat.com>
Cc: linux-kernel@...r.kernel.org, ajackson@...hat.com,
airlied@...hat.com, benh@...nel.crashing.org
Subject: Re: [PATCH 1/3] make access_process_vm work on device memory
On Tue, 29 Apr 2008 11:32:37 -0400
Rik van Riel <riel@...hat.com> wrote:
> Make access_process_vm work on VM_IO or VM_PFNMAP VMAs. This allows ptrace
> and /proc/pid/mem access to work on eg. video card memory mapped by the X
> server and lays the groundwork for gdb support for PPC Cell SPU memory.
>
> Signed-off-by: Rik van Riel <riel@...hat.com>
> Signed-off-by: Benjamin Herrensmidt <benh@...nel.crashing.org>
>
> ---
>
> arch/x86/mm/ioremap.c | 8 ++
> include/asm-x86/io_32.h | 3 +
> include/asm-x86/io_64.h | 3 +
> include/linux/mm.h | 6 ++
> mm/memory.c | 134 +++++++++++++++++++++++++++++++++++++++++-------
I'll consider this an MM patch, to be mastered in -mm. If agreeable, x86
review-and-acks would be nice, please.
>
> Index: linux-2.6.25-mm1/mm/memory.c
> ===================================================================
> --- linux-2.6.25-mm1.orig/mm/memory.c 2008-04-27 11:06:04.000000000 -0400
> +++ linux-2.6.25-mm1/mm/memory.c 2008-04-29 00:52:01.000000000 -0400
> @@ -2720,6 +2720,86 @@ int in_gate_area_no_task(unsigned long a
>
> #endif /* __HAVE_ARCH_GATE_AREA */
>
> +#ifdef _HAVE_ARCH_IOREMAP_PROT
urgh.
We have HAVE_ARCH*
We have __HAVE_ARCH*
We have ARCH_HAS*
We have __ARCH_HAS*
what a mess.
Probably the preferred (but still ugly) approach is to implement
CONFIG_ARCH_*.
> +static resource_size_t follow_phys(struct vm_area_struct *vma,
> + unsigned long address, unsigned int flags,
> + unsigned long *prot)
> +{
> + pgd_t *pgd;
> + pud_t *pud;
> + pmd_t *pmd;
> + pte_t *ptep, pte;
> + spinlock_t *ptl;
> + resource_size_t phys_addr = 0;
> + struct mm_struct *mm = vma->vm_mm;
> +
> + VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
> +
> + pgd = pgd_offset(mm, address);
> + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
> + goto no_page_table;
> +
> + pud = pud_offset(pgd, address);
> + if (pud_none(*pud) || unlikely(pud_bad(*pud)))
> + goto no_page_table;
> +
> + pmd = pmd_offset(pud, address);
> + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
> + goto no_page_table;
> +
> + /* We cannot handle huge page PFN maps. Luckily they don't exist. */
> + if (pmd_huge(*pmd))
> + goto no_page_table;
> +
> + ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
> + if (!ptep)
> + goto out;
hm, more copy-n-paste.
> + pte = *ptep;
> + if (!pte_present(pte))
> + goto unlock;
> + if ((flags & FOLL_WRITE) && !pte_write(pte))
> + goto unlock;
> + phys_addr = pte_pfn(pte);
> + phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE? */
That comment betrays a lack of confidence ;)
What's the score here?
> + *prot = pgprot_val(pte_pgprot(pte));
> +
> +unlock:
> + pte_unmap_unlock(ptep, ptl);
> +out:
> + return phys_addr;
> +no_page_table:
> + return 0;
> +}
> +
> +int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
> + void *buf, int len, int write)
> +{
> + resource_size_t phys_addr;
> + unsigned long prot = 0;
> + void *maddr;
> + int offset = addr & (PAGE_SIZE-1);
> +
> + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
> + return -EINVAL;
> +
> + phys_addr = follow_phys(vma, addr, write, &prot);
> +
> + if (!phys_addr)
> + return -EINVAL;
> +
> + maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
> + if (write)
> + memcpy_toio(maddr + offset, buf, len);
> + else
> + memcpy_fromio(buf, maddr + offset, len);
> + iounmap(maddr);
> +
> + return len;
> +}
> +#endif
> +
> /*
> * Access another process' address space.
> * Source/target buffer must be kernel space,
> @@ -2729,7 +2809,6 @@ int access_process_vm(struct task_struct
> {
> struct mm_struct *mm;
> struct vm_area_struct *vma;
> - struct page *page;
> void *old_buf = buf;
>
> mm = get_task_mm(tsk);
> @@ -2741,28 +2820,47 @@ int access_process_vm(struct task_struct
> while (len) {
> int bytes, ret, offset;
> void *maddr;
> + struct page *page = NULL;
>
> ret = get_user_pages(tsk, mm, addr, 1,
> write, 1, &page, &vma);
> - if (ret <= 0)
> - break;
> -
> - bytes = len;
> - offset = addr & (PAGE_SIZE-1);
> - if (bytes > PAGE_SIZE-offset)
> - bytes = PAGE_SIZE-offset;
> -
> - maddr = kmap(page);
> - if (write) {
> - copy_to_user_page(vma, page, addr,
> - maddr + offset, buf, bytes);
> - set_page_dirty_lock(page);
> + if (ret <= 0) {
> + /*
> + * Check if this is a VM_IO | VM_PFNMAP VMA, which
> + * we can access using slightly different code.
> + */
> +#ifdef _HAVE_ARCH_IOREMAP_PROT
> + vma = find_vma(mm, addr);
> + if (!vma)
> + break;
> + if (vma->vm_ops && vma->vm_ops->access)
> + ret = vma->vm_ops->access(vma, addr, buf,
> + len, write);
> + else
> + ret = generic_access_phys(vma, addr, buf,
> + len, write);
Should we do it this way, or should we ensure that all suitable vmas have
set their ->vm_ops->access to generic_access_phys()?
> + if (ret <= 0)
> +#endif
> + break;
> + bytes = ret;
> } else {
> - copy_from_user_page(vma, page, addr,
> - buf, maddr + offset, bytes);
> + bytes = len;
> + offset = addr & (PAGE_SIZE-1);
> + if (bytes > PAGE_SIZE-offset)
> + bytes = PAGE_SIZE-offset;
> +
> + maddr = kmap(page);
> + if (write) {
> + copy_to_user_page(vma, page, addr,
> + maddr + offset, buf, bytes);
> + set_page_dirty_lock(page);
> + } else {
> + copy_from_user_page(vma, page, addr,
> + buf, maddr + offset, bytes);
> + }
> + kunmap(page);
> + page_cache_release(page);
> }
> - kunmap(page);
> - page_cache_release(page);
> len -= bytes;
> buf += bytes;
> addr += bytes;
> Index: linux-2.6.25-mm1/arch/x86/mm/ioremap.c
> ===================================================================
> --- linux-2.6.25-mm1.orig/arch/x86/mm/ioremap.c 2008-04-22 10:33:42.000000000 -0400
> +++ linux-2.6.25-mm1/arch/x86/mm/ioremap.c 2008-04-28 22:06:08.000000000 -0400
> @@ -287,6 +287,14 @@ void __iomem *ioremap_cache(resource_siz
> }
> EXPORT_SYMBOL(ioremap_cache);
>
> +void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
> + unsigned long prot_val)
> +{
> + return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
> + __builtin_return_address(0));
> +}
> +EXPORT_SYMBOL(ioremap_prot);
> +
> /**
> * iounmap - Free a IO remapping
> * @addr: virtual address from ioremap_*
> Index: linux-2.6.25-mm1/include/asm-x86/io_32.h
> ===================================================================
> --- linux-2.6.25-mm1.orig/include/asm-x86/io_32.h 2008-04-22 10:33:44.000000000 -0400
> +++ linux-2.6.25-mm1/include/asm-x86/io_32.h 2008-04-28 21:38:03.000000000 -0400
> @@ -110,6 +110,9 @@ static inline void *phys_to_virt(unsigne
> */
> extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
> extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
> +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
> + unsigned long prot_val);
> +#define _HAVE_ARCH_IOREMAP_PROT
>
> /*
> * The default ioremap() behavior is non-cached:
> Index: linux-2.6.25-mm1/include/asm-x86/io_64.h
> ===================================================================
> --- linux-2.6.25-mm1.orig/include/asm-x86/io_64.h 2008-04-22 10:33:44.000000000 -0400
> +++ linux-2.6.25-mm1/include/asm-x86/io_64.h 2008-04-28 21:37:42.000000000 -0400
> @@ -175,6 +175,9 @@ extern void early_iounmap(void *addr, un
> */
> extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
> extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
> +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
> + unsigned long prot_val);
> +#define _HAVE_ARCH_IOREMAP_PROT
I expect that any architecture which implements ioremap_prot() will have to
implement it with the same signature, yes?
So perhaps the declaration should be placed in include/linux/io.h.
> /*
> * The default ioremap() behavior is non-cached:
> Index: linux-2.6.25-mm1/include/linux/mm.h
> ===================================================================
> --- linux-2.6.25-mm1.orig/include/linux/mm.h 2008-04-27 11:06:04.000000000 -0400
> +++ linux-2.6.25-mm1/include/linux/mm.h 2008-04-28 22:01:26.000000000 -0400
> @@ -171,6 +171,12 @@ struct vm_operations_struct {
> /* notification that a previously read-only page is about to become
> * writable, if an error is returned it will cause a SIGBUS */
> int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
> +
> + /* called by access_process_vm when get_user_pages() fails, typically
> + * for use by special VMAs that can switch between memory and hardware
> + */
> + int (*access)(struct vm_area_struct *vma, unsigned long addr,
> + void *buf, int len, int write);
> #ifdef CONFIG_NUMA
> /*
> * set_policy() op must add a reference to any non-NULL @new mempolicy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists