[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1ce1978e-eecd-20af-3c1d-531a7dd046b6@gmail.com>
Date: Wed, 15 Jun 2022 19:56:47 +0300
From: Oleksandr <olekstysh@...il.com>
To: Stefano Stabellini <sstabellini@...nel.org>
Cc: xen-devel@...ts.xenproject.org, linux-kernel@...r.kernel.org,
Oleksandr Tyshchenko <oleksandr_tyshchenko@...m.com>,
Boris Ostrovsky <boris.ostrovsky@...cle.com>,
Juergen Gross <jgross@...e.com>, Julien Grall <julien@....org>
Subject: Re: [RFC PATCH 1/2] xen/unpopulated-alloc: Introduce helpers for DMA
allocations
On 15.06.22 03:45, Stefano Stabellini wrote:
Hello Stefano
> On Tue, 14 Jun 2022, Oleksandr wrote:
>> On 11.06.22 03:12, Stefano Stabellini wrote:
>>> On Wed, 8 Jun 2022, Oleksandr wrote:
>>>> 2. Drop the "page_list" entirely and use "dma_pool" for all (contiguous
>>>> and
>>>> non-contiguous) allocations. After all, all pages are initially contiguous
>>>> in
>>>> fill_list() as they are built from the resource. This changes behavior for
>>>> all
>>>> users of xen_alloc_unpopulated_pages()
>>>>
>>>> Below the diff for unpopulated-alloc.c. The patch is also available at:
>>>>
>>>> https://github.com/otyshchenko1/linux/commit/7be569f113a4acbdc4bcb9b20cb3995b3151387a
>>>>
>>>>
>>>> diff --git a/drivers/xen/unpopulated-alloc.c
>>>> b/drivers/xen/unpopulated-alloc.c
>>>> index a39f2d3..ab5c7bd 100644
>>>> --- a/drivers/xen/unpopulated-alloc.c
>>>> +++ b/drivers/xen/unpopulated-alloc.c
>>>> @@ -1,5 +1,7 @@
>>>> // SPDX-License-Identifier: GPL-2.0
>>>> +#include <linux/dma-mapping.h>
>>>> #include <linux/errno.h>
>>>> +#include <linux/genalloc.h>
>>>> #include <linux/gfp.h>
>>>> #include <linux/kernel.h>
>>>> #include <linux/mm.h>
>>>> @@ -13,8 +15,8 @@
>>>> #include <xen/xen.h>
>>>>
>>>> static DEFINE_MUTEX(list_lock);
>>>> -static struct page *page_list;
>>>> -static unsigned int list_count;
>>>> +
>>>> +static struct gen_pool *dma_pool;
>>>>
>>>> static struct resource *target_resource;
>>>>
>>>> @@ -36,7 +38,7 @@ static int fill_list(unsigned int nr_pages)
>>>> struct dev_pagemap *pgmap;
>>>> struct resource *res, *tmp_res = NULL;
>>>> void *vaddr;
>>>> - unsigned int i, alloc_pages = round_up(nr_pages,
>>>> PAGES_PER_SECTION);
>>>> + unsigned int alloc_pages = round_up(nr_pages, PAGES_PER_SECTION);
>>>> struct range mhp_range;
>>>> int ret;
>>>>
>>>> @@ -106,6 +108,7 @@ static int fill_list(unsigned int nr_pages)
>>>> * conflict with any devices.
>>>> */
>>>> if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>>>> + unsigned int i;
>>>> xen_pfn_t pfn = PFN_DOWN(res->start);
>>>>
>>>> for (i = 0; i < alloc_pages; i++) {
>>>> @@ -125,16 +128,17 @@ static int fill_list(unsigned int nr_pages)
>>>> goto err_memremap;
>>>> }
>>>>
>>>> - for (i = 0; i < alloc_pages; i++) {
>>>> - struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i);
>>>> -
>>>> - pg->zone_device_data = page_list;
>>>> - page_list = pg;
>>>> - list_count++;
>>>> + ret = gen_pool_add_virt(dma_pool, (unsigned long)vaddr,
>>>> res->start,
>>>> + alloc_pages * PAGE_SIZE, NUMA_NO_NODE);
>>>> + if (ret) {
>>>> + pr_err("Cannot add memory range to the pool\n");
>>>> + goto err_pool;
>>>> }
>>>>
>>>> return 0;
>>>>
>>>> +err_pool:
>>>> + memunmap_pages(pgmap);
>>>> err_memremap:
>>>> kfree(pgmap);
>>>> err_pgmap:
>>>> @@ -149,51 +153,49 @@ static int fill_list(unsigned int nr_pages)
>>>> return ret;
>>>> }
>>>>
>>>> -/**
>>>> - * xen_alloc_unpopulated_pages - alloc unpopulated pages
>>>> - * @nr_pages: Number of pages
>>>> - * @pages: pages returned
>>>> - * @return 0 on success, error otherwise
>>>> - */
>>>> -int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page
>>>> **pages)
>>>> +static int alloc_unpopulated_pages(unsigned int nr_pages, struct page
>>>> **pages,
>>>> + bool contiguous)
>>>> {
>>>> unsigned int i;
>>>> int ret = 0;
>>>> + void *vaddr;
>>>> + bool filled = false;
>>>>
>>>> /*
>>>> * Fallback to default behavior if we do not have any suitable
>>>> resource
>>>> * to allocate required region from and as the result we won't be
>>>> able
>>>> to
>>>> * construct pages.
>>>> */
>>>> - if (!target_resource)
>>>> + if (!target_resource) {
>>>> + if (contiguous)
>>>> + return -ENODEV;
>>>> +
>>>> return xen_alloc_ballooned_pages(nr_pages, pages);
>>>> + }
>>>>
>>>> mutex_lock(&list_lock);
>>>> - if (list_count < nr_pages) {
>>>> - ret = fill_list(nr_pages - list_count);
>>>> +
>>>> + while (!(vaddr = (void *)gen_pool_alloc(dma_pool, nr_pages *
>>>> PAGE_SIZE))) {
>>>> + if (filled)
>>>> + ret = -ENOMEM;
>>>> + else {
>>>> + ret = fill_list(nr_pages);
>>>> + filled = true;
>>>> + }
>>>> if (ret)
>>>> goto out;
>>>> }
>>>>
>>>> for (i = 0; i < nr_pages; i++) {
>>>> - struct page *pg = page_list;
>>>> -
>>>> - BUG_ON(!pg);
>>>> - page_list = pg->zone_device_data;
>>>> - list_count--;
>>>> - pages[i] = pg;
>>>> + pages[i] = virt_to_page(vaddr + PAGE_SIZE * i);
>>>>
>>>> #ifdef CONFIG_XEN_HAVE_PVMMU
>>>> if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>>>> - ret = xen_alloc_p2m_entry(page_to_pfn(pg));
>>>> + ret = xen_alloc_p2m_entry(page_to_pfn(pages[i]));
>>>> if (ret < 0) {
>>>> - unsigned int j;
>>>> -
>>>> - for (j = 0; j <= i; j++) {
>>>> - pages[j]->zone_device_data = page_list;
>>>> - page_list = pages[j];
>>>> - list_count++;
>>>> - }
>>>> + /* XXX Do we need to zero pages[i]? */
>>>> + gen_pool_free(dma_pool, (unsigned
>>>> long)vaddr,
>>>> + nr_pages * PAGE_SIZE);
>>>> goto out;
>>>> }
>>>> }
>>>> @@ -204,32 +206,89 @@ int xen_alloc_unpopulated_pages(unsigned int
>>>> nr_pages,
>>>> struct page **pages)
>>>> mutex_unlock(&list_lock);
>>>> return ret;
>>>> }
>>>> -EXPORT_SYMBOL(xen_alloc_unpopulated_pages);
>>>>
>>>> -/**
>>>> - * xen_free_unpopulated_pages - return unpopulated pages
>>>> - * @nr_pages: Number of pages
>>>> - * @pages: pages to return
>>>> - */
>>>> -void xen_free_unpopulated_pages(unsigned int nr_pages, struct page
>>>> **pages)
>>>> +static void free_unpopulated_pages(unsigned int nr_pages, struct page
>>>> **pages,
>>>> + bool contiguous)
>>>> {
>>>> - unsigned int i;
>>>> -
>>>> if (!target_resource) {
>>>> + if (contiguous)
>>>> + return;
>>>> +
>>>> xen_free_ballooned_pages(nr_pages, pages);
>>>> return;
>>>> }
>>>>
>>>> mutex_lock(&list_lock);
>>>> - for (i = 0; i < nr_pages; i++) {
>>>> - pages[i]->zone_device_data = page_list;
>>>> - page_list = pages[i];
>>>> - list_count++;
>>>> +
>>>> + /* XXX Do we need to check the range (gen_pool_has_addr)? */
>>>> + if (contiguous)
>>>> + gen_pool_free(dma_pool, (unsigned
>>>> long)page_to_virt(pages[0]),
>>>> + nr_pages * PAGE_SIZE);
>>>> + else {
>>>> + unsigned int i;
>>>> +
>>>> + for (i = 0; i < nr_pages; i++)
>>>> + gen_pool_free(dma_pool, (unsigned
>>>> long)page_to_virt(pages[i]),
>>>> + PAGE_SIZE);
>>>> }
>>>> +
>>>> mutex_unlock(&list_lock);
>>>> }
>>>> +
>>>> +/**
>>>> + * xen_alloc_unpopulated_pages - alloc unpopulated pages
>>>> + * @nr_pages: Number of pages
>>>> + * @pages: pages returned
>>>> + * @return 0 on success, error otherwise
>>>> + */
>>>> +int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page
>>>> **pages)
>>>> +{
>>>> + return alloc_unpopulated_pages(nr_pages, pages, false);
>>>> +}
>>>> +EXPORT_SYMBOL(xen_alloc_unpopulated_pages);
>>>> +
>>>> +/**
>>>> + * xen_free_unpopulated_pages - return unpopulated pages
>>>> + * @nr_pages: Number of pages
>>>> + * @pages: pages to return
>>>> + */
>>>> +void xen_free_unpopulated_pages(unsigned int nr_pages, struct page
>>>> **pages)
>>>> +{
>>>> + free_unpopulated_pages(nr_pages, pages, false);
>>>> +}
>>>> EXPORT_SYMBOL(xen_free_unpopulated_pages);
>>>>
>>>> +/**
>>>> + * xen_alloc_unpopulated_dma_pages - alloc unpopulated DMAable pages
>>>> + * @dev: valid struct device pointer
>>>> + * @nr_pages: Number of pages
>>>> + * @pages: pages returned
>>>> + * @return 0 on success, error otherwise
>>>> + */
>>>> +int xen_alloc_unpopulated_dma_pages(struct device *dev, unsigned int
>>>> nr_pages,
>>>> + struct page **pages)
>>>> +{
>>>> + /* XXX Handle devices which support 64-bit DMA address only for
>>>> now */
>>>> + if (dma_get_mask(dev) != DMA_BIT_MASK(64))
>>>> + return -EINVAL;
>>>> +
>>>> + return alloc_unpopulated_pages(nr_pages, pages, true);
>>>> +}
>>>> +EXPORT_SYMBOL(xen_alloc_unpopulated_dma_pages);
>>>> +
>>>> +/**
>>>> + * xen_free_unpopulated_dma_pages - return unpopulated DMAable pages
>>>> + * @dev: valid struct device pointer
>>>> + * @nr_pages: Number of pages
>>>> + * @pages: pages to return
>>>> + */
>>>> +void xen_free_unpopulated_dma_pages(struct device *dev, unsigned int
>>>> nr_pages,
>>>> + struct page **pages)
>>>> +{
>>>> + free_unpopulated_pages(nr_pages, pages, true);
>>>> +}
>>>> +EXPORT_SYMBOL(xen_free_unpopulated_dma_pages);
>>>> +
>>>> static int __init unpopulated_init(void)
>>>> {
>>>> int ret;
>>>> @@ -237,9 +296,19 @@ static int __init unpopulated_init(void)
>>>> if (!xen_domain())
>>>> return -ENODEV;
>>>>
>>>> + dma_pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
>>>> + if (!dma_pool) {
>>>> + pr_err("xen:unpopulated: Cannot create DMA pool\n");
>>>> + return -ENOMEM;
>>>> + }
>>>> +
>>>> + gen_pool_set_algo(dma_pool, gen_pool_best_fit, NULL);
>>>> +
>>>> ret = arch_xen_unpopulated_init(&target_resource);
>>>> if (ret) {
>>>> pr_err("xen:unpopulated: Cannot initialize target
>>>> resource\n");
>>>> + gen_pool_destroy(dma_pool);
>>>> + dma_pool = NULL;
>>>> target_resource = NULL;
>>>> }
>>>>
>>>> [snip]
>>>>
>>>>
>>>> I think, depending on the approach, we would likely need to do some
>>>> renaming
>>>> for fill_list, page_list, list_lock, etc.
>>>>
>>>>
>>>> Both options work in my Arm64 based environment, not sure regarding x86.
>>>> Or do we have another option here?
>>>> I would be happy to go any route. What do you think?
>>> The second option (use "dma_pool" for all) looks great, thank you for
>>> looking into it!
>>
>> ok, great
>>
>>
>> May I please clarify a few points before starting to prepare the non-RFC version:
>>
>>
>> 1. According to the discussion at "[RFC PATCH 2/2] xen/grant-table: Use
>> unpopulated DMAable pages instead of real RAM ones" we decided
>> to stay away from the "dma" in the name, also the second option (use
>> "dma_pool" for all) implies dropping the "page_list" entirely, so I am going
>> to do some renaming:
>>
>> - s/xen_alloc_unpopulated_dma_pages()/xen_alloc_unpopulated_contiguous_pages()
>> - s/dma_pool/unpopulated_pool
>> - s/list_lock/pool_lock
>> - s/fill_list()/fill_pool()
>>
>> Any objections?
>
> Looks good
thank you for the clarification
>
>
>> 2. I don't like much the fact that in free_unpopulated_pages() we have to free
>> "page by page" if contiguous is false, but unfortunately we cannot avoid doing
>> that.
>> I noticed that many users of unpopulated pages retain initially allocated
>> pages[i] array, so it passed here absolutely unmodified since being allocated,
>> but there is a code (for example, gnttab_page_cache_shrink() in grant-table.c)
>> that can pass pages[i] which contains arbitrary pages.
>>
>> static void free_unpopulated_pages(unsigned int nr_pages, struct page **pages,
>> bool contiguous)
>> {
>>
>> [snip]
>>
>> /* XXX Do we need to check the range (gen_pool_has_addr)? */
>> if (contiguous)
>> gen_pool_free(dma_pool, (unsigned long)page_to_virt(pages[0]),
>> nr_pages * PAGE_SIZE);
>> else {
>> unsigned int i;
>>
>> for (i = 0; i < nr_pages; i++)
>> gen_pool_free(dma_pool, (unsigned long)page_to_virt(pages[i]),
>> PAGE_SIZE);
>> }
>>
>> [snip]
>>
>> }
>>
>> I think it wouldn't be a big deal for small allocations, but for big
>> allocations it might not be optimal for speed.
>>
>> What do you think if we update some places which always require big
>> allocations to allocate (and free) contiguous pages instead?
>> The possible candidate is
>> gem_create()/xen_drm_front_gem_free_object_unlocked() in
>> drivers/gpu/drm/xen/xen_drm_front_gem.c.
>> OTOH I realize this might be inefficient use of resources. Or better not?
>
> Yes I think it is a good idea, more on this below.
thanks
>
>
>> 3. The alloc_unpopulated_pages() might be optimized for the non-contiguous
>> allocations, currently we always try to allocate a single chunk even if
>> contiguous is false.
>>
>> static int alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages,
>> bool contiguous)
>> {
>>
>> [snip]
>>
>> /* XXX: Optimize for non-contiguous allocations */
>> while (!(vaddr = (void *)gen_pool_alloc(dma_pool, nr_pages * PAGE_SIZE)))
>> {
>> if (filled)
>> ret = -ENOMEM;
>> else {
>> ret = fill_list(nr_pages);
>> filled = true;
>> }
>> if (ret)
>> goto out;
>> }
>>
>> [snip]
>>
>> }
>>
>>
>> But we can allocate "page by page" for the non-contiguous allocations; it
>> might not be optimal for speed, but would be optimal for resource
>> usage. What do you think?
>>
>> static int alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages,
>> bool contiguous)
>> {
>>
>> [snip]
>>
>> if (contiguous) {
>> while (!(vaddr = (void *)gen_pool_alloc(dma_pool, nr_pages *
>> PAGE_SIZE))) {
>> if (filled)
>> ret = -ENOMEM;
>> else {
>> ret = fill_list(nr_pages);
>> filled = true;
>> }
>> if (ret)
>> goto out;
>> }
>>
>> for (i = 0; i < nr_pages; i++)
>> pages[i] = virt_to_page(vaddr + PAGE_SIZE * i);
>> } else {
>> if (gen_pool_avail(dma_pool) < nr_pages) {
>> ret = fill_list(nr_pages - gen_pool_avail(dma_pool));
>> if (ret)
>> goto out;
>> }
>>
>> for (i = 0; i < nr_pages; i++) {
>> vaddr = (void *)gen_pool_alloc(dma_pool, PAGE_SIZE);
>> if (!vaddr) {
>> while (i--)
>> gen_pool_free(dma_pool, (unsigned
>> long)page_to_virt(pages[i]),
>> PAGE_SIZE);
>>
>> ret = -ENOMEM;
>> goto out;
>> }
>>
>> pages[i] = virt_to_page(vaddr);
>> }
>> }
>>
>> [snip]
>>
>> }
> Basically, if we allocate (and free) page-by-page it leads to more
> efficient resource utilization but it is slower.
yes, I think the same
> If we allocate larger
> contiguous chunks it is faster but it leads to less efficient resource
> utilization.
yes, I think the same
>
> Given that on both x86 and ARM the unpopulated memory resource is
> arbitrarily large, I don't think we need to worry about resource
> utilization. It is not backed by real memory. The only limitation is the
> address space size which is very large.
I agree
>
> So I would say optimize for speed and use larger contiguous chunks even
> when continuity is not strictly required.
thank you for the clarification, will do
--
Regards,
Oleksandr Tyshchenko
Powered by blists - more mailing lists