Message-ID: <9f4e8334-9aaf-5ec4-3af9-884160110689@gmail.com>
Date: Thu, 9 Dec 2021 19:26:19 +0800
From: Eric Ren <renzhengeek@...il.com>
To: David Hildenbrand <david@...hat.com>, linux-kernel@...r.kernel.org
Cc: "Michael S. Tsirkin" <mst@...hat.com>,
Jason Wang <jasowang@...hat.com>, Zi Yan <ziy@...dia.com>,
Gavin Shan <gshan@...hat.com>, Hui Zhu <teawater@...il.com>,
Sebastien Boeuf <sebastien.boeuf@...el.com>,
Pankaj Gupta <pankaj.gupta.linux@...il.com>,
Wei Yang <richard.weiyang@...ux.alibaba.com>,
virtualization@...ts.linux-foundation.org, linux-mm@...ck.org
Subject: Re: [PATCH v1 1/2] virtio-mem: prepare page onlining code for
granularity smaller than MAX_ORDER - 1

Hi David,

Thanks for working on this!

On 2021/11/26 21:42, David Hildenbrand wrote:
> Let's prepare our page onlining code for subblock size smaller than
> MAX_ORDER - 1: we'll get called for a MAX_ORDER - 1 page but might have
> some subblocks in the range plugged and some unplugged. In that case,
> fallback to subblock granularity to properly only expose the plugged
> parts to the buddy.
>
> Signed-off-by: David Hildenbrand <david@...hat.com>
> ---
> drivers/virtio/virtio_mem.c | 86 ++++++++++++++++++++++++++-----------
> 1 file changed, 62 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
> index 96e5a8782769..03e1c5743699 100644
> --- a/drivers/virtio/virtio_mem.c
> +++ b/drivers/virtio/virtio_mem.c
> @@ -20,6 +20,7 @@
> #include <linux/mutex.h>
> #include <linux/bitmap.h>
> #include <linux/lockdep.h>
> +#include <linux/log2.h>
>
> #include <acpi/acpi_numa.h>
>
> @@ -1228,28 +1229,46 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
> page_ref_inc(pfn_to_page(pfn + i));
> }
>
> -static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
> +static void virtio_mem_online_page(struct virtio_mem *vm,
> + struct page *page, unsigned int order)
> {
> - const unsigned long addr = page_to_phys(page);
> - unsigned long id, sb_id;
> - struct virtio_mem *vm;
> + const unsigned long start = page_to_phys(page);
> + const unsigned long end = start + PFN_PHYS(1 << order);
> + unsigned long addr, next, id, sb_id, count;
> bool do_online;
>
> - rcu_read_lock();
> - list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
> - if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
> - continue;
> + /*
> + * We can get called with any order up to MAX_ORDER - 1. If our
> + * subblock size is smaller than that and we have a mixture of plugged
> + * and unplugged subblocks within such a page, we have to process in
> + * smaller granularity. In that case we'll adjust the order exactly once
> + * within the loop.
> + */
> + for (addr = start; addr < end; ) {
> + next = addr + PFN_PHYS(1 << order);
>
> if (vm->in_sbm) {
> - /*
> - * We exploit here that subblocks have at least
> - * MAX_ORDER_NR_PAGES size/alignment - so we cannot
> - * cross subblocks within one call.
> - */
> id = virtio_mem_phys_to_mb_id(addr);
> sb_id = virtio_mem_phys_to_sb_id(vm, addr);
> - do_online = virtio_mem_sbm_test_sb_plugged(vm, id,
> - sb_id, 1);
> + count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1;
> +
> + if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) {
> + /* Fully plugged. */
> + do_online = true;
> + } else if (count == 1 ||
> + virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) {
> + /* Fully unplugged. */
> + do_online = false;
> + } else {
> + /*
> + * Mixture, process sub-blocks instead. This
> + * will be at least the size of a pageblock.
> + * We'll run into this case exactly once.
> + */
> + order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT;
> + do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1);
> + continue;
> + }
> } else {
> /*
> * If the whole block is marked fake offline, keep
> @@ -1260,18 +1279,38 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
> VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
> }
>
> + if (do_online)
> + generic_online_page(pfn_to_page(PFN_DOWN(addr)), order);
> + else
> + virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
> + false);

Should we just use PHYS_PFN() here? addr is obviously page aligned.
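PFN_DOWN() and PHYS_PFN() both end up doing addr >> PAGE_SHIFT here, so
it's only about readability when the address is known to be aligned.
Untested, just to illustrate what I mean:

	if (do_online)
		generic_online_page(pfn_to_page(PHYS_PFN(addr)), order);
	else
		virtio_mem_set_fake_offline(PHYS_PFN(addr), 1 << order,
					    false);
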
Anyway, it doesn't matter.
LGTM.
Reviewed-by: Eric Ren <renzhengeek@...il.com>
> + addr = next;
> + }
> +}
> +
> +static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
> +{
> + const unsigned long addr = page_to_phys(page);
> + struct virtio_mem *vm;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
> /*
> - * virtio_mem_set_fake_offline() might sleep, we don't need
> - * the device anymore. See virtio_mem_remove() how races
> + * Pages we're onlining will never cross memory blocks and,
> + * therefore, not virtio-mem devices.
> + */
> + if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
> + continue;
> +
> + /*
> + * virtio_mem_set_fake_offline() might sleep. We can safely
> + * drop the RCU lock at this point because the device
> + * cannot go away. See virtio_mem_remove() how races
> * between memory onlining and device removal are handled.
> */
> rcu_read_unlock();
>
> - if (do_online)
> - generic_online_page(page, order);
> - else
> - virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
> - false);
> + virtio_mem_online_page(vm, page, order);
> return;
> }
> rcu_read_unlock();
> @@ -2438,8 +2477,7 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
> /*
> * We want subblocks to span at least MAX_ORDER_NR_PAGES and
> * pageblock_nr_pages pages. This:
> - * - Simplifies our page onlining code (virtio_mem_online_page_cb)
> - * and fake page onlining code (virtio_mem_fake_online).
> + * - Simplifies our fake page onlining code (virtio_mem_fake_online).
> * - Is required for now for alloc_contig_range() to work reliably -
> * it doesn't properly handle smaller granularity on ZONE_NORMAL.
> */