Message-ID: <BY2PR0301MB07111766BDC5DE63CDBD5CE2A02F0@BY2PR0301MB0711.namprd03.prod.outlook.com>
Date: Tue, 17 Feb 2015 15:42:30 +0000
From: KY Srinivasan <kys@...rosoft.com>
To: Vitaly Kuznetsov <vkuznets@...hat.com>,
"devel@...uxdriverproject.org" <devel@...uxdriverproject.org>
CC: Haiyang Zhang <haiyangz@...rosoft.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Dexuan Cui <decui@...rosoft.com>
Subject: RE: [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in
acquire/release_region_mutex
> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@...hat.com]
> Sent: Tuesday, February 17, 2015 7:20 AM
> To: KY Srinivasan; devel@...uxdriverproject.org
> Cc: Haiyang Zhang; linux-kernel@...r.kernel.org; Dexuan Cui
> Subject: [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in
> acquire/release_region_mutex
>
> When many memory regions are being added and automatically onlined,
> the following lockup is sometimes observed:
>
> INFO: task udevd:1872 blocked for more than 120 seconds.
> ...
> Call Trace:
> [<ffffffff816ec0bc>] schedule_timeout+0x22c/0x350
> [<ffffffff816eb98f>] wait_for_common+0x10f/0x160
> [<ffffffff81067650>] ? default_wake_function+0x0/0x20
> [<ffffffff816eb9fd>] wait_for_completion+0x1d/0x20
> [<ffffffff8144cb9c>] hv_memory_notifier+0xdc/0x120
> [<ffffffff816f298c>] notifier_call_chain+0x4c/0x70
> ...
>
> When several memory blocks go online simultaneously, we get several
> hv_memory_notifier() instances trying to acquire the ha_region_mutex.
> While this mutex is held by hot_add_req(), all the competing
> acquire_region_mutex() calls do mutex_trylock(), fail, and queue
> themselves in wait_for_completion(). However, when we do complete()
> from release_region_mutex(), only one of them wakes up.
> Changing complete() to complete_all() would not solve this either:
> memory onlining can be delayed, in which case we can still get several
> hv_memory_notifier() runners trying to grab the mutex at the same
> time. Only one of them will succeed and the others will hang forever,
> as complete() is not called again. We don't see this issue often
> because hv_mem_hot_add() has a 5-second onlining timeout and usually
> all udev events arrive within that time frame.
>
> Get rid of the trylock path; waiting on the mutex provides the
> required serialization.
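
To make the race easier to see, here is a condensed sketch (names
shortened; not the driver code verbatim) of the pattern being removed
next to its replacement. With two waiters parked in
wait_for_completion(), a single complete() wakes only one of them:

#include <linux/mutex.h>
#include <linux/completion.h>

static DEFINE_MUTEX(region_mutex);
static DECLARE_COMPLETION(waiter_event);

/* Old pattern: trylock, then park on a completion when it fails. */
static void racy_acquire(void)
{
	/* Racy on its own: may clear a complete() that already fired. */
	reinit_completion(&waiter_event);
	while (!mutex_trylock(&region_mutex))
		wait_for_completion(&waiter_event);
}

static void racy_release(void)
{
	mutex_unlock(&region_mutex);
	complete(&waiter_event);	/* wakes exactly one waiter */
}

/* New pattern: the mutex queues all waiters and wakes them in turn. */
static void fixed_acquire(void)
{
	mutex_lock(&region_mutex);
}

static void fixed_release(void)
{
	mutex_unlock(&region_mutex);
}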
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@...hat.com>
> ---
> drivers/hv/hv_balloon.c | 33 ++++++++++-----------------------
> 1 file changed, 10 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
> index ff16938..094de89 100644
> --- a/drivers/hv/hv_balloon.c
> +++ b/drivers/hv/hv_balloon.c
> @@ -534,7 +534,6 @@ struct hv_dynmem_device {
> struct task_struct *thread;
>
> struct mutex ha_region_mutex;
> - struct completion waiter_event;
>
> /*
> * A list of hot-add regions.
> @@ -554,25 +553,14 @@ static struct hv_dynmem_device dm_device;
> static void post_status(struct hv_dynmem_device *dm);
>
> #ifdef CONFIG_MEMORY_HOTPLUG
> -static void acquire_region_mutex(bool trylock)
> +static void acquire_region_mutex(void)
> {
> - if (trylock) {
> - reinit_completion(&dm_device.waiter_event);
> - while (!mutex_trylock(&dm_device.ha_region_mutex))
> - wait_for_completion(&dm_device.waiter_event);
> - } else {
> - mutex_lock(&dm_device.ha_region_mutex);
> - }
> + mutex_lock(&dm_device.ha_region_mutex);
> }
Why keep the wrapper at all? Get rid of it and call mutex_lock() directly.
>
> -static void release_region_mutex(bool trylock)
> +static void release_region_mutex(void)
> {
> - if (trylock) {
> - mutex_unlock(&dm_device.ha_region_mutex);
> - } else {
> - mutex_unlock(&dm_device.ha_region_mutex);
> - complete(&dm_device.waiter_event);
> - }
> + mutex_unlock(&dm_device.ha_region_mutex);
> }
>
No wrapper needed.
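Concretely, with both wrappers gone, a call site would read like this
(a sketch of the suggested follow-up, not tested):

static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
			      void *v)
{
	switch (val) {
	case MEM_GOING_ONLINE:
		mutex_lock(&dm_device.ha_region_mutex);
		break;

	case MEM_ONLINE:
	case MEM_CANCEL_ONLINE:
		mutex_unlock(&dm_device.ha_region_mutex);
		if (dm_device.ha_waiting) {
			dm_device.ha_waiting = false;
			complete(&dm_device.ol_waitevent);
		}
		break;
	}
	return NOTIFY_OK;
}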
> static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
> @@ -580,12 +568,12 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
> {
> switch (val) {
> case MEM_GOING_ONLINE:
> - acquire_region_mutex(true);
> + acquire_region_mutex();
> break;
>
> case MEM_ONLINE:
> case MEM_CANCEL_ONLINE:
> - release_region_mutex(true);
> + release_region_mutex();
> if (dm_device.ha_waiting) {
> dm_device.ha_waiting = false;
> complete(&dm_device.ol_waitevent);
> @@ -646,7 +634,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
> init_completion(&dm_device.ol_waitevent);
> dm_device.ha_waiting = true;
>
> - release_region_mutex(false);
> + release_region_mutex();
> nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
> ret = add_memory(nid, PFN_PHYS((start_pfn)),
> (HA_CHUNK << PAGE_SHIFT));
> @@ -675,7 +663,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
> * have not been "onlined" within the allowed time.
> */
> wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
> - acquire_region_mutex(false);
> + acquire_region_mutex();
> post_status(&dm_device);
> }
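
For the archive: the ordering this hunk relies on, condensed into one
hypothetical helper (hot_add_one_chunk is a made-up name; error
handling elided):

static void hot_add_one_chunk(int nid, unsigned long start_pfn)
{
	init_completion(&dm_device.ol_waitevent);
	dm_device.ha_waiting = true;

	/*
	 * Drop the mutex before adding memory: onlining the new block
	 * runs hv_memory_notifier(), which takes ha_region_mutex itself.
	 */
	mutex_unlock(&dm_device.ha_region_mutex);
	add_memory(nid, PFN_PHYS(start_pfn), HA_CHUNK << PAGE_SHIFT);

	/* Give udev up to 5 seconds to online the block. */
	wait_for_completion_timeout(&dm_device.ol_waitevent, 5 * HZ);
	mutex_lock(&dm_device.ha_region_mutex);
}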
>
> @@ -886,7 +874,7 @@ static void hot_add_req(struct work_struct *dummy)
> resp.hdr.size = sizeof(struct dm_hot_add_response);
>
> #ifdef CONFIG_MEMORY_HOTPLUG
> - acquire_region_mutex(false);
> + acquire_region_mutex();
> pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
> pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
>
> @@ -918,7 +906,7 @@ static void hot_add_req(struct work_struct *dummy)
> if (do_hot_add)
> resp.page_count = process_hot_add(pg_start, pfn_cnt,
> rg_start, rg_sz);
> - release_region_mutex(false);
> + release_region_mutex();
> #endif
> /*
> * The result field of the response structure has the
> @@ -1439,7 +1427,6 @@ static int balloon_probe(struct hv_device *dev,
> dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
> init_completion(&dm_device.host_event);
> init_completion(&dm_device.config_event);
> - init_completion(&dm_device.waiter_event);
> INIT_LIST_HEAD(&dm_device.ha_region_list);
> mutex_init(&dm_device.ha_region_mutex);
> INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
> --
> 1.9.3
Thanks,
K. Y