Message-ID: <BY2PR0301MB07111766BDC5DE63CDBD5CE2A02F0@BY2PR0301MB0711.namprd03.prod.outlook.com>
Date:	Tue, 17 Feb 2015 15:42:30 +0000
From:	KY Srinivasan <kys@...rosoft.com>
To:	Vitaly Kuznetsov <vkuznets@...hat.com>,
	"devel@...uxdriverproject.org" <devel@...uxdriverproject.org>
CC:	Haiyang Zhang <haiyangz@...rosoft.com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	Dexuan Cui <decui@...rosoft.com>
Subject: RE: [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in
 acquire/release_region_mutex



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@...hat.com]
> Sent: Tuesday, February 17, 2015 7:20 AM
> To: KY Srinivasan; devel@...uxdriverproject.org
> Cc: Haiyang Zhang; linux-kernel@...r.kernel.org; Dexuan Cui
> Subject: [PATCH] Drivers: hv: hv_balloon: eliminate the trylock path in
> acquire/release_region_mutex
> 
> When many memory regions are being added and automatically onlined, the
> following lockup is sometimes observed:
> 
> INFO: task udevd:1872 blocked for more than 120 seconds.
> ...
> Call Trace:
>  [<ffffffff816ec0bc>] schedule_timeout+0x22c/0x350
>  [<ffffffff816eb98f>] wait_for_common+0x10f/0x160
>  [<ffffffff81067650>] ? default_wake_function+0x0/0x20
>  [<ffffffff816eb9fd>] wait_for_completion+0x1d/0x20
>  [<ffffffff8144cb9c>] hv_memory_notifier+0xdc/0x120
>  [<ffffffff816f298c>] notifier_call_chain+0x4c/0x70
> ...
> 
> When several memory blocks are going online simultaneously we get several
> hv_memory_notifier() calls trying to acquire the ha_region_mutex. When this
> mutex is being held by hot_add_req() all these competing
> acquire_region_mutex() do mutex_trylock, fail, and queue themselves into
> wait_for_completion(..). However when we do complete() from
> release_region_mutex() only one of them wakes up.
> This could be solved by changing complete() -> complete_all(), but memory
> onlining can be delayed as well; in that case we can still get several
> hv_memory_notifier() runners at the same time trying to grab the mutex.
> Only one of them will succeed and the others will hang forever as
> complete() is not being called. We don't see this issue often because we
> have 5sec onlining timeout in hv_mem_hot_add() and usually all udev
> events arrive in this time frame.
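
As I read the description above, the interleaving is roughly the following
(a sketch only; H/A/B are illustrative names for the threads involved):

	/*
	 * H = hot_add_req(), A/B = two hv_memory_notifier() callers
	 *
	 * H: mutex_lock(&ha_region_mutex)               (non-trylock path)
	 * A: mutex_trylock() fails -> wait_for_completion(&waiter_event)
	 * B: mutex_trylock() fails -> wait_for_completion(&waiter_event)
	 * H: mutex_unlock(); complete(&waiter_event)    -> wakes only A
	 * A: mutex_trylock() succeeds, ..., mutex_unlock()  (trylock path,
	 *    so no complete())                          -> B waits forever
	 */
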
> 
> Get rid of the trylock path; waiting on the mutex is supposed to provide the
> required serialization.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@...hat.com>
> ---
>  drivers/hv/hv_balloon.c | 33 ++++++++++-----------------------
>  1 file changed, 10 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
> index ff16938..094de89 100644
> --- a/drivers/hv/hv_balloon.c
> +++ b/drivers/hv/hv_balloon.c
> @@ -534,7 +534,6 @@ struct hv_dynmem_device {
>  	struct task_struct *thread;
> 
>  	struct mutex ha_region_mutex;
> -	struct completion waiter_event;
> 
>  	/*
>  	 * A list of hot-add regions.
> @@ -554,25 +553,14 @@ static struct hv_dynmem_device dm_device;
>  static void post_status(struct hv_dynmem_device *dm);
> 
>  #ifdef CONFIG_MEMORY_HOTPLUG
> -static void acquire_region_mutex(bool trylock)
> +static void acquire_region_mutex(void)
>  {
> -	if (trylock) {
> -		reinit_completion(&dm_device.waiter_event);
> -		while (!mutex_trylock(&dm_device.ha_region_mutex))
> -			wait_for_completion(&dm_device.waiter_event);
> -	} else {
> -		mutex_lock(&dm_device.ha_region_mutex);
> -	}
> +	mutex_lock(&dm_device.ha_region_mutex);
>  }

Why have the wrapper? Get rid of it and use mutex_lock() directly.
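I.e. something like this at the call sites (a sketch, not a tested patch):

	case MEM_GOING_ONLINE:
		mutex_lock(&dm_device.ha_region_mutex);
		break;

	case MEM_ONLINE:
	case MEM_CANCEL_ONLINE:
		mutex_unlock(&dm_device.ha_region_mutex);
		...
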
> 
> -static void release_region_mutex(bool trylock)
> +static void release_region_mutex(void)
>  {
> -	if (trylock) {
> -		mutex_unlock(&dm_device.ha_region_mutex);
> -	} else {
> -		mutex_unlock(&dm_device.ha_region_mutex);
> -		complete(&dm_device.waiter_event);
> -	}
> +	mutex_unlock(&dm_device.ha_region_mutex);
>  }
>
No wrapper needed.
 
>  static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
> @@ -580,12 +568,12 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
>  {
>  	switch (val) {
>  	case MEM_GOING_ONLINE:
> -		acquire_region_mutex(true);
> +		acquire_region_mutex();
>  		break;
> 
>  	case MEM_ONLINE:
>  	case MEM_CANCEL_ONLINE:
> -		release_region_mutex(true);
> +		release_region_mutex();
>  		if (dm_device.ha_waiting) {
>  			dm_device.ha_waiting = false;
>  			complete(&dm_device.ol_waitevent);
> @@ -646,7 +634,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
>  		init_completion(&dm_device.ol_waitevent);
>  		dm_device.ha_waiting = true;
> 
> -		release_region_mutex(false);
> +		release_region_mutex();
>  		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
>  		ret = add_memory(nid, PFN_PHYS((start_pfn)),
>  				(HA_CHUNK << PAGE_SHIFT));
> @@ -675,7 +663,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
>  		 * have not been "onlined" within the allowed time.
>  		 */
>  		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
> -		acquire_region_mutex(false);
> +		acquire_region_mutex();
>  		post_status(&dm_device);
>  	}
> 
> @@ -886,7 +874,7 @@ static void hot_add_req(struct work_struct *dummy)
>  	resp.hdr.size = sizeof(struct dm_hot_add_response);
> 
>  #ifdef CONFIG_MEMORY_HOTPLUG
> -	acquire_region_mutex(false);
> +	acquire_region_mutex();
>  	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
>  	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
> 
> @@ -918,7 +906,7 @@ static void hot_add_req(struct work_struct *dummy)
>  	if (do_hot_add)
>  		resp.page_count = process_hot_add(pg_start, pfn_cnt,
>  						rg_start, rg_sz);
> -	release_region_mutex(false);
> +	release_region_mutex();
>  #endif
>  	/*
>  	 * The result field of the response structure has the
> @@ -1439,7 +1427,6 @@ static int balloon_probe(struct hv_device *dev,
>  	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
>  	init_completion(&dm_device.host_event);
>  	init_completion(&dm_device.config_event);
> -	init_completion(&dm_device.waiter_event);
>  	INIT_LIST_HEAD(&dm_device.ha_region_list);
>  	mutex_init(&dm_device.ha_region_mutex);
>  	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
> --
> 1.9.3

Thanks,

K. Y
