lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 16 May 2022 19:20:16 +0200
From:   Christian König <christian.koenig@....com>
To:     "T.J. Mercier" <tjmercier@...gle.com>, surenb@...gle.com,
        kaleshsingh@...gle.com, minchan@...gle.com, gregkh@...gle.com,
        jstultz@...gle.com, Sumit Semwal <sumit.semwal@...aro.org>,
        Daniel Vetter <daniel.vetter@...ll.ch>,
        Hridya Valsaraju <hridya@...gle.com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>
Cc:     kernel-team@...roid.com, linux-media@...r.kernel.org,
        dri-devel@...ts.freedesktop.org, linaro-mm-sig@...ts.linaro.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] dma-buf: Move sysfs work out of DMA-BUF export path

Am 16.05.22 um 19:13 schrieb T.J. Mercier:
> Recently, we noticed an issue where a process went into direct reclaim
> while holding the kernfs rw semaphore for sysfs in write (exclusive)
> mode. This caused processes that were doing DMA-BUF exports and releases
> to go into uninterruptible sleep since they needed to acquire the same
> semaphore for the DMA-BUF sysfs entry creation/deletion. In order to avoid
> blocking DMA-BUF export for an indeterminate amount of time while
> another process is holding the sysfs rw semaphore in exclusive mode,
> this patch moves the per-buffer sysfs file creation to the default work
> queue. Note that this can lead to a short-term inaccuracy in the dmabuf
> sysfs statistics, but this is a tradeoff to prevent the hot path from
> being blocked. A work_struct is added to dma_buf to achieve this, but as
> it is unioned with the kobject in the sysfs_entry, dma_buf does not
> increase in size.

I'm still not very keen on this approach, as it strongly feels like we 
are working around a shortcoming somewhere else.

> Fixes: bdb8d06dfefd ("dmabuf: Add the capability to expose DMA-BUF stats in sysfs")
> Originally-by: Hridya Valsaraju <hridya@...gle.com>
> Signed-off-by: T.J. Mercier <tjmercier@...gle.com>
>
> ---
> See the originally submitted patch by Hridya Valsaraju here:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml.org%2Flkml%2F2022%2F1%2F4%2F1066&data=05%7C01%7Cchristian.koenig%40amd.com%7C5575fa6126d74ca4315408da375f618d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637883180063393649%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=1PcZaUfsLhQZOW29yGUDxazzcyNoBrN2NjeN1Yb40hk%3D&reserved=0
>
> v2 changes:
> - Defer only sysfs creation instead of creation and teardown per
> Christian König
>
> - Use a work queue instead of a kthread for deferred work per
> Christian König
> ---
>   drivers/dma-buf/dma-buf-sysfs-stats.c | 56 ++++++++++++++++++++-------
>   include/linux/dma-buf.h               | 14 ++++++-
>   2 files changed, 54 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c
> index 2bba0babcb62..67b0a298291c 100644
> --- a/drivers/dma-buf/dma-buf-sysfs-stats.c
> +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c
> @@ -11,6 +11,7 @@
>   #include <linux/printk.h>
>   #include <linux/slab.h>
>   #include <linux/sysfs.h>
> +#include <linux/workqueue.h>
>   
>   #include "dma-buf-sysfs-stats.h"
>   
> @@ -168,10 +169,46 @@ void dma_buf_uninit_sysfs_statistics(void)
>   	kset_unregister(dma_buf_stats_kset);
>   }
>   
> +static void sysfs_add_workfn(struct work_struct *work)
> +{
> +	struct dma_buf_sysfs_entry *sysfs_entry =
> +		container_of(work, struct dma_buf_sysfs_entry, sysfs_add_work);
> +	struct dma_buf *dmabuf = sysfs_entry->dmabuf;
> +
> +	/*
> +	 * A dmabuf is ref-counted via its file member. If this handler holds the only
> +	 * reference to the dmabuf, there is no need for sysfs kobject creation. This is an
> +	 * optimization and a race; when the reference count drops to 1 immediately after
> +	 * this check it is not harmful as the sysfs entry will still get cleaned up in
> +	 * dma_buf_stats_teardown, which won't get called until the final dmabuf reference
> +	 * is released, and that can't happen until the end of this function.
> +	 */
> +	if (file_count(dmabuf->file) > 1) {

Please completely drop that. I see absolutely no justification for this 
additional complexity.

> +		/*
> +		 * kobject_init_and_add expects kobject to be zero-filled, but we have populated it
> +		 * (the sysfs_add_work union member) to trigger this work function.
> +		 */
> +		memset(&dmabuf->sysfs_entry->kobj, 0, sizeof(dmabuf->sysfs_entry->kobj));
> +		dmabuf->sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset;
> +		if (kobject_init_and_add(&dmabuf->sysfs_entry->kobj, &dma_buf_ktype, NULL,
> +						"%lu", file_inode(dmabuf->file)->i_ino)) {
> +			kobject_put(&dmabuf->sysfs_entry->kobj);
> +			dmabuf->sysfs_entry = NULL;
> +		}
> +	} else {
> +		/*
> +		 * Free the sysfs_entry and reset the pointer so dma_buf_stats_teardown doesn't
> +		 * attempt to operate on it.
> +		 */
> +		kfree(dmabuf->sysfs_entry);
> +		dmabuf->sysfs_entry = NULL;
> +	}
> +	dma_buf_put(dmabuf);
> +}
> +
>   int dma_buf_stats_setup(struct dma_buf *dmabuf)
>   {
>   	struct dma_buf_sysfs_entry *sysfs_entry;
> -	int ret;
>   
>   	if (!dmabuf || !dmabuf->file)
>   		return -EINVAL;
> @@ -181,25 +218,16 @@ int dma_buf_stats_setup(struct dma_buf *dmabuf)
>   		return -EINVAL;
>   	}
>   
> -	sysfs_entry = kzalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL);
> +	sysfs_entry = kmalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL);
>   	if (!sysfs_entry)
>   		return -ENOMEM;
>   
> -	sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset;
>   	sysfs_entry->dmabuf = dmabuf;
> -
>   	dmabuf->sysfs_entry = sysfs_entry;
>   
> -	/* create the directory for buffer stats */
> -	ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL,
> -				   "%lu", file_inode(dmabuf->file)->i_ino);
> -	if (ret)
> -		goto err_sysfs_dmabuf;
> +	INIT_WORK(&dmabuf->sysfs_entry->sysfs_add_work, sysfs_add_workfn);
> +	get_dma_buf(dmabuf); /* This reference will be dropped in sysfs_add_workfn. */
> +	schedule_work(&dmabuf->sysfs_entry->sysfs_add_work);
>   
>   	return 0;
> -
> -err_sysfs_dmabuf:
> -	kobject_put(&sysfs_entry->kobj);
> -	dmabuf->sysfs_entry = NULL;
> -	return ret;
>   }
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 2097760e8e95..0200caa3c515 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -22,6 +22,7 @@
>   #include <linux/fs.h>
>   #include <linux/dma-fence.h>
>   #include <linux/wait.h>
> +#include <linux/workqueue.h>
>   
>   struct device;
>   struct dma_buf;
> @@ -365,7 +366,7 @@ struct dma_buf {
>   	 */
>   	const char *name;
>   

> -	/** @name_lock: Spinlock to protect name acces for read access. */
> +	/** @name_lock: Spinlock to protect name access for read access. */
>   	spinlock_t name_lock;
>   
>   	/**
> @@ -441,6 +442,7 @@ struct dma_buf {
>   
>   		__poll_t active;
>   	} cb_in, cb_out;
> +

Those changes are unrelated.

Regards,
Christian.

>   #ifdef CONFIG_DMABUF_SYSFS_STATS
>   	/**
>   	 * @sysfs_entry:
> @@ -449,7 +451,15 @@ struct dma_buf {
>   	 * `DMA-BUF statistics`_ for the uapi this enables.
>   	 */
>   	struct dma_buf_sysfs_entry {
> -		struct kobject kobj;
> +		union {
> +			struct kobject kobj;
> +
> +			/** @sysfs_add_work:
> +			 *
> +			 * For deferred sysfs kobject creation using a workqueue.
> +			 */
> +			struct work_struct sysfs_add_work;
> +		};
>   		struct dma_buf *dmabuf;
>   	} *sysfs_entry;
>   #endif

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ