[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4aef53b1-3e0f-92eb-4bd3-cdc4cd301866@linux.alibaba.com>
Date: Mon, 28 Feb 2022 16:52:52 +0800
From: Xiaoguang Wang <xiaoguang.wang@...ux.alibaba.com>
To: Bodo Stroesser <bostroesser@...il.com>,
Guixin Liu <kanie@...ux.alibaba.com>,
gregkh@...uxfoundation.org, martin.petersen@...cle.com
Cc: linux-scsi@...r.kernel.org, target-devel@...r.kernel.org,
linux-kernel@...r.kernel.org, xlpang@...ux.alibaba.com
Subject: Re: [PATCH 2/2] scsi:target:tcmu: reduce once copy by using uio ioctl
hi Bodo,
> Liu,
>
> generally I like ideas to speed up tcmu.
>
> OTOH, since Andy Grover implemented tcmu based on uio device, we are
> restricted to what uio offers. With today's knowledge I think we would
> not use the uio device in tcmu again, but switching away from uio now
> would break existing userspace SW.
Yeah, it would take a lot of work to switch away from uio.
I came up with a hacky or crazy idea :) What about creating a new file
in tcmu_open() with anon_inode_getfile_secure() and exporting its fd via
the tcmu mailbox? We could then do ioctl() on this new file, so the uio
framework wouldn't need to be touched...
static int tcmu_open(struct uio_info *info, struct inode *inode)
{
struct tcmu_dev *udev = container_of(info, struct tcmu_dev,
uio_info);
/* O_EXCL not supported for char devs, so fake it? */
if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
return -EBUSY;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The tcmu device can only be opened once until it is closed.
Regards,
Xiaoguang Wang
>
> Before we start thinking how the same performance gain could be reached
> without a change in uio device, please let me know why you use file
> backend on top of tcmu instead of target_core_file kernel module?
> Wouldn't target_core_file be even faster for your purpose?
>
> Bodo
>
>
>
> On 17.02.22 03:29, Guixin Liu wrote:
>> Currently the data needs to be copied twice between sg, tcmu data
>> area and
>> userspace buffer if backstore holds its own userspace buffer, then we
>> can
>> use uio ioctl to copy data between sg and userspace buffer directly to
>> bypass data area to improve performance.
>>
>> Use tcm_loop and tcmu(backstore is file) to evaluate performance,
>> fio job: fio -filename=/dev/sdb -direct=1 -size=2G -name=1 -thread
>> -runtime=60 -time_based -rw=randread -numjobs=16 -iodepth=16 -bs=128k
>>
>> Without this patch:
>> READ: bw=3539MiB/s (3711MB/s), 207MiB/s-233MiB/s (217MB/s-244MB/s),
>> io=104GiB (111GB), run=30001-30002msec
>>
>> With this patch:
>> READ: bw=4420MiB/s (4634MB/s), 274MiB/s-278MiB/s (287MB/s-291MB/s),
>> io=259GiB (278GB), run=60001-60002msec
>>
>> Reviewed-by: Xiaoguang Wang <xiaoguang.wang@...ux.alibaba.com>
>> Signed-off-by: Guixin Liu <kanie@...ux.alibaba.com>
>> ---
>> drivers/target/target_core_user.c | 171
>> +++++++++++++++++++++++++++++-----
>> include/uapi/linux/target_core_user.h | 9 ++
>> 2 files changed, 157 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/target/target_core_user.c
>> b/drivers/target/target_core_user.c
>> index 7b2a89a..afea088 100644
>> --- a/drivers/target/target_core_user.c
>> +++ b/drivers/target/target_core_user.c
>> @@ -122,6 +122,7 @@ struct tcmu_dev {
>> #define TCMU_DEV_BIT_BLOCKED 2
>> #define TCMU_DEV_BIT_TMR_NOTIFY 3
>> #define TCMU_DEV_BIT_PLUGGED 4
>> +#define TCMU_DEV_BIT_BYPASS_DATA_AREA 5
>> unsigned long flags;
>> struct uio_info uio_info;
>> @@ -642,12 +643,17 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct
>> se_cmd *se_cmd)
>> tcmu_cmd->se_cmd = se_cmd;
>> tcmu_cmd->tcmu_dev = udev;
>> - tcmu_cmd_set_block_cnts(tcmu_cmd);
>> - tcmu_cmd->dbi = kcalloc(tcmu_cmd->dbi_cnt, sizeof(uint32_t),
>> - GFP_NOIO);
>> - if (!tcmu_cmd->dbi) {
>> - kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
>> - return NULL;
>> + if (!test_bit(TCMU_DEV_BIT_BYPASS_DATA_AREA, &udev->flags)) {
>> + tcmu_cmd_set_block_cnts(tcmu_cmd);
>> + tcmu_cmd->dbi = kcalloc(tcmu_cmd->dbi_cnt, sizeof(uint32_t),
>> + GFP_NOIO);
>> + if (!tcmu_cmd->dbi) {
>> + kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
>> + return NULL;
>> + }
>> + } else {
>> + tcmu_cmd->dbi_cnt = 0;
>> + tcmu_cmd->dbi = NULL;
>> }
>> return tcmu_cmd;
>> @@ -1093,16 +1099,18 @@ static int queue_cmd_ring(struct tcmu_cmd
>> *tcmu_cmd, sense_reason_t *scsi_err)
>> tcmu_cmd_reset_dbi_cur(tcmu_cmd);
>> iov = &entry->req.iov[0];
>> - if (se_cmd->data_direction == DMA_TO_DEVICE ||
>> - se_cmd->se_cmd_flags & SCF_BIDI)
>> - scatter_data_area(udev, tcmu_cmd, &iov);
>> - else
>> - tcmu_setup_iovs(udev, tcmu_cmd, &iov, se_cmd->data_length);
>> -
>> + if (!test_bit(TCMU_DEV_BIT_BYPASS_DATA_AREA, &udev->flags)) {
>> + if (se_cmd->data_direction == DMA_TO_DEVICE ||
>> + se_cmd->se_cmd_flags & SCF_BIDI)
>> + scatter_data_area(udev, tcmu_cmd, &iov);
>> + else
>> + tcmu_setup_iovs(udev, tcmu_cmd, &iov, se_cmd->data_length);
>> + }
>> entry->req.iov_cnt = iov_cnt - iov_bidi_cnt;
>> /* Handle BIDI commands */
>> - if (se_cmd->se_cmd_flags & SCF_BIDI) {
>> + if ((se_cmd->se_cmd_flags & SCF_BIDI)
>> + && !test_bit(TCMU_DEV_BIT_BYPASS_DATA_AREA, &udev->flags)) {
>> iov++;
>> tcmu_setup_iovs(udev, tcmu_cmd, &iov,
>> tcmu_cmd->data_len_bidi);
>> entry->req.iov_bidi_cnt = iov_bidi_cnt;
>> @@ -1366,16 +1374,19 @@ static bool tcmu_handle_completion(struct
>> tcmu_cmd *cmd,
>> else
>> se_cmd->se_cmd_flags |= SCF_TREAT_READ_AS_NORMAL;
>> }
>> - if (se_cmd->se_cmd_flags & SCF_BIDI) {
>> - /* Get Data-In buffer before clean up */
>> - gather_data_area(udev, cmd, true, read_len);
>> - } else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
>> - gather_data_area(udev, cmd, false, read_len);
>> - } else if (se_cmd->data_direction == DMA_TO_DEVICE) {
>> - /* TODO: */
>> - } else if (se_cmd->data_direction != DMA_NONE) {
>> - pr_warn("TCMU: data direction was %d!\n",
>> - se_cmd->data_direction);
>> +
>> + if (!test_bit(TCMU_DEV_BIT_BYPASS_DATA_AREA, &udev->flags)) {
>> + if (se_cmd->se_cmd_flags & SCF_BIDI) {
>> + /* Get Data-In buffer before clean up */
>> + gather_data_area(udev, cmd, true, read_len);
>> + } else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
>> + gather_data_area(udev, cmd, false, read_len);
>> + } else if (se_cmd->data_direction == DMA_TO_DEVICE) {
>> + /* TODO: */
>> + } else if (se_cmd->data_direction != DMA_NONE) {
>> + pr_warn("TCMU: data direction was %d!\n",
>> + se_cmd->data_direction);
>> + }
>> }
>> done:
>> @@ -1973,6 +1984,84 @@ static int tcmu_release(struct uio_info *info,
>> struct inode *inode)
>> return 0;
>> }
>> +long tcmu_ioctl_copy_between_sgl_and_iovec(struct tcmu_cmd *tcmu_cmd,
>> + struct iovec __user *uiovec,
>> + unsigned long vcnt,
>> + bool is_copy_to_sgl)
>> +{
>> + struct iovec iovstack[UIO_FASTIOV];
>> + struct iovec *iov = iovstack;
>> + struct iov_iter iter;
>> + ssize_t ret;
>> + struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
>> + struct scatterlist *data_sg, *sg;
>> + int i;
>> + unsigned int data_nents;
>> + long copy_ret = 0;
>> +
>> + if (se_cmd->se_cmd_flags & SCF_BIDI) {
>> + data_sg = se_cmd->t_bidi_data_sg;
>> + data_nents = se_cmd->t_bidi_data_nents;
>> + } else {
>> + data_sg = se_cmd->t_data_sg;
>> + data_nents = se_cmd->t_data_nents;
>> + }
>> +
>> + ret = import_iovec(READ, uiovec, vcnt, ARRAY_SIZE(iovstack),
>> &iov, &iter);
>> + if (ret < 0) {
>> + pr_err("import iovec failed.\n");
>> + return -EFAULT;
>> + }
>> +
>> + for_each_sg(data_sg, sg, data_nents, i) {
>> + if (is_copy_to_sgl)
>> + ret = copy_page_from_iter(sg_page(sg), sg->offset,
>> sg->length, &iter);
>> + else
>> + ret = copy_page_to_iter(sg_page(sg), sg->offset,
>> sg->length, &iter);
>> + if (ret < 0) {
>> + pr_err("copy failed.\n");
>> + copy_ret = -EFAULT;
>> + break;
>> + }
>> + }
>> + kfree(iov);
>> + return copy_ret;
>> +}
>> +
>> +long tcmu_ioctl(struct uio_info *info, unsigned int cmd, unsigned
>> long arg)
>> +{
>> + struct tcmu_dev *udev = container_of(info, struct tcmu_dev,
>> uio_info);
>> + struct tcmu_data_xfer __user *uxfer = (struct tcmu_data_xfer
>> __user *)arg;
>> + struct tcmu_data_xfer xfer;
>> + struct tcmu_cmd *tcmu_cmd;
>> +
>> + if (!test_bit(TCMU_DEV_BIT_BYPASS_DATA_AREA, &udev->flags))
>> + return -EINVAL;
>> +
>> + if (copy_from_user(&xfer, uxfer, sizeof(xfer)))
>> + return -EFAULT;
>> +
>> + tcmu_cmd = xa_load(&udev->commands, xfer.cmd_id);
>> + if (!tcmu_cmd) {
>> + set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
>> + return -EFAULT;
>> + }
>> +
>> + if (test_bit(TCMU_CMD_BIT_EXPIRED, &tcmu_cmd->flags))
>> + return -EFAULT;
>> +
>> + switch (cmd) {
>> + case TCMU_IOCTL_CMD_COPY_TO_SGL:
>> + return tcmu_ioctl_copy_between_sgl_and_iovec(tcmu_cmd,
>> xfer.iovec,
>> + xfer.iov_cnt, true);
>> + case TCMU_IOCTL_CMD_COPY_FROM_SGL:
>> + return tcmu_ioctl_copy_between_sgl_and_iovec(tcmu_cmd,
>> xfer.iovec,
>> + xfer.iov_cnt, false);
>> + default:
>> + return -EINVAL;
>> + }
>> +}
>> +
>> static int tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd)
>> {
>> struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
>> @@ -2230,6 +2319,7 @@ static int tcmu_configure_device(struct
>> se_device *dev)
>> info->mmap = tcmu_mmap;
>> info->open = tcmu_open;
>> info->release = tcmu_release;
>> + info->ioctl = tcmu_ioctl;
>> ret = uio_register_device(tcmu_root_device, info);
>> if (ret)
>> @@ -2838,6 +2928,40 @@ static ssize_t
>> tcmu_nl_reply_supported_store(struct config_item *item,
>> }
>> CONFIGFS_ATTR(tcmu_, nl_reply_supported);
>> +static ssize_t tcmu_bypass_data_area_show(struct config_item
>> *item, char *page)
>> +{
>> + struct se_dev_attrib *da = container_of(to_config_group(item),
>> + struct se_dev_attrib, da_group);
>> + struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
>> +
>> + if (test_bit(TCMU_DEV_BIT_BYPASS_DATA_AREA, &udev->flags))
>> + return snprintf(page, PAGE_SIZE, "%s\n", "true");
>> + else
>> + return snprintf(page, PAGE_SIZE, "%s\n", "false");
>> +}
>> +
>> +static ssize_t tcmu_bypass_data_area_store(struct config_item *item,
>> const char *page,
>> + size_t count)
>> +{
>> + struct se_dev_attrib *da = container_of(to_config_group(item),
>> + struct se_dev_attrib, da_group);
>> + struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
>> + bool bypass_data_area;
>> + int ret;
>> +
>> + ret = strtobool(page, &bypass_data_area);
>> + if (ret < 0)
>> + return ret;
>> +
>> + if (bypass_data_area)
>> + set_bit(TCMU_DEV_BIT_BYPASS_DATA_AREA, &udev->flags);
>> + else
>> + clear_bit(TCMU_DEV_BIT_BYPASS_DATA_AREA, &udev->flags);
>> +
>> + return count;
>> +}
>> +CONFIGFS_ATTR(tcmu_, bypass_data_area);
>> +
>> static ssize_t tcmu_emulate_write_cache_show(struct config_item *item,
>> char *page)
>> {
>> @@ -3069,6 +3193,7 @@ static ssize_t tcmu_free_kept_buf_store(struct
>> config_item *item, const char *pa
>> &tcmu_attr_emulate_write_cache,
>> &tcmu_attr_tmr_notification,
>> &tcmu_attr_nl_reply_supported,
>> + &tcmu_attr_bypass_data_area,
>> NULL,
>> };
>> diff --git a/include/uapi/linux/target_core_user.h
>> b/include/uapi/linux/target_core_user.h
>> index 27ace51..c02a45e 100644
>> --- a/include/uapi/linux/target_core_user.h
>> +++ b/include/uapi/linux/target_core_user.h
>> @@ -185,4 +185,13 @@ enum tcmu_genl_attr {
>> };
>> #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
>> +struct tcmu_data_xfer {
>> + unsigned short cmd_id;
>> + unsigned long iov_cnt;
>> + struct iovec __user *iovec;
>> +};
>> +
>> +#define TCMU_IOCTL_CMD_COPY_TO_SGL _IOW('T', 0xe0, struct
>> tcmu_data_xfer)
>> +#define TCMU_IOCTL_CMD_COPY_FROM_SGL _IOW('T', 0xe1, struct
>> tcmu_data_xfer)
>> +
>> #endif
Powered by blists - more mailing lists