lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Thu, 30 May 2024 07:16:30 +0000
From: Nitesh Shetty <nj.shetty@...sung.com>
To: Bart Van Assche <bvanassche@....org>
Cc: Damien Le Moal <dlemoal@...nel.org>, Jens Axboe <axboe@...nel.dk>,
	Jonathan Corbet <corbet@....net>, Alasdair Kergon <agk@...hat.com>, Mike
	Snitzer <snitzer@...nel.org>, Mikulas Patocka <mpatocka@...hat.com>, Keith
	Busch <kbusch@...nel.org>, Christoph Hellwig <hch@....de>, Sagi Grimberg
	<sagi@...mberg.me>, Chaitanya Kulkarni <kch@...dia.com>, Alexander Viro
	<viro@...iv.linux.org.uk>, Christian Brauner <brauner@...nel.org>, Jan Kara
	<jack@...e.cz>, martin.petersen@...cle.com, david@...morbit.com,
	hare@...e.de, damien.lemoal@...nsource.wdc.com, anuj20.g@...sung.com,
	joshi.k@...sung.com, nitheshshetty@...il.com, gost.dev@...sung.com,
	linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
	linux-doc@...r.kernel.org, dm-devel@...ts.linux.dev,
	linux-nvme@...ts.infradead.org, linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH v20 02/12] Add infrastructure for copy offload in block
 and request layer.

On 29/05/24 03:41PM, Bart Van Assche wrote:
>On 5/29/24 12:48 AM, Damien Le Moal wrote:
>>On 5/29/24 15:17, Nitesh Shetty wrote:
>>>On 24/05/24 01:33PM, Bart Van Assche wrote:
>>>>On 5/20/24 03:20, Nitesh Shetty wrote:
>>>>>We add two new opcode REQ_OP_COPY_DST, REQ_OP_COPY_SRC.
>>>>>Since copy is a composite operation involving src and dst sectors/lba,
>>>>>each needs to be represented by a separate bio to make it compatible
>>>>>with device mapper.
>>>>>We expect caller to take a plug and send bio with destination information,
>>>>>followed by bio with source information.
>>>>>Once the dst bio arrives we form a request and wait for source
>>>>>bio. Upon arrival of source bio we merge these two bio's and send
>>>>>corresponding request down to device driver.
>>>>>Merging non copy offload bio is avoided by checking for copy specific
>>>>>opcodes in merge function.
>>>>
>>>>In this patch I don't see any changes for blk_attempt_bio_merge(). Does
>>>>this mean that combining REQ_OP_COPY_DST and REQ_OP_COPY_SRC will never
>>>>happen if the QUEUE_FLAG_NOMERGES request queue flag has been set?
>>>>
>>>Yes, in this case copy won't work, as both src and dst bio reach driver
>>>as part of separate requests.
>>>We will add this as part of documentation.
>>
>>So that means that 2 major SAS HBAs which set this flag (megaraid and mpt3sas)
>>will not get support for copy offload ? Not ideal, by far.
>
>QUEUE_FLAG_NOMERGES can also be set through sysfs (see also
>queue_nomerges_store()). This is one of the reasons why using the merge
>infrastructure for combining REQ_OP_COPY_DST and REQ_OP_COPY_SRC is
>unacceptable.
>

Bart, Damien, Hannes,
Thanks for your review.
We tried a slightly modified approach which simplifies this patch and
also avoids merge path.
Also with this, we should be able to solve the QUEUE_FLAG_MERGES issue.
Previously we also tried payload/token based approach,
which avoids merge path and tries to combine bios in driver.
But we received feedback that it wasn't the right approach [1].
Do below changes look any better or do you guys have anything else in mind ?

[1] https://lore.kernel.org/linux-block/ZIKphgDavKVPREnw@infradead.org/


diff --git a/block/blk-core.c b/block/blk-core.c
index 82c3ae22d76d..7158bac8cc57 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -122,6 +122,8 @@ static const char *const blk_op_name[] = {
  	REQ_OP_NAME(ZONE_FINISH),
  	REQ_OP_NAME(ZONE_APPEND),
  	REQ_OP_NAME(WRITE_ZEROES),
+	REQ_OP_NAME(COPY_SRC),
+	REQ_OP_NAME(COPY_DST),
  	REQ_OP_NAME(DRV_IN),
  	REQ_OP_NAME(DRV_OUT),
  };
@@ -839,6 +841,11 @@ void submit_bio_noacct(struct bio *bio)
  		 * requests.
  		 */
  		fallthrough;
+	case REQ_OP_COPY_SRC:
+	case REQ_OP_COPY_DST:
+		if (!q->limits.max_copy_sectors)
+			goto not_supported;
+		break;
  	default:
  		goto not_supported;
  	}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8534c35e0497..a651e7c114d0 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -154,6 +154,20 @@ static struct bio *bio_split_write_zeroes(struct bio *bio,
  	return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
  }
  
+static struct bio *bio_split_copy(struct bio *bio,
+				  const struct queue_limits *lim,
+				  unsigned int *nsegs)
+{
+	*nsegs = 1;
+	if (bio_sectors(bio) <= lim->max_copy_sectors)
+		return NULL;
+	/*
+	 * We don't support splitting for a copy bio. End it with EIO if
+	 * splitting is required and return an error pointer.
+	 */
+	return ERR_PTR(-EIO);
+}
+
  /*
   * Return the maximum number of sectors from the start of a bio that may be
   * submitted as a single request to a block device. If enough sectors remain,
@@ -362,6 +376,12 @@ struct bio *__bio_split_to_limits(struct bio *bio,
  	case REQ_OP_WRITE_ZEROES:
  		split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
  		break;
+	case REQ_OP_COPY_SRC:
+	case REQ_OP_COPY_DST:
+		split = bio_split_copy(bio, lim, nr_segs);
+		if (IS_ERR(split))
+			return NULL;
+		break;
  	default:
  		split = bio_split_rw(bio, lim, nr_segs, bs,
  				get_max_io_size(bio, lim) << SECTOR_SHIFT);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3b4df8e5ac9e..6d4ffbdade28 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2833,6 +2833,63 @@ static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
  		blk_mq_commit_rqs(hctx, queued, false);
  }
  
+/*
+ * Copy offload sends a pair of bio with REQ_OP_COPY_DST and REQ_OP_COPY_SRC
+ * operation by taking a plug.
+ * Initially DST bio is sent which forms a request and
+ * waits for SRC bio to arrive. Once SRC bio arrives
+ * we combine it and send request down to driver.
+ */
+static inline bool blk_copy_offload_combine_ok(struct request *req,
+					      struct bio *bio)
+{
+	return (req_op(req) == REQ_OP_COPY_DST &&
+		bio_op(bio) == REQ_OP_COPY_SRC);
+}
+
+static int blk_copy_offload_combine(struct request *req, struct bio *bio)
+{
+	if (!blk_copy_offload_combine_ok(req, bio))
+		return 1;
+
+	if (req->__data_len != bio->bi_iter.bi_size)
+		return 1;
+
+	req->biotail->bi_next = bio;
+	req->biotail = bio;
+	req->nr_phys_segments++;
+	req->__data_len += bio->bi_iter.bi_size;
+
+	return 0;
+}
+
+static inline bool blk_copy_offload_attempt_combine(struct request_queue *q,
+					     struct bio *bio)
+{
+	struct blk_plug *plug = current->plug;
+	struct request *rq;
+
+	if (!plug || rq_list_empty(plug->mq_list))
+		return false;
+
+	rq_list_for_each(&plug->mq_list, rq) {
+		if (rq->q == q) {
+			if (!blk_copy_offload_combine(rq, bio))
+				return true;
+			break;
+		}
+
+		/*
+		 * Only keep iterating plug list for combines if we have multiple
+		 * queues
+		 */
+		if (!plug->multiple_queues)
+			break;
+	}
+	return false;
+}
+
  static bool blk_mq_attempt_bio_merge(struct request_queue *q,
  				     struct bio *bio, unsigned int nr_segs)
  {
@@ -2977,6 +3034,9 @@ void blk_mq_submit_bio(struct bio *bio)
  	if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
  		goto queue_exit;
  
+	if (blk_copy_offload_attempt_combine(q, bio))
+		goto queue_exit;
+
  	if (blk_queue_is_zoned(q) && blk_zone_plug_bio(bio, nr_segs))
  		goto queue_exit;
  
diff --git a/block/blk.h b/block/blk.h
index 189bc25beb50..112c6736f44c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -323,6 +323,8 @@ static inline bool bio_may_exceed_limits(struct bio *bio,
  	case REQ_OP_DISCARD:
  	case REQ_OP_SECURE_ERASE:
  	case REQ_OP_WRITE_ZEROES:
+	case REQ_OP_COPY_SRC:
+	case REQ_OP_COPY_DST:
  		return true; /* non-trivial splitting decisions */
  	default:
  		break;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 781c4500491b..22a08425d13e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -342,6 +342,10 @@ enum req_op {
  	/* reset all the zone present on the device */
  	REQ_OP_ZONE_RESET_ALL	= (__force blk_opf_t)15,
  
+	/* copy offload source and destination operations */
+	REQ_OP_COPY_SRC		= (__force blk_opf_t)18,
+	REQ_OP_COPY_DST		= (__force blk_opf_t)19,
+
  	/* Driver private requests */
  	REQ_OP_DRV_IN		= (__force blk_opf_t)34,
  	REQ_OP_DRV_OUT		= (__force blk_opf_t)35,
--ยท
2.34.1



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ