lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <acad81e4-c2ef-59cc-5f0c-33b99082d270@samba.org>
Date:   Thu, 20 Oct 2022 12:04:53 +0200
From:   Stefan Metzmacher <metze@...ba.org>
To:     Pavel Begunkov <asml.silence@...il.com>,
        io-uring <io-uring@...r.kernel.org>, Jens Axboe <axboe@...nel.dk>
Cc:     Jakub Kicinski <kuba@...nel.org>, netdev <netdev@...r.kernel.org>,
        Dylan Yudaken <dylany@...com>
Subject: IORING_SEND_NOTIF_REPORT_USAGE (was Re: IORING_CQE_F_COPIED)

Hi Pavel,

> Yep, sth like that should do, but let's guard against
> spurious net_zcopy_put() just in case.
> 
> used = false;
> copied = false;
> 
> callback(skb, success, ubuf) {
>      if (skb)
>          used = true;
>      if (!success)
>          copied = true;
> }
> complete() {
>      if (!used || copied)
>          set_flag(IORING_CQE_F_COPIED);
> }
> 
>> And __io_notif_complete_tw still needs:
>>
>>          if (!nd->zero_copied)
>>                  notif->cqe.flags |= IORING_CQE_F_COPIED;
> 
> Which can be shoved in a custom callback
> 

Ok, got the idea.

> I'm more concerned about future changes around it, but there won't
> be extra ifs.
> 
> #define COMMON_FLAGS (RECVSEND_FIRST_POLL|...)
> #define ALL_FLAGS (COMMON_FLAGS|RECVSEND_PROBE)
> 
> if (flags & ~COMMON_FLAGS) {
>      if (flags & ~ALL_FLAGS)
>          return err;
>      if (flags & RECVSEND_PROBE)
>          set_callback(notif);
> }

So far I came up with an IORING_SEND_NOTIF_REPORT_USAGE opt-in flag
and the reporting is done in cqe.res with IORING_NOTIF_USAGE_ZC_USED (0x00000001)
and/or IORING_NOTIF_USAGE_ZC_COPIED (0x80000000). So the caller is also
able to notice that some parts were able to use zero copy, while other
fragments were copied.

I haven't tested it yet, but I want to post it early...

What do you think?

metze

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ab7458033ee3..751fc4eff8d1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -296,10 +296,28 @@ enum io_uring_op {
   *
   * IORING_RECVSEND_FIXED_BUF	Use registered buffers, the index is stored in
   *				the buf_index field.
+ *
+ * IORING_SEND_NOTIF_REPORT_USAGE
+ *				If SEND[MSG]_ZC should report
+ *				the zerocopy usage in cqe.res
+ *				for the IORING_CQE_F_NOTIF cqe.
+ *				IORING_NOTIF_USAGE_ZC_USED if zero copy was used
+ *				(at least partially).
+ *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
+ *				(at least partially).
   */
  #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
  #define IORING_RECV_MULTISHOT		(1U << 1)
  #define IORING_RECVSEND_FIXED_BUF	(1U << 2)
+#define IORING_SEND_NOTIF_REPORT_USAGE	(1U << 3)
+
+/*
+ * cqe.res for IORING_CQE_F_NOTIF if
+ * IORING_SEND_NOTIF_REPORT_USAGE was requested
+ */
+#define IORING_NOTIF_USAGE_ZC_USED	(1U << 0)
+#define IORING_NOTIF_USAGE_ZC_COPIED	(1U << 31)
+

  /*
   * accept flags stored in sqe->ioprio
diff --git a/io_uring/net.c b/io_uring/net.c
index 735eec545115..a79d7d349e19 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -946,9 +946,11 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

  	zc->flags = READ_ONCE(sqe->ioprio);
  	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
-			  IORING_RECVSEND_FIXED_BUF))
+			  IORING_RECVSEND_FIXED_BUF |
+			  IORING_SEND_NOTIF_REPORT_USAGE))
  		return -EINVAL;
-	notif = zc->notif = io_alloc_notif(ctx);
+	notif = zc->notif = io_alloc_notif(ctx,
+					   zc->flags & IORING_SEND_NOTIF_REPORT_USAGE);
  	if (!notif)
  		return -ENOMEM;
  	notif->cqe.user_data = req->cqe.user_data;
diff --git a/io_uring/notif.c b/io_uring/notif.c
index e37c6569d82e..3844e3c8ad7e 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -3,13 +3,14 @@
  #include <linux/file.h>
  #include <linux/slab.h>
  #include <linux/net.h>
+#include <linux/errqueue.h>
  #include <linux/io_uring.h>

  #include "io_uring.h"
  #include "notif.h"
  #include "rsrc.h"

-static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
+static inline void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
  {
  	struct io_notif_data *nd = io_notif_to_data(notif);
  	struct io_ring_ctx *ctx = notif->ctx;
@@ -21,20 +22,46 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
  	io_req_task_complete(notif, locked);
  }

-static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
-					  struct ubuf_info *uarg,
-					  bool success)
+static inline void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
+						 struct ubuf_info *uarg,
+						 bool success)
  {
  	struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
  	struct io_kiocb *notif = cmd_to_io_kiocb(nd);

  	if (refcount_dec_and_test(&uarg->refcnt)) {
-		notif->io_task_work.func = __io_notif_complete_tw;
  		io_req_task_work_add(notif);
  	}
  }

-struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
+static void __io_notif_complete_tw_report_usage(struct io_kiocb *notif, bool *locked)
+{
+	struct io_notif_data *nd = io_notif_to_data(notif);
+
+	if (likely(nd->zc_used))
+		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_USED;
+
+	if (unlikely(nd->zc_copied))
+		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
+
+	__io_notif_complete_tw(notif, locked);
+}
+
+static void io_uring_tx_zerocopy_callback_report_usage(struct sk_buff *skb,
+							struct ubuf_info *uarg,
+							bool success)
+{
+	struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
+
+	if (success && !nd->zc_used && skb)
+		nd->zc_used = true;
+	else if (unlikely(!success && !nd->zc_copied))
+		nd->zc_copied = true;
+
+	io_uring_tx_zerocopy_callback(skb, uarg, success);
+}
+
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, bool report_usage)
  	__must_hold(&ctx->uring_lock)
  {
  	struct io_kiocb *notif;
@@ -54,7 +81,14 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
  	nd = io_notif_to_data(notif);
  	nd->account_pages = 0;
  	nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
-	nd->uarg.callback = io_uring_tx_zerocopy_callback;
+	if (report_usage) {
+		nd->zc_used = nd->zc_copied = false;
+		nd->uarg.callback = io_uring_tx_zerocopy_callback_report_usage;
+		notif->io_task_work.func = __io_notif_complete_tw_report_usage;
+	} else {
+		nd->uarg.callback = io_uring_tx_zerocopy_callback;
+		notif->io_task_work.func = __io_notif_complete_tw;
+	}
  	refcount_set(&nd->uarg.refcnt, 1);
  	return notif;
  }
@@ -66,7 +100,6 @@ void io_notif_flush(struct io_kiocb *notif)

  	/* drop slot's master ref */
  	if (refcount_dec_and_test(&nd->uarg.refcnt)) {
-		notif->io_task_work.func = __io_notif_complete_tw;
  		io_req_task_work_add(notif);
  	}
  }
diff --git a/io_uring/notif.h b/io_uring/notif.h
index 5b4d710c8ca5..5ac7a2745e52 100644
--- a/io_uring/notif.h
+++ b/io_uring/notif.h
@@ -13,10 +13,12 @@ struct io_notif_data {
  	struct file		*file;
  	struct ubuf_info	uarg;
  	unsigned long		account_pages;
+	bool			zc_used;
+	bool			zc_copied;
  };

  void io_notif_flush(struct io_kiocb *notif);
-struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx);
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, bool report_usage);

  static inline struct io_notif_data *io_notif_to_data(struct io_kiocb *notif)
  {

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ