lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 27 Mar 2018 18:59:11 +0200
From:   Björn Töpel <bjorn.topel@...il.com>
To:     bjorn.topel@...il.com, magnus.karlsson@...el.com,
        alexander.h.duyck@...el.com, alexander.duyck@...il.com,
        john.fastabend@...il.com, ast@...com, brouer@...hat.com,
        willemdebruijn.kernel@...il.com, daniel@...earbox.net,
        netdev@...r.kernel.org
Cc:     Björn Töpel <bjorn.topel@...el.com>,
        michael.lundkvist@...csson.com, jesse.brandeburg@...el.com,
        anjali.singhai@...el.com, qi.z.zhang@...el.com,
        ravineet.singh@...csson.com
Subject: [RFC PATCH v2 06/14] xsk: add Rx receive functions and poll support

From: Björn Töpel <bjorn.topel@...el.com>

Here the actual receive functions of AF_XDP are implemented. In a later
commit, they will be called from the XDP layers.

There's one set of functions for the XDP_DRV side and another for
XDP_SKB (generic).

Support for the poll syscall is also implemented.

Signed-off-by: Björn Töpel <bjorn.topel@...el.com>
---
 net/xdp/xdp_umem.h  |  18 +++++
 net/xdp/xsk.c       |  81 ++++++++++++++++++++-
 net/xdp/xsk_queue.h | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 304 insertions(+), 1 deletion(-)

diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index ad041b911b38..5e7105b7760b 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -36,6 +36,24 @@ struct xdp_umem {
 	struct user_struct *user;
 };
 
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u32 idx)
+{
+	u64 pg, off;
+	char *data;
+
+	pg = idx >> umem->nfpplog2;
+	off = (idx - (pg << umem->nfpplog2)) << umem->frame_size_log2;
+
+	data = page_address(umem->pgs[pg]);
+	return data + off;
+}
+
+static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
+						    u32 idx)
+{
+	return xdp_umem_get_data(umem, idx) + umem->frame_headroom;
+}
+
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
 void xdp_get_umem(struct xdp_umem *umem);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d99a1b830f94..a60b1fcfb2b3 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,10 +35,14 @@
 #include "xsk_queue.h"
 #include "xdp_umem.h"
 
+#define RX_BATCH_SIZE 16
+
 struct xdp_sock {
 	/* struct sock must be the first member of struct xdp_sock */
 	struct sock sk;
 	struct xsk_queue *rx;
+	struct xskq_iter rx_it;
+	u64 rx_dropped;
 	struct net_device *dev;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
@@ -52,6 +56,74 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
 	return (struct xdp_sock *)sk;
 }
 
+static inline int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	u32 len = xdp->data_end - xdp->data;
+	void *buffer;
+	int err = 0;
+	u32 id;
+
+	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+		return -EINVAL;
+
+	if (!xskq_next_frame_deq(xs->umem->fq, &xs->rx_it, RX_BATCH_SIZE))
+		return -ENOSPC;
+
+	id = xdp_umem_get_id(xs->umem->fq, &xs->rx_it);
+	buffer = xdp_umem_get_data_with_headroom(xs->umem, id);
+	memcpy(buffer, xdp->data, len);
+	err = xskq_rxtx_enq_frame(xs->rx, id, len, xs->umem->frame_headroom);
+	if (err)
+		xskq_deq_return_frame(&xs->rx_it);
+
+	return err;
+}
+
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	int err;
+
+	err = __xsk_rcv(xs, xdp);
+	if (!err)
+		page_frag_free(xdp->data);
+	else
+		xs->rx_dropped++;
+
+	return err;
+}
+
+void xsk_flush(struct xdp_sock *xs)
+{
+	xskq_enq_flush(xs->rx);
+	xs->sk.sk_data_ready(&xs->sk);
+}
+
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	int err;
+
+	err = __xsk_rcv(xs, xdp);
+	if (!err)
+		xsk_flush(xs);
+	else
+		xs->rx_dropped++;
+
+	return err;
+}
+
+static unsigned int xsk_poll(struct file *file, struct socket *sock,
+			     struct poll_table_struct *wait)
+{
+	unsigned int mask = datagram_poll(file, sock, wait);
+	struct sock *sk = sock->sk;
+	struct xdp_sock *xs = xdp_sk(sk);
+
+	if (xs->rx && !xskq_empty(xs->rx))
+		mask |= POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
 static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
 			  bool umem_queue)
 {
@@ -190,6 +262,9 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
 		err = -EINVAL;
 		goto out_unlock;
+	} else {
+		/* This xsk has its own umem. */
+		xskq_set_umem(xs->umem->fq, &xs->umem->props);
 	}
 
 	/* Rebind? */
@@ -204,6 +279,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	xs->ifindex = sxdp->sxdp_ifindex;
 	xs->queue_id = sxdp->sxdp_queue_id;
 
+	xskq_init_iter(&xs->rx_it);
+
+	xskq_set_umem(xs->rx, &xs->umem->props);
+
 out_unlock:
 	if (err)
 		dev_put(dev);
@@ -340,7 +419,7 @@ static const struct proto_ops xsk_proto_ops = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	sock_no_getname,
-	.poll =		sock_no_poll,
+	.poll =		xsk_poll,
 	.ioctl =	sock_no_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index d79b613a9e0a..af6e651f1207 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -37,6 +37,187 @@ struct xsk_queue {
 	u64 invalid_descs;
 };
 
+struct xskq_iter {
+	u32 head;
+	u32 tail;
+	struct xdp_desc desc_copy;
+};
+
+/* Common functions operating on both RXTX and umem queues */
+
+static inline bool xskq_is_valid_rx_entry(struct xsk_queue *q,
+					  u32 idx)
+{
+	if (unlikely(idx >= q->umem_props->nframes)) {
+		q->invalid_descs++;
+		return false;
+	}
+	return true;
+}
+
+static inline bool xskq_is_valid_tx_entry(struct xsk_queue *q,
+					  struct xdp_desc *d)
+{
+	u32 buff_len;
+
+	if (unlikely(d->idx >= q->umem_props->nframes)) {
+		q->invalid_descs++;
+		return false;
+	}
+
+	buff_len = q->umem_props->frame_size;
+	if (unlikely(d->len > buff_len || d->len == 0 ||
+		     d->offset > buff_len || d->offset + d->len > buff_len)) {
+		q->invalid_descs++;
+		return false;
+	}
+
+	return true;
+}
+
+static inline u32 xskq_nb_free(struct xsk_queue *q, u32 head_idx, u32 dcnt)
+{
+	u32 free_entries = q->nentries - (head_idx - q->cached_tail);
+
+	if (free_entries >= dcnt)
+		return free_entries;
+
+	/* Refresh the local tail pointer */
+	q->cached_tail = READ_ONCE(q->ring->tail_idx);
+	return q->nentries - (head_idx - q->cached_tail);
+}
+
+static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
+{
+	u32 entries = q->cached_head - q->cached_tail;
+
+	if (entries == 0)
+		/* Refresh the local head pointer */
+		q->cached_head = READ_ONCE(q->ring->head_idx);
+
+	entries = q->cached_head - q->cached_tail;
+	return (entries > dcnt) ? dcnt : entries;
+}
+
+static inline bool xskq_empty(struct xsk_queue *q)
+{
+	if (xskq_nb_free(q, q->cached_head, 1) == q->nentries)
+		return true;
+	return false;
+}
+
+static inline bool xskq_full(struct xsk_queue *q)
+{
+	if (xskq_nb_avail(q, q->nentries) == q->nentries)
+		return true;
+	return false;
+}
+
+static inline void xskq_init_iter(struct xskq_iter *it)
+{
+	it->head = 0;
+	it->tail = 0;
+}
+
+static inline void xskq_set_umem(struct xsk_queue *q,
+				 struct xdp_umem_props *umem_props)
+{
+	q->umem_props = umem_props;
+}
+
+static inline bool xskq_iter_end(struct xskq_iter *it)
+{
+	return it->tail == it->head;
+}
+
+static inline void xskq_iter_validate(struct xsk_queue *q,
+				      struct xskq_iter *it)
+{
+	while (!xskq_iter_end(it)) {
+		unsigned int idx = it->tail & q->ring_mask;
+
+		if (q->validation == XSK_VALIDATION_TX) {
+			struct xdp_rxtx_queue *ring =
+				(struct xdp_rxtx_queue *)q->ring;
+
+			it->desc_copy.idx = ring->desc[idx].idx;
+			it->desc_copy.len = ring->desc[idx].len;
+			it->desc_copy.offset = ring->desc[idx].offset;
+
+			if (xskq_is_valid_tx_entry(q, &it->desc_copy))
+				break;
+		} else {
+			/* XSK_VALIDATION_RX */
+			struct xdp_umem_queue *ring =
+				(struct xdp_umem_queue *)q->ring;
+
+			if (xskq_is_valid_rx_entry(q, ring->desc[idx]))
+				break;
+		}
+
+		it->tail++;
+	}
+}
+
+static inline void xskq_deq_iter(struct xsk_queue *q,
+				 struct xskq_iter *it, int cnt)
+{
+	it->tail = q->cached_tail;
+	it->head = q->cached_tail + xskq_nb_avail(q, cnt);
+
+	/* Order tail and data */
+	smp_rmb();
+
+	xskq_iter_validate(q, it);
+}
+
+static inline void xskq_deq_iter_next(struct xsk_queue *q,
+				      struct xskq_iter *it)
+{
+	it->tail++;
+	xskq_iter_validate(q, it);
+}
+
+static inline void xskq_deq_iter_done(struct xsk_queue *q,
+				      struct xskq_iter *it)
+{
+	q->cached_tail = it->tail;
+	WRITE_ONCE(q->ring->tail_idx, it->tail);
+}
+
+static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
+{
+	return q ? q->invalid_descs : 0;
+}
+
+static inline bool xskq_next_frame_deq(struct xsk_queue *q,
+				       struct xskq_iter *it,
+				       u32 batch_size)
+{
+	if (xskq_iter_end(it)) {
+		xskq_deq_iter_done(q, it);
+		xskq_deq_iter(q, it, batch_size);
+		return !xskq_iter_end(it);
+	}
+
+	xskq_deq_iter_next(q, it);
+	return !xskq_iter_end(it);
+}
+
+static inline void xskq_deq_return_frame(struct xskq_iter *it)
+{
+	it->tail--;
+}
+
+static inline void xskq_enq_flush(struct xsk_queue *q)
+{
+	/* Order data writes before publishing the head index */
+	smp_wmb();
+
+	WRITE_ONCE(q->ring->head_idx, q->iter_head_idx);
+	q->cached_head = q->iter_head_idx;
+}
+
 /* Functions operating on RXTX queues only */
 
 static inline u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
@@ -45,6 +226,23 @@ static inline u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
 		q->nentries * sizeof(struct xdp_desc));
 }
 
+static inline int xskq_rxtx_enq_frame(struct xsk_queue *q,
+				      u32 id, u32 len, u16 offset)
+{
+	struct xdp_rxtx_queue *ring = (struct xdp_rxtx_queue *)q->ring;
+	unsigned int idx;
+
+	if (xskq_nb_free(q, q->iter_head_idx, 1) == 0)
+		return -ENOSPC;
+
+	idx = (q->iter_head_idx++) & q->ring_mask;
+	ring->desc[idx].idx = id;
+	ring->desc[idx].len = len;
+	ring->desc[idx].offset = offset;
+
+	return 0;
+}
+
 /* Functions operating on UMEM queues only */
 
 static inline u32 xskq_umem_get_ring_size(struct xsk_queue *q)
@@ -52,6 +250,14 @@ static inline u32 xskq_umem_get_ring_size(struct xsk_queue *q)
 	return sizeof(struct xdp_umem_queue) + q->nentries * sizeof(u32);
 }
 
+static inline u32 xdp_umem_get_id(struct xsk_queue *q,
+				  struct xskq_iter *it)
+{
+	struct xdp_umem_queue *ring = (struct xdp_umem_queue *)q->ring;
+
+	return ring->desc[it->tail & q->ring_mask];
+}
+
 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
 void xskq_destroy(struct xsk_queue *q_ops);
 
-- 
2.14.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ