lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20181027120445.21552-1-jianfeng.tan@linux.alibaba.com>
Date:   Sat, 27 Oct 2018 12:04:45 +0000
From:   Jianfeng Tan <jianfeng.tan@...ux.alibaba.com>
To:     netdev@...r.kernel.org
Cc:     davem@...emloft.net, jasowang@...hat.com, mst@...hat.com
Subject: [PATCH] net/packet: support vhost mrg_rxbuf

Previouly, virtio net header size is hardcoded to be 10, which makes
the feature mrg_rxbuf not available.

We redefine PACKET_VNET_HDR ioctl which treats user input as boolean,
but now as int, 0, 10, 12, or everything else be treated as 10.

There will be one case which is treated differently: if user input is
12, previously, the header size will be 10; but now it's 12.

Signed-off-by: Jianfeng Tan <jianfeng.tan@...ux.alibaba.com>
---
 net/packet/af_packet.c | 97 ++++++++++++++++++++++++++----------------
 net/packet/diag.c      |  2 +-
 net/packet/internal.h  |  2 +-
 3 files changed, 63 insertions(+), 38 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aae..1bd7f4cdcc80 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1999,18 +1999,24 @@ static unsigned int run_filter(struct sk_buff *skb,
 }
 
 static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
-			   size_t *len)
+			   size_t *len, int vnet_hdr_len)
 {
+	int res;
 	struct virtio_net_hdr vnet_hdr;
 
-	if (*len < sizeof(vnet_hdr))
+	if (*len < vnet_hdr_len)
 		return -EINVAL;
-	*len -= sizeof(vnet_hdr);
+	*len -= vnet_hdr_len;
 
 	if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
 		return -EINVAL;
 
-	return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+	res = memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+	if (res == 0)
+		iov_iter_advance(&msg->msg_iter,
+				 vnet_hdr_len - sizeof(vnet_hdr));
+
+	return res;
 }
 
 /*
@@ -2206,11 +2212,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 				  po->tp_reserve;
 	} else {
 		unsigned int maclen = skb_network_offset(skb);
+		int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+
 		netoff = TPACKET_ALIGN(po->tp_hdrlen +
 				       (maclen < 16 ? 16 : maclen)) +
 				       po->tp_reserve;
-		if (po->has_vnet_hdr) {
-			netoff += sizeof(struct virtio_net_hdr);
+		if (vnet_hdr_sz) {
+			netoff += vnet_hdr_sz;
 			do_vnet = true;
 		}
 		macoff = netoff - maclen;
@@ -2429,19 +2437,6 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
 	return 0;
 }
 
-static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
-				 struct virtio_net_hdr *vnet_hdr)
-{
-	if (*len < sizeof(*vnet_hdr))
-		return -EINVAL;
-	*len -= sizeof(*vnet_hdr);
-
-	if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
-		return -EFAULT;
-
-	return __packet_snd_vnet_parse(vnet_hdr, *len);
-}
-
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		void *frame, struct net_device *dev, void *data, int tp_len,
 		__be16 proto, unsigned char *addr, int hlen, int copylen,
@@ -2609,6 +2604,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	int len_sum = 0;
 	int status = TP_STATUS_AVAILABLE;
 	int hlen, tlen, copylen = 0;
+	int vnet_hdr_sz;
 
 	mutex_lock(&po->pg_vec_lock);
 
@@ -2648,7 +2644,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
 
-	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
+	vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
 		size_max = dev->mtu + reserve + VLAN_HLEN;
 
 	do {
@@ -2668,10 +2665,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		status = TP_STATUS_SEND_REQUEST;
 		hlen = LL_RESERVED_SPACE(dev);
 		tlen = dev->needed_tailroom;
-		if (po->has_vnet_hdr) {
+		if (vnet_hdr_sz) {
 			vnet_hdr = data;
-			data += sizeof(*vnet_hdr);
-			tp_len -= sizeof(*vnet_hdr);
+			data += vnet_hdr_sz;
+			tp_len -= vnet_hdr_sz;
 			if (tp_len < 0 ||
 			    __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
 				tp_len = -EINVAL;
@@ -2696,7 +2693,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 					  addr, hlen, copylen, &sockc);
 		if (likely(tp_len >= 0) &&
 		    tp_len > dev->mtu + reserve &&
-		    !po->has_vnet_hdr &&
+		    !vnet_hdr_sz &&
 		    !packet_extra_vlan_len_allowed(dev, skb))
 			tp_len = -EMSGSIZE;
 
@@ -2715,7 +2712,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			}
 		}
 
-		if (po->has_vnet_hdr) {
+		if (vnet_hdr_sz) {
 			if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
 				tp_len = -EINVAL;
 				goto tpacket_error;
@@ -2802,9 +2799,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	int err, reserve = 0;
 	struct sockcm_cookie sockc;
 	struct virtio_net_hdr vnet_hdr = { 0 };
+	int vnet_hdr_sz;
 	int offset = 0;
 	struct packet_sock *po = pkt_sk(sk);
-	bool has_vnet_hdr = false;
 	int hlen, tlen, linear;
 	int extra_len = 0;
 
@@ -2844,11 +2841,29 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
 	if (sock->type == SOCK_RAW)
 		reserve = dev->hard_header_len;
-	if (po->has_vnet_hdr) {
-		err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
-		if (err)
+
+	vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+	if (vnet_hdr_sz) {
+		if (len < vnet_hdr_sz) {
+			err = -EINVAL;
 			goto out_unlock;
-		has_vnet_hdr = true;
+		}
+		len -= vnet_hdr_sz;
+
+		if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr),
+					 &msg->msg_iter)) {
+			err = -EFAULT;
+			goto out_unlock;
+		}
+
+		if (__packet_snd_vnet_parse(&vnet_hdr, len)) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* TODO: check hdr_len with len? */
+
+		iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(vnet_hdr));
 	}
 
 	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2912,7 +2927,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	skb->mark = sockc.mark;
 	skb->tstamp = sockc.transmit_time;
 
-	if (has_vnet_hdr) {
+	if (vnet_hdr_sz) {
 		err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
 		if (err)
 			goto out_free;
@@ -3307,11 +3322,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	if (pkt_sk(sk)->pressure)
 		packet_rcv_has_room(pkt_sk(sk), NULL);
 
-	if (pkt_sk(sk)->has_vnet_hdr) {
-		err = packet_rcv_vnet(msg, skb, &len);
+	vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz);
+	if (vnet_hdr_len) {
+		err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
 		if (err)
 			goto out_free;
-		vnet_hdr_len = sizeof(struct virtio_net_hdr);
 	}
 
 	/* You lose any data beyond the buffer you gave. If it worries
@@ -3772,7 +3787,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
 			ret = -EBUSY;
 		} else {
-			po->has_vnet_hdr = !!val;
+			/* Previouly we treat user input as boolean (!!val),
+			 * now we treat it as int. After the below correction, 
+			 * the only violation case is 12, which results in
+			 * vnet header size of 12 instead of 10. 
+			 */
+			if (val &&
+			    val != sizeof(struct virtio_net_hdr) &&
+			    val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
+				val = sizeof(struct virtio_net_hdr);
+
+			po->vnet_hdr_sz = val;
 			ret = 0;
 		}
 		release_sock(sk);
@@ -3903,7 +3928,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		val = po->origdev;
 		break;
 	case PACKET_VNET_HDR:
-		val = po->has_vnet_hdr;
+		val = po->vnet_hdr_sz;
 		break;
 	case PACKET_VERSION:
 		val = po->tp_version;
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 7ef1c881ae74..950015b6704f 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -26,7 +26,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
 		pinfo.pdi_flags |= PDI_AUXDATA;
 	if (po->origdev)
 		pinfo.pdi_flags |= PDI_ORIGDEV;
-	if (po->has_vnet_hdr)
+	if (po->vnet_hdr_sz)
 		pinfo.pdi_flags |= PDI_VNETHDR;
 	if (po->tp_loss)
 		pinfo.pdi_flags |= PDI_LOSS;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 3bb7c5fb3bff..11bc75950f28 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -115,9 +115,9 @@ struct packet_sock {
 	unsigned int		running;	/* bind_lock must be held */
 	unsigned int		auxdata:1,	/* writer must hold sock lock */
 				origdev:1,
-				has_vnet_hdr:1,
 				tp_loss:1,
 				tp_tx_has_off:1;
+	int			vnet_hdr_sz;
 	int			pressure;
 	int			ifindex;	/* bound device		*/
 	__be16			num;
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ