[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200423221515.GA4335@white>
Date: Fri, 24 Apr 2020 01:15:15 +0300
From: Lese Doru Calin <lesedorucalin01@...il.com>
To: David Miller <davem@...emloft.net>,
Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org
Cc: Alexey Kuznetsov <kuznet@....inr.ac.ru>,
Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
Eric Dumazet <eric.dumazet@...il.com>
Subject: [PATCH v5] net: Option to retrieve the pending data from send queue
of UDP socket
In this year's edition of GSoC, there is a project idea for CRIU to add support
for checkpoint/restore of cork-ed UDP sockets. But to add it, the kernel API needs
to be extended.
This is what this patch does. It adds a new command SIOUDPPENDGET to the ioctl
syscall regarding UDP sockets, which stores the pending data from the write queue
and the destination address in a struct msghdr. The arg for ioctl needs to be a
pointer to a user space struct msghdr. Retrieving the data requires the
CAP_NET_ADMIN capability.
Signed-off-by: Lese Doru Calin <lesedorucalin01@...il.com>
---
This is a different approach from https://lore.kernel.org/netdev/20200416132242.GA2586@white/:
it gives up on the UDP "Repair mode" in the fast path in favor of an ioctl syscall.
include/linux/socket.h | 2 +
include/uapi/linux/sockios.h | 3 +
net/ipv4/udp.c | 144 ++++++++++++++++++++++++++++++++---
net/socket.c | 4 +-
4 files changed, 139 insertions(+), 14 deletions(-)
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 54338fac45cb..632ba0ea6709 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -351,6 +351,8 @@ struct ucred {
#define IPX_TYPE 1
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
+extern int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
+ void __user *uaddr, int __user *ulen);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
struct timespec64;
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index 7d1bccbbef78..3639fa906604 100644
--- a/include/uapi/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
@@ -153,6 +153,9 @@
#define SIOCSHWTSTAMP 0x89b0 /* set and get config */
#define SIOCGHWTSTAMP 0x89b1 /* get config */
+/* UDP socket calls */
+#define SIOUDPPENDGET 0x89C0 /* get the pending data from write queue */
+
/* Device private ioctl calls */
/*
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 32564b350823..3f53502a533e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1620,6 +1620,133 @@ static int first_packet_length(struct sock *sk)
return res;
}
+static void udp_set_source_addr(struct sock *sk, struct msghdr *msg,
+ int *addr_len, u32 addr, u16 port)
+{ /*
+ * Fill msg->msg_name, when the caller set one, with an AF_INET
+ * sockaddr built from @addr and @port, and report its size through
+ * @addr_len. @addr and @port are stored exactly as passed in; no
+ * byte-order conversion is done here. Despite the name, this patch
+ * calls the helper both for the source of a received datagram
+ * (udp_recvmsg) and for the destination of pending data
+ * (udp_get_pending_write_queue).
+ * With a NULL msg->msg_name nothing is written, *addr_len included.
+ */
+ DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
+
+ if (sin) {
+ sin->sin_family = AF_INET;
+ sin->sin_port = port;
+ sin->sin_addr.s_addr = addr;
+ memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); /* clear the sin_zero bytes */
+ *addr_len = sizeof(*sin);
+
+ if (cgroup_bpf_enabled) /* hook is invoked only when cgroup BPF is enabled */
+ BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
+ (struct sockaddr *)sin);
+ }
+}
+/*
+ * Copy up to @len bytes from the skbs on sk->sk_write_queue into @msg,
+ * skipping the first @off bytes of each skb. The code shown here only
+ * walks the queue; it does not unlink or free any skb.
+ *
+ * Returns the number of bytes copied, or the non-zero error returned by
+ * skb_copy_datagram_msg(). The only caller in this patch holds
+ * lock_sock(sk) around the call.
+ *
+ * NOTE(review): the same @off is applied to every skb, and skb->len is
+ * not checked against @off before the subtraction -- confirm that every
+ * queued skb really starts with at least @off bytes of headers.
+ */
+static int udp_peek_sndq(struct sock *sk, struct msghdr *msg, int off, int len)
+{
+ int copy, copied = 0, err = 0;
+ struct sk_buff *skb;
+
+ skb_queue_walk(&sk->sk_write_queue, skb) {
+ copy = len - copied; /* bytes the caller still wants */
+ if (copy > skb->len - off) /* clamp to what this skb holds past @off */
+ copy = skb->len - off;
+
+ err = skb_copy_datagram_msg(skb, off, msg, copy);
+ if (err)
+ break;
+
+ copied += copy;
+ if (len <= copied) /* request satisfied */
+ break;
+ }
+ return err ?: copied; /* a non-zero error takes precedence over the count */
+}
+/*
+ * Report the destination address and the queued bytes of the data
+ * pending on a UDP socket.
+ *
+ * up->pending selects the address family: 0 yields -ENODATA, AF_INET and
+ * AF_INET6 take the destination from the cork flow (inet->cork.fl), and
+ * any other value yields -EINVAL. When msg->msg_name is set it receives
+ * that destination and *addr_len its size. The data is then copied into
+ * @msg under lock_sock(), skipping sizeof(struct udphdr) plus
+ * sizeof(struct iphdr) or sizeof(struct ipv6hdr) bytes in each skb.
+ *
+ * Returns the value of udp_peek_sndq() (bytes copied or its error), or
+ * one of the error codes above.
+ *
+ * NOTE(review): up->pending and the cork flow are read before lock_sock()
+ * is taken, and the re-check under the lock only tests for non-zero --
+ * confirm this cannot race with a concurrent send or flush. Also confirm
+ * that fixed-size headers are the right offset when IP options or IPv6
+ * extension headers are in use.
+ */
+static int udp_get_pending_write_queue(struct sock *sk, struct msghdr *msg,
+ int *addr_len)
+{
+ int err = 0, off = sizeof(struct udphdr); /* off: header bytes skipped per skb */
+ struct inet_sock *inet = inet_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
+ struct flowi4 *fl4;
+ struct flowi6 *fl6;
+
+ switch (up->pending) {
+ case 0: /* no pending family recorded */
+ return -ENODATA;
+ case AF_INET:
+ off += sizeof(struct iphdr);
+ fl4 = &inet->cork.fl.u.ip4;
+ udp_set_source_addr(sk, msg, addr_len,
+ fl4->daddr, fl4->fl4_dport); /* destination, despite the helper's name */
+ break;
+ case AF_INET6:
+ off += sizeof(struct ipv6hdr);
+ if (msg->msg_name) { /* fill the address only when a buffer was supplied */
+ DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6,
+ msg->msg_name);
+
+ fl6 = &inet->cork.fl.u.ip6;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = fl6->fl6_dport;
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_addr = fl6->daddr;
+ sin6->sin6_scope_id = fl6->flowi6_oif; /* presumably the output interface index -- confirm */
+ *addr_len = sizeof(*sin6);
+
+ if (cgroup_bpf_enabled)
+ BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
+ (struct sockaddr *)sin6);
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ lock_sock(sk);
+ if (unlikely(!up->pending)) { /* re-check now that the socket lock is held */
+ release_sock(sk);
+ return -EINVAL;
+ }
+ err = udp_peek_sndq(sk, msg, off, msg_data_left(msg)); /* copy as much as @msg has room for */
+ release_sock(sk);
+ return err;
+}
+/*
+ * Handler for the SIOUDPPENDGET ioctl: @argp is a user pointer to a
+ * struct user_msghdr describing where the pending data and the
+ * destination address should be stored.
+ *
+ * Returns the value from udp_get_pending_write_queue() (bytes copied) on
+ * success; -EPERM without CAP_NET_ADMIN in the user namespace of the
+ * socket's network namespace; -EINVAL for a NULL @argp; otherwise the
+ * error from recvmsg_copy_msghdr(), udp_get_pending_write_queue() or
+ * move_addr_to_user().
+ */
+static int prep_msghdr_recv_pending(struct sock *sk, void __user *argp)
+{
+ struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; /* on-stack iovec array handed to recvmsg_copy_msghdr() */
+ struct user_msghdr __user *msg;
+ struct sockaddr __user *uaddr;
+ struct sockaddr_storage addr; /* kernel-side buffer for the destination address */
+ struct msghdr msg_sys;
+ int __user *uaddr_len;
+ int err = 0, len = 0;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!argp)
+ return -EINVAL;
+
+ msg = (struct user_msghdr __user *)argp;
+ err = recvmsg_copy_msghdr(&msg_sys, msg, 0, &uaddr, &iov);
+ if (err < 0)
+ return err;
+
+ uaddr_len = &msg->msg_namelen; /* passed to move_addr_to_user() as its length pointer */
+ msg_sys.msg_name = &addr; /* collect the address in kernel memory first */
+ msg_sys.msg_flags = 0;
+
+ err = udp_get_pending_write_queue(sk, &msg_sys, &len);
+ msg_sys.msg_namelen = len; /* len holds the address size at this point ... */
+ len = err; /* ... and is reused for the byte count (or error) */
+
+ if (uaddr && err >= 0) /* copy the address out only if uaddr is set and nothing failed */
+ err = move_addr_to_user(&addr, msg_sys.msg_namelen,
+ uaddr, uaddr_len);
+
+ kfree(iov); /* NOTE(review): presumably frees an iovec array allocated by recvmsg_copy_msghdr(), if any -- confirm */
+ return err < 0 ? err : len;
+}
+
/*
* IOCTL requests applicable to the UDP protocol
*/
@@ -1641,6 +1768,9 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return put_user(amount, (int __user *)arg);
}
+ case SIOUDPPENDGET:
+ return prep_msghdr_recv_pending(sk, (void __user *)arg);
+
default:
return -ENOIOCTLCMD;
}
@@ -1794,18 +1924,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
sock_recv_ts_and_drops(msg, sk, skb);
- /* Copy the address. */
- if (sin) {
- sin->sin_family = AF_INET;
- sin->sin_port = udp_hdr(skb)->source;
- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
- memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *addr_len = sizeof(*sin);
-
- if (cgroup_bpf_enabled)
- BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
- (struct sockaddr *)sin);
- }
+ udp_set_source_addr(sk, msg, addr_len, ip_hdr(skb)->saddr,
+ udp_hdr(skb)->source);
if (udp_sk(sk)->gro_enabled)
udp_cmsg_recv(msg, sk, skb);
diff --git a/net/socket.c b/net/socket.c
index 2dd739fba866..bd25d528c9a0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -217,8 +217,8 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *k
* specified. Zero is returned for a success.
*/
-static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
- void __user *uaddr, int __user *ulen)
+int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
+ void __user *uaddr, int __user *ulen)
{
int err;
int len;
--
2.17.1
Powered by blists - more mailing lists