>From ccafdce1eefb3d59793931e746f1f07722fcfbbe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Sep 2009 18:48:32 -0300 Subject: [PATCH 2/2] net: Allow protocols to provide an unlocked_recvmsg socket method MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit So that recvmmsg can use it. With this patch recvmmsg actually _requires_ that socket->ops->unlocked_recvmsg exists, and that socket->sk->sk_prot->unlocked_recvmsg is non-NULL. We may well switch back to the previous scheme where sys_recvmmsg checks if the underlying protocol provides an unlocked version and uses it, falling back to the locked version if there is none. But first let's see if this works with recvmmsg alone and what kinds of gains we get with the unlocked_recvmsg implementation in UDP. Followup patches can restore that behaviour if we want to use it with, say, DCCP and SCTP without a specific unlocked version. This should address the concerns raised by Rémi about the MSG_UNLOCKED problem. 
Cc: Caitlin Bestler Cc: Chris Van Hoof Cc: Clark Williams Cc: Neil Horman Cc: Nir Tzachar Cc: Nivedita Singhvi Cc: Paul Moore Cc: Rémi Denis-Courmont Cc: Steven Whitehouse Signed-off-by: Arnaldo Carvalho de Melo --- drivers/isdn/mISDN/socket.c | 2 + drivers/net/pppoe.c | 1 + drivers/net/pppol2tp.c | 1 + include/linux/net.h | 7 +++ include/net/sock.h | 13 +++++ net/appletalk/ddp.c | 1 + net/atm/pvc.c | 1 + net/atm/svc.c | 1 + net/ax25/af_ax25.c | 1 + net/bluetooth/bnep/sock.c | 1 + net/bluetooth/cmtp/sock.c | 1 + net/bluetooth/hci_sock.c | 1 + net/bluetooth/hidp/sock.c | 1 + net/bluetooth/l2cap.c | 1 + net/bluetooth/rfcomm/sock.c | 1 + net/bluetooth/sco.c | 1 + net/can/bcm.c | 1 + net/can/raw.c | 1 + net/core/sock.c | 26 +++++++++ net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/decnet/af_decnet.c | 1 + net/econet/af_econet.c | 1 + net/ieee802154/af_ieee802154.c | 2 + net/ipv4/af_inet.c | 3 + net/ipv4/udp.c | 52 +++++++++++++++--- net/ipv6/af_inet6.c | 2 + net/ipv6/raw.c | 1 + net/ipx/af_ipx.c | 1 + net/irda/af_irda.c | 4 ++ net/iucv/af_iucv.c | 1 + net/key/af_key.c | 1 + net/llc/af_llc.c | 1 + net/netlink/af_netlink.c | 1 + net/netrom/af_netrom.c | 1 + net/packet/af_packet.c | 2 + net/phonet/socket.c | 2 + net/rds/af_rds.c | 1 + net/rose/af_rose.c | 1 + net/rxrpc/af_rxrpc.c | 1 + net/sctp/ipv6.c | 1 + net/sctp/protocol.c | 1 + net/socket.c | 112 +++++++++++++++++++++++++++++++++++---- net/tipc/socket.c | 3 + net/unix/af_unix.c | 3 + net/x25/af_x25.c | 1 + 46 files changed, 244 insertions(+), 21 deletions(-) diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index c36f521..6da3a71 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -590,6 +590,7 @@ static const struct proto_ops data_sock_ops = { .getname = data_sock_getname, .sendmsg = mISDN_sock_sendmsg, .recvmsg = mISDN_sock_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -743,6 
+744,7 @@ static const struct proto_ops base_sock_ops = { .getname = sock_no_getname, .sendmsg = sock_no_sendmsg, .recvmsg = sock_no_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .poll = sock_no_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 5f20902..bf30741 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -1121,6 +1121,7 @@ static const struct proto_ops pppoe_ops = { .getsockopt = sock_no_getsockopt, .sendmsg = pppoe_sendmsg, .recvmsg = pppoe_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, }; diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index e0f9219..af6160c 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -2590,6 +2590,7 @@ static struct proto_ops pppol2tp_ops = { .getsockopt = pppol2tp_getsockopt, .sendmsg = pppol2tp_sendmsg, .recvmsg = pppol2tp_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, }; diff --git a/include/linux/net.h b/include/linux/net.h index d67587a..8b852de 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -186,6 +186,10 @@ struct proto_ops { int (*recvmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags); + int (*unlocked_recvmsg)(struct kiocb *iocb, + struct socket *sock, + struct msghdr *m, + size_t total_len, int flags); int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, @@ -316,6 +320,8 @@ SOCKCALL_WRAP(name, sendmsg, (struct kiocb *iocb, struct socket *sock, struct ms (iocb, sock, m, len)) \ SOCKCALL_WRAP(name, recvmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len, int flags), \ (iocb, sock, m, len, flags)) \ +SOCKCALL_WRAP(name, unlocked_recvmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len, int flags), \ + 
(iocb, sock, m, len, flags)) \ SOCKCALL_WRAP(name, mmap, (struct file *file, struct socket *sock, struct vm_area_struct *vma), \ (file, sock, vma)) \ \ @@ -337,6 +343,7 @@ static const struct proto_ops name##_ops = { \ .getsockopt = __lock_##name##_getsockopt, \ .sendmsg = __lock_##name##_sendmsg, \ .recvmsg = __lock_##name##_recvmsg, \ + .unlocked_recvmsg = __lock_##name##_unlocked_recvmsg, \ .mmap = __lock_##name##_mmap, \ }; diff --git a/include/net/sock.h b/include/net/sock.h index 950409d..7c62428 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -644,6 +644,11 @@ struct proto { struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); + int (*unlocked_recvmsg)(struct kiocb *iocb, + struct sock *sk, + struct msghdr *msg, + size_t len, int noblock, + int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*bind)(struct sock *sk, @@ -998,6 +1003,11 @@ extern int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); extern int sock_no_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); +extern int sock_no_unlocked_recvmsg(struct kiocb *iocb, + struct socket *sock, + struct msghdr *msg, + size_t size, + int flags); extern int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); @@ -1014,6 +1024,9 @@ extern int sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); extern int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags); +extern int sock_common_unlocked_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, + int flags); extern int sock_common_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen); extern int compat_sock_common_getsockopt(struct socket *sock, int level, diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 
875eda5..100c5d7 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1842,6 +1842,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { .getsockopt = sock_no_getsockopt, .sendmsg = atalk_sendmsg, .recvmsg = atalk_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/atm/pvc.c b/net/atm/pvc.c index e1d22d9..5c03749 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -122,6 +122,7 @@ static const struct proto_ops pvc_proto_ops = { .getsockopt = pvc_getsockopt, .sendmsg = vcc_sendmsg, .recvmsg = vcc_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/atm/svc.c b/net/atm/svc.c index 7b831b5..6c66ae9 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -644,6 +644,7 @@ static const struct proto_ops svc_proto_ops = { .setsockopt = svc_setsockopt, .getsockopt = svc_getsockopt, .sendmsg = vcc_sendmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .recvmsg = vcc_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index da0f64f..43f4f57 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1976,6 +1976,7 @@ static const struct proto_ops ax25_proto_ops = { .getsockopt = ax25_getsockopt, .sendmsg = ax25_sendmsg, .recvmsg = ax25_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index e857628..0b26b3c 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -178,6 +178,7 @@ static const struct proto_ops bnep_sock_ops = { .getname = sock_no_getname, .sendmsg = sock_no_sendmsg, .recvmsg = sock_no_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .poll = sock_no_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c 
index 16b0fad..72a4b5d 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -173,6 +173,7 @@ static const struct proto_ops cmtp_sock_ops = { .getname = sock_no_getname, .sendmsg = sock_no_sendmsg, .recvmsg = sock_no_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .poll = sock_no_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4f9621f..bd0aace 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -603,6 +603,7 @@ static const struct proto_ops hci_sock_ops = { .getname = hci_sock_getname, .sendmsg = hci_sock_sendmsg, .recvmsg = hci_sock_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .ioctl = hci_sock_ioctl, .poll = datagram_poll, .listen = sock_no_listen, diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 37c9d7d..90b40e2 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -224,6 +224,7 @@ static const struct proto_ops hidp_sock_ops = { .getname = sock_no_getname, .sendmsg = sock_no_sendmsg, .recvmsg = sock_no_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .poll = sock_no_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index bd0a4c1..945df03 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2743,6 +2743,7 @@ static const struct proto_ops l2cap_sock_ops = { .getname = l2cap_sock_getname, .sendmsg = l2cap_sock_sendmsg, .recvmsg = l2cap_sock_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 0b85e81..00b1a41 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1092,6 +1092,7 @@ static const struct proto_ops rfcomm_sock_ops = { .getname = rfcomm_sock_getname, .sendmsg = rfcomm_sock_sendmsg, .recvmsg = 
rfcomm_sock_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .shutdown = rfcomm_sock_shutdown, .setsockopt = rfcomm_sock_setsockopt, .getsockopt = rfcomm_sock_getsockopt, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 51ae0c3..5ef7b5c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -965,6 +965,7 @@ static const struct proto_ops sco_sock_ops = { .getname = sco_sock_getname, .sendmsg = sco_sock_sendmsg, .recvmsg = bt_sock_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, diff --git a/net/can/bcm.c b/net/can/bcm.c index 72720c7..6e388b3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1575,6 +1575,7 @@ static struct proto_ops bcm_ops __read_mostly = { .getsockopt = sock_no_getsockopt, .sendmsg = bcm_sendmsg, .recvmsg = bcm_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/can/raw.c b/net/can/raw.c index db3152d..b8fa610 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -730,6 +730,7 @@ static struct proto_ops raw_ops __read_mostly = { .getsockopt = raw_getsockopt, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/core/sock.c b/net/core/sock.c index 7633422..76a6279 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1643,6 +1643,13 @@ int sock_no_connect(struct socket *sock, struct sockaddr *saddr, } EXPORT_SYMBOL(sock_no_connect); +int sock_no_unlocked_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(sock_no_unlocked_recvmsg); + int sock_no_socketpair(struct socket *sock1, struct socket *sock2) { return -EOPNOTSUPP; @@ -2004,6 +2011,25 @@ int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, } EXPORT_SYMBOL(sock_common_recvmsg); +int 
sock_common_unlocked_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + struct sock *sk = sock->sk; + int addr_len = 0; + int err; + + if (sk->sk_prot->unlocked_recvmsg == NULL) + return -EOPNOTSUPP; + + err = sk->sk_prot->unlocked_recvmsg(iocb, sk, msg, size, + flags & MSG_DONTWAIT, + flags & ~MSG_DONTWAIT, &addr_len); + if (err >= 0) + msg->msg_namelen = addr_len; + return err; +} +EXPORT_SYMBOL(sock_common_unlocked_recvmsg); + /* * Set socket options on an inet socket. */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a0a36c9..263c9b8 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -974,6 +974,7 @@ static const struct proto_ops inet_dccp_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3e70faa..ae1f650 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1175,6 +1175,7 @@ static struct proto_ops inet6_dccp_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 77d4028..aa1af0b 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2348,6 +2348,7 @@ static const struct proto_ops dn_proto_ops = { .getsockopt = dn_getsockopt, .sendmsg = dn_sendmsg, .recvmsg = dn_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index f0bbc57..857ff5b 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -765,6 +765,7 @@ static const struct proto_ops econet_ops = { .getsockopt = sock_no_getsockopt, .sendmsg = 
econet_sendmsg, .recvmsg = econet_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index af66180..1602409 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -197,6 +197,7 @@ static const struct proto_ops ieee802154_raw_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = ieee802154_sock_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT @@ -222,6 +223,7 @@ static const struct proto_ops ieee802154_dgram_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = ieee802154_sock_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 566ea6c..e8a44d4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -854,6 +854,7 @@ const struct proto_ops inet_stream_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = tcp_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = tcp_sendpage, .splice_read = tcp_splice_read, @@ -880,6 +881,7 @@ const struct proto_ops inet_dgram_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_common_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT @@ -909,6 +911,7 @@ static const struct proto_ops inet_sockraw_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 80e3812..4033ae5 100644 --- 
a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -872,13 +872,34 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) return 0; } +static void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) +{ + lock_sock(sk); + skb_free_datagram(sk, skb); + release_sock(sk); +} + +static int skb_kill_datagram_locked(struct sock *sk, struct sk_buff *skb, + unsigned int flags) +{ + int ret; + lock_sock(sk); + ret = skb_kill_datagram(sk, skb, flags); + release_sock(sk); + return ret; +} + /* * This should be easy, if there is something there we * return it, otherwise we block. */ - -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int __udp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len, + void (*free_datagram)(struct sock *, + struct sk_buff *), + int (*kill_datagram)(struct sock *, + struct sk_buff *, unsigned int)) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; @@ -956,23 +977,35 @@ try_again: err = ulen; out_free: - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); + free_datagram(sk, skb); out: return err; csum_copy_err: - lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) + if (!kill_datagram(sk, skb, flags)) UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - release_sock(sk); if (noblock) return -EAGAIN; goto try_again; } +int udp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + return __udp_recvmsg(iocb, sk, msg, len, noblock, flags, addr_len, + skb_free_datagram_locked, + skb_kill_datagram_locked); +} + +int udp_unlocked_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + return __udp_recvmsg(iocb, sk, msg, len, noblock, flags, addr_len, + skb_free_datagram, 
skb_kill_datagram); +} int udp_disconnect(struct sock *sk, int flags) { @@ -1565,6 +1598,7 @@ struct proto udp_prot = { .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, + .unlocked_recvmsg = udp_unlocked_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 45f9a2a..7d0cc2f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -518,6 +518,7 @@ const struct proto_ops inet6_stream_ops = { .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = tcp_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = tcp_sendpage, .splice_read = tcp_splice_read, @@ -544,6 +545,7 @@ const struct proto_ops inet6_dgram_ops = { .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ + .unlocked_recvmsg = sock_common_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d6c3c1c..c05ec59 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1326,6 +1326,7 @@ static const struct proto_ops inet6_sockraw_ops = { .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f1118d9..45048a0 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1953,6 +1953,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { .getsockopt = ipx_getsockopt, .sendmsg = ipx_sendmsg, .recvmsg = ipx_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 50b43c5..7e97581 100644 --- 
a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2489,6 +2489,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { .getsockopt = irda_getsockopt, .sendmsg = irda_sendmsg, .recvmsg = irda_recvmsg_stream, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -2513,6 +2514,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .getsockopt = irda_getsockopt, .sendmsg = irda_sendmsg, .recvmsg = irda_recvmsg_dgram, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -2537,6 +2539,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { .getsockopt = irda_getsockopt, .sendmsg = irda_sendmsg_dgram, .recvmsg = irda_recvmsg_dgram, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -2562,6 +2565,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { .getsockopt = irda_getsockopt, .sendmsg = irda_sendmsg_ultra, .recvmsg = irda_recvmsg_dgram, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 49c15b4..c208622 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1693,6 +1693,7 @@ static struct proto_ops iucv_sock_ops = { .getname = iucv_sock_getname, .sendmsg = iucv_sock_sendmsg, .recvmsg = iucv_sock_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .poll = iucv_sock_poll, .ioctl = sock_no_ioctl, .mmap = sock_no_mmap, diff --git a/net/key/af_key.c b/net/key/af_key.c index dba9abd..f1af697 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3636,6 +3636,7 @@ static const struct proto_ops pfkey_ops = { .getsockopt = sock_no_getsockopt, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, /* Now the operations that really occur. 
*/ .release = pfkey_release, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c45eee1..d948caf 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1115,6 +1115,7 @@ static const struct proto_ops llc_ui_ops = { .getsockopt = llc_ui_getsockopt, .sendmsg = llc_ui_sendmsg, .recvmsg = llc_ui_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2936fa3..e7a51bb 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1978,6 +1978,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ce1a34b..3550d34 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1395,6 +1395,7 @@ static const struct proto_ops nr_proto_ops = { .getsockopt = nr_getsockopt, .sendmsg = nr_sendmsg, .recvmsg = nr_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ebe5718..dc5d7ff 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2340,6 +2340,7 @@ static const struct proto_ops packet_ops_spkt = { .getsockopt = sock_no_getsockopt, .sendmsg = packet_sendmsg_spkt, .recvmsg = packet_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -2361,6 +2362,7 @@ static const struct proto_ops packet_ops = { .getsockopt = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = packet_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ada2a35..2bd24a5 100644 --- 
a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -327,6 +327,7 @@ const struct proto_ops phonet_dgram_ops = { #endif .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -352,6 +353,7 @@ const struct proto_ops phonet_stream_ops = { #endif .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index b11e7e5..3e8c846 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -377,6 +377,7 @@ static struct proto_ops rds_proto_ops = { .getsockopt = rds_getsockopt, .sendmsg = rds_sendmsg, .recvmsg = rds_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index e5f478c..a64c623 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1532,6 +1532,7 @@ static struct proto_ops rose_proto_ops = { .getsockopt = rose_getsockopt, .sendmsg = rose_sendmsg, .recvmsg = rose_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index bfe493e..bf4c38a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -766,6 +766,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .getsockopt = sock_no_getsockopt, .sendmsg = rxrpc_sendmsg, .recvmsg = rxrpc_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 6a4b190..b68d9f9 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -918,6 +918,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = 
sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a76da65..78f52a3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -897,6 +897,7 @@ static const struct proto_ops inet_seqpacket_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT diff --git a/net/socket.c b/net/socket.c index 32db56a..dc5b976 100644 --- a/net/socket.c +++ b/net/socket.c @@ -690,6 +690,32 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); } +static inline int __sock_unlocked_recvmsg_nosec(struct kiocb *iocb, + struct socket *sock, + struct msghdr *msg, + size_t size, int flags) +{ + struct sock_iocb *si = kiocb_to_siocb(iocb); + + si->sock = sock; + si->scm = NULL; + si->msg = msg; + si->size = size; + si->flags = flags; + + return sock->ops->unlocked_recvmsg(iocb, sock, msg, size, flags); +} + +static inline int __sock_unlocked_recvmsg(struct kiocb *iocb, + struct socket *sock, + struct msghdr *msg, size_t size, + int flags) +{ + int err = security_socket_recvmsg(sock, msg, size, flags); + + return err ?: __sock_unlocked_recvmsg_nosec(iocb, sock, msg, size, flags); +} + int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -720,6 +746,58 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, return ret; } +static int sock_unlocked_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_unlocked_recvmsg(&iocb, sock, msg, size, flags); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); 
+ return ret; +} + +static int sock_unlocked_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_unlocked_recvmsg_nosec(&iocb, sock, msg, size, flags); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + +enum sock_recvmsg_security { + SOCK_RECVMSG_SEC = 0, + SOCK_RECVMSG_NOSEC, +}; + +enum sock_recvmsg_locking { + SOCK_LOCKED_RECVMSG = 0, + SOCK_UNLOCKED_RECVMSG, +}; + +static int (*sock_recvmsg_table[2][2])(struct socket *sock, struct msghdr *msg, + size_t size, int flags) = { + [SOCK_RECVMSG_SEC] = { + [SOCK_LOCKED_RECVMSG] = sock_recvmsg, /* The old one */ + [SOCK_UNLOCKED_RECVMSG] = sock_unlocked_recvmsg, + }, + [SOCK_RECVMSG_NOSEC] = { + [SOCK_LOCKED_RECVMSG] = sock_recvmsg_nosec, + [SOCK_UNLOCKED_RECVMSG] = sock_unlocked_recvmsg_nosec, + }, +}; + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1984,7 +2062,9 @@ out: } static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, - struct msghdr *msg_sys, unsigned flags, int nosec) + struct msghdr *msg_sys, unsigned flags, + enum sock_recvmsg_security security, + enum sock_recvmsg_locking locking) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -2044,8 +2124,8 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = (nosec ? 
sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, - total_len, flags); + err = sock_recvmsg_table[security][locking](sock, msg_sys, + total_len, flags); if (err < 0) goto out_freeiov; len = err; @@ -2092,7 +2172,8 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, if (!sock) goto out; - err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); + err = __sys_recvmsg(sock, msg, &msg_sys, flags, + SOCK_RECVMSG_SEC, SOCK_LOCKED_RECVMSG); fput_light(sock->file, fput_needed); out: @@ -2111,6 +2192,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct mmsghdr __user *entry; struct msghdr msg_sys; struct timespec end_time; + enum sock_recvmsg_security security; if (timeout && poll_select_set_timeout(&end_time, timeout->tv_sec, @@ -2123,20 +2205,25 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; + lock_sock(sock->sk); + err = sock_error(sock->sk); if (err) goto out_put; entry = mmsg; + security = SOCK_RECVMSG_SEC; while (datagrams < vlen) { - /* - * No need to ask LSM for more than the first datagram. - */ err = __sys_recvmsg(sock, (struct msghdr __user *)entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, security, + SOCK_UNLOCKED_RECVMSG); if (err < 0) break; + /* + * No need to ask LSM for more than the first datagram. + */ + security = SOCK_RECVMSG_NOSEC; err = put_user(err, &entry->msg_len); if (err) break; @@ -2165,9 +2252,8 @@ out_put: fput_light(sock->file, fput_needed); if (err == 0) - return datagrams; - - if (datagrams != 0) { + err = datagrams; + else if (datagrams != 0) { /* * We may return less entries than requested (vlen) if the * sock is non block and there aren't enough datagrams... 
@@ -2182,9 +2268,11 @@ out_put: sock->sk->sk_err = -err; } - return datagrams; + err = datagrams; } + release_sock(sock->sk); + return err; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1848693..141539b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1791,6 +1791,7 @@ static const struct proto_ops msg_ops = { .getsockopt = getsockopt, .sendmsg = send_msg, .recvmsg = recv_msg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage }; @@ -1812,6 +1813,7 @@ static const struct proto_ops packet_ops = { .getsockopt = getsockopt, .sendmsg = send_packet, .recvmsg = recv_msg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage }; @@ -1833,6 +1835,7 @@ static const struct proto_ops stream_ops = { .getsockopt = getsockopt, .sendmsg = send_stream, .recvmsg = recv_stream, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage }; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fc3ebb9..7e726a6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -521,6 +521,7 @@ static const struct proto_ops unix_stream_ops = { .getsockopt = sock_no_getsockopt, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -542,6 +543,7 @@ static const struct proto_ops unix_dgram_ops = { .getsockopt = sock_no_getsockopt, .sendmsg = unix_dgram_sendmsg, .recvmsg = unix_dgram_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -563,6 +565,7 @@ static const struct proto_ops unix_seqpacket_ops = { .getsockopt = sock_no_getsockopt, .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_dgram_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 
5e6c072..7c20b26 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1620,6 +1620,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { .getsockopt = x25_getsockopt, .sendmsg = x25_sendmsg, .recvmsg = x25_recvmsg, + .unlocked_recvmsg = sock_no_unlocked_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; -- 1.6.2.5