[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200528051236.620353-9-hch@lst.de>
Date: Thu, 28 May 2020 07:12:16 +0200
From: Christoph Hellwig <hch@....de>
To: "David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>
Cc: Eric Dumazet <edumazet@...gle.com>,
Alexey Kuznetsov <kuznet@....inr.ac.ru>,
Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
Jon Maloy <jmaloy@...hat.com>,
Ying Xue <ying.xue@...driver.com>, drbd-dev@...ts.linbit.com,
linux-kernel@...r.kernel.org, linux-rdma@...r.kernel.org,
linux-nvme@...ts.infradead.org, target-devel@...r.kernel.org,
linux-afs@...ts.infradead.org, linux-cifs@...r.kernel.org,
cluster-devel@...hat.com, ocfs2-devel@....oracle.com,
netdev@...r.kernel.org, ceph-devel@...r.kernel.org,
rds-devel@....oracle.com, linux-nfs@...r.kernel.org,
tipc-discussion@...ts.sourceforge.net
Subject: [PATCH 08/28] net: add sock_set_rcvbuf
Add a helper to directly set the SO_RCVBUFFORCE sockopt from kernel space
without going through a fake uaccess.
Signed-off-by: Christoph Hellwig <hch@....de>
---
fs/dlm/lowcomms.c | 7 +-----
include/net/sock.h | 1 +
net/core/sock.c | 59 +++++++++++++++++++++++++---------------------
3 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 138009c6a2ee1..45c37f572c9d2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1297,7 +1297,6 @@ static int sctp_listen_for_all(void)
struct socket *sock = NULL;
int result = -EINVAL;
struct connection *con = nodeid2con(0, GFP_NOFS);
- int bufsize = NEEDED_RMEM;
int one = 1;
if (!con)
@@ -1312,11 +1311,7 @@ static int sctp_listen_for_all(void)
goto out;
}
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
- (char *)&bufsize, sizeof(bufsize));
- if (result)
- log_print("Error increasing buffer space on socket %d", result);
-
+ sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
sizeof(one));
if (result < 0)
diff --git a/include/net/sock.h b/include/net/sock.h
index dc08c176238fd..c997289aabbf9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2693,6 +2693,7 @@ void sock_enable_timestamps(struct sock *sk);
void sock_no_linger(struct sock *sk);
void sock_set_keepalive(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
+void sock_set_rcvbuf(struct sock *sk, int val);
void sock_set_reuseaddr(struct sock *sk);
void sock_set_sndtimeo(struct sock *sk, s64 secs);
diff --git a/net/core/sock.c b/net/core/sock.c
index 728f5fb156a0c..3c6ebf952e9ad 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -789,6 +789,35 @@ void sock_set_keepalive(struct sock *sk)
}
EXPORT_SYMBOL(sock_set_keepalive);
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+ /* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+ * as a negative value.
+ */
+ val = min_t(int, val, INT_MAX / 2);
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+ /* We double it on the way in to account for "struct sk_buff" etc.
+ * overhead. Applications assume that the SO_RCVBUF setting they make
+ * will allow that much actual data to be received on that socket.
+ *
+ * Applications are unaware that "struct sk_buff" and other overheads
+ * allocate from the receive buffer during socket buffer allocation.
+ *
+ * And after considering the possible alternatives, returning the value
+ * we actually used in getsockopt is the most desirable behavior.
+ */
+ WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __sock_set_rcvbuf(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -885,30 +914,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
- /* Ensure val * 2 fits into an int, to prevent max_t()
- * from treating it as a negative value.
- */
- val = min_t(int, val, INT_MAX / 2);
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- /*
- * We double it on the way in to account for
- * "struct sk_buff" etc. overhead. Applications
- * assume that the SO_RCVBUF setting they make will
- * allow that much actual data to be received on that
- * socket.
- *
- * Applications are unaware that "struct sk_buff" and
- * other overheads allocate from the receive buffer
- * during socket buffer allocation.
- *
- * And after considering the possible alternatives,
- * returning the value we actually used in getsockopt
- * is the most desirable behavior.
- */
- WRITE_ONCE(sk->sk_rcvbuf,
- max_t(int, val * 2, SOCK_MIN_RCVBUF));
+ __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
break;
case SO_RCVBUFFORCE:
@@ -920,9 +926,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
/* No negative values (to prevent underflow, as val will be
* multiplied by 2).
*/
- if (val < 0)
- val = 0;
- goto set_rcvbuf;
+ __sock_set_rcvbuf(sk, max(val, 0));
+ break;
case SO_KEEPALIVE:
if (sk->sk_prot->keepalive)
--
2.26.2
Powered by blists - more mailing lists