Message-ID: <20210622101952.28839d7e@kicinski-fedora-PC1C0HJN.hsd1.ca.comcast.net>
Date: Tue, 22 Jun 2021 10:19:52 -0700
From: Jakub Kicinski <kuba@...nel.org>
To: Eric Dumazet <eric.dumazet@...il.com>
Cc: davem@...emloft.net, netdev@...r.kernel.org, willemb@...gle.com,
dsahern@...il.com, yoshfuji@...ux-ipv6.org, Dave Jones <dsj@...com>
Subject: Re: [PATCH net-next] ip: avoid OOM kills with large UDP sends over
loopback
On Tue, 22 Jun 2021 09:54:22 -0700 Jakub Kicinski wrote:
> > > +static inline void sk_allocation_push(struct sock *sk, gfp_t flag, gfp_t *old)
> > > +{
> > > + *old = sk->sk_allocation;
> > > + sk->sk_allocation |= flag;
> > > +}
> > > +
> >
> > This is not thread safe.
> >
> > Remember UDP sendmsg() does not lock the socket for non-corking sends.
>
> Ugh, you're right :(
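To spell the race out for the archive (a sketch of my own, not from the
patch; the pop() counterpart is assumed to mirror the push):

/* Every lockless sender does a plain read-modify-write on a field that
 * is shared by all senders on the socket:
 */
static inline void sk_allocation_push(struct sock *sk, gfp_t flag, gfp_t *old)
{
        *old = sk->sk_allocation;       /* plain load */
        sk->sk_allocation |= flag;      /* non-atomic RMW */
}

static inline void sk_allocation_pop(struct sock *sk, gfp_t old)
{
        sk->sk_allocation = old;        /* plain store */
}

/* Two UDP senders on one socket (no lock_sock() for non-corking sends):
 *
 *   sender A                           sender B
 *   push(sk, __GFP_NORETRY, &oldA)
 *                                      push(sk, __GFP_NORETRY, &oldB)
 *   ...allocates...
 *   pop(sk, oldA)                      <- clears the flag B still needs
 *                                      ...allocates without the flag...
 *                                      pop(sk, oldB)
 *                                      <- oldB was read after A's push, so
 *                                         __GFP_NORETRY now sticks to the
 *                                         socket for good
 *
 * i.e. a classic lost update on an unlocked field.
 */
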
Hm, isn't it buggy to call sock_alloc_send_[p]skb() without holding the
lock in the first place, then? The knee-jerk fix would be to add another
layer of specialization to the helpers:
diff --git a/include/net/sock.h b/include/net/sock.h
index 7a7058f4f265..06f031705418 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1714,9 +1725,20 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp,
                    bool timeval, bool time32);
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                     int noblock, int *errcode);
-struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
-                                     unsigned long data_len, int noblock,
-                                     int *errcode, int max_page_order);
+struct sk_buff *__sock_alloc_send_pskb(struct sock *sk,
+                                       unsigned long header_len,
+                                       unsigned long data_len, int noblock,
+                                       int *errcode, int max_page_order,
+                                       gfp_t gfp_flags);
+
+static inline struct sk_buff *
+sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
+                     unsigned long data_len, int noblock, int *errcode)
+{
+        return __sock_alloc_send_pskb(sk, header_len, data_len,
+                                      noblock, errcode, 0, sk->sk_allocation);
+}
+
 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority);
 void sock_kfree_s(struct sock *sk, void *mem, int size);
 void sock_kzfree_s(struct sock *sk, void *mem, int size);
diff --git a/net/core/sock.c b/net/core/sock.c
index 946888afef88..64b7271a7d21 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2331,9 +2331,11 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
  *      Generic send/receive buffer handlers
  */
 
-struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
-                                     unsigned long data_len, int noblock,
-                                     int *errcode, int max_page_order)
+struct sk_buff *__sock_alloc_send_pskb(struct sock *sk,
+                                       unsigned long header_len,
+                                       unsigned long data_len, int noblock,
+                                       int *errcode, int max_page_order,
+                                       gfp_t gfp_flags)
 {
         struct sk_buff *skb;
         long timeo;
@@ -2362,7 +2364,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
                 timeo = sock_wait_for_wmem(sk, timeo);
         }
         skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
-                                   errcode, sk->sk_allocation);
+                                   errcode, gfp_flags);
         if (skb)
                 skb_set_owner_w(skb, sk);
         return skb;
@@ -2373,7 +2375,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
         *errcode = err;
         return NULL;
 }
-EXPORT_SYMBOL(sock_alloc_send_pskb);
+EXPORT_SYMBOL(__sock_alloc_send_pskb);
 
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                     int noblock, int *errcode)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c3efc7d658f6..211f1ea6cf2a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1095,9 +1095,22 @@ static int __ip_append_data(struct sock *sk,
                                 alloclen += rt->dst.trailer_len;
 
                         if (transhdrlen) {
-                                skb = sock_alloc_send_skb(sk,
-                                                alloclen + hh_len + 15,
-                                                (flags & MSG_DONTWAIT), &err);
+                                bool sg = rt->dst.dev->features & NETIF_F_SG;
+                                size_t header_len = alloclen + hh_len + 15;
+                                gfp_t sk_allocation;
+
+                                sk_allocation = sk->sk_allocation;
+                                if (header_len > PAGE_SIZE && sg)
+                                        sk_allocation |= __GFP_NORETRY;
+
+                                skb = __sock_alloc_send_pskb(sk, header_len, 0,
+                                                             (flags & MSG_DONTWAIT), &err,
+                                                             0, sk_allocation);
+                                if (unlikely(!skb) && !paged && sg) {
+                                        BUILD_BUG_ON(MAX_HEADER >= PAGE_SIZE);
+                                        paged = true;
+                                        goto alloc_new_skb;
+                                }
                         } else {
                                 skb = NULL;
                                 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
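
FWIW the point of that shape: the gfp mask ends up on the sender's stack
and is passed down as an argument, so concurrent lockless senders can't
stomp on each other the way they can with a shared sk->sk_allocation.
__ip_append_data() only adds __GFP_NORETRY when the linear part would
exceed PAGE_SIZE on an SG-capable device, so if that high-order
allocation fails we retry in paged mode rather than letting the
allocator try hard enough to trigger OOM kills. Callers that are happy
with sk->sk_allocation keep going through the inline
sock_alloc_send_pskb() wrapper.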