In order to make sure emergency packets receive all memory needed to proceed ensure processing of emergency SKBs happens under PF_MEMALLOC. Use the (new) sk_backlog_rcv() wrapper to ensure this for backlog processing. Skip taps, since those are user-space again. Signed-off-by: Peter Zijlstra --- include/net/sock.h | 5 +++++ net/core/dev.c | 44 ++++++++++++++++++++++++++++++++++++++------ net/core/sock.c | 18 ++++++++++++++++++ 3 files changed, 61 insertions(+), 6 deletions(-) Index: linux-2.6/net/core/dev.c =================================================================== --- linux-2.6.orig/net/core/dev.c +++ linux-2.6/net/core/dev.c @@ -1976,10 +1976,23 @@ int netif_receive_skb(struct sk_buff *sk struct net_device *orig_dev; int ret = NET_RX_DROP; __be16 type; + unsigned long pflags = current->flags; + + /* Emergency skb are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as a poor mans memory pool - the grouping kind. + * This saves us from propagating the allocation context down to all + * allocation sites. + */ + if (skb_emergency(skb)) + current->flags |= PF_MEMALLOC; /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + goto out; if (!skb->tstamp.tv64) net_timestamp(skb); @@ -1990,7 +2003,7 @@ int netif_receive_skb(struct sk_buff *sk orig_dev = skb_bond(skb); if (!orig_dev) - return NET_RX_DROP; + goto out; __get_cpu_var(netdev_rx_stat).total++; @@ -2009,6 +2022,9 @@ int netif_receive_skb(struct sk_buff *sk } #endif + if (skb_emergency(skb)) + goto skip_taps; + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) @@ -2017,6 +2033,7 @@ int netif_receive_skb(struct sk_buff *sk } } +skip_taps: #ifdef CONFIG_NET_CLS_ACT if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); @@ -2029,19 +2046,31 @@ int netif_receive_skb(struct sk_buff *sk if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { kfree_skb(skb); - goto out; + goto unlock; } skb->tc_verd = 0; ncls: #endif + if (skb_emergency(skb)) + switch(skb->protocol) { + case __constant_htons(ETH_P_ARP): + case __constant_htons(ETH_P_IP): + case __constant_htons(ETH_P_IPV6): + case __constant_htons(ETH_P_8021Q): + break; + + default: + goto drop; + } + skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { @@ -2056,6 +2085,7 @@ ncls: if (pt_prev) { ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { +drop: kfree_skb(skb); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) @@ -2063,8 +2093,10 @@ ncls: ret = NET_RX_DROP; } -out: +unlock: rcu_read_unlock(); +out: + tsk_restore_flags(current, pflags, PF_MEMALLOC); return ret; } Index: linux-2.6/include/net/sock.h =================================================================== --- linux-2.6.orig/include/net/sock.h +++ linux-2.6/include/net/sock.h @@ -523,8 +523,13 @@ static inline void sk_add_backlog(struct skb->next = NULL; } +extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { + if (skb_emergency(skb)) + return __sk_backlog_rcv(sk, skb); + return sk->sk_backlog_rcv(sk, skb); } Index: linux-2.6/net/core/sock.c =================================================================== --- linux-2.6.orig/net/core/sock.c +++ linux-2.6/net/core/sock.c @@ -319,6 +319,24 @@ int sk_clear_memalloc(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_clear_memalloc); +#ifdef CONFIG_NETVM +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + unsigned long pflags = current->flags; + + /* these should have been dropped before queueing */ + BUG_ON(!sk_has_memalloc(sk)); + + current->flags |= PF_MEMALLOC; + ret = sk->sk_backlog_rcv(sk, skb); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + + return ret; +} +EXPORT_SYMBOL(__sk_backlog_rcv); +#endif + static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { struct timeval tv; -- - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/