lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Wed, 18 Dec 2013 12:37:35 +0800 From: Zhi Yong Wu <zwu.kernel@...il.com> To: Tom Herbert <therbert@...gle.com> Cc: Linux Netdev List <netdev@...r.kernel.org>, LKML <linux-kernel@...r.kernel.org>, David Miller <davem@...emloft.net>, "Michael S. Tsirkin" <mst@...hat.com>, Jason Wang <jasowang@...hat.com>, Zhi Yong Wu <wuzhy@...ux.vnet.ibm.com> Subject: Re: [RFC PATCH] net, tun: remove the flow cache Hi, Tom, On Wed, Dec 18, 2013 at 12:06 PM, Tom Herbert <therbert@...gle.com> wrote: > On Mon, Dec 16, 2013 at 11:26 PM, Zhi Yong Wu <zwu.kernel@...il.com> wrote: >> From: Zhi Yong Wu <wuzhy@...ux.vnet.ibm.com> >> >> The flow cache is an extremely broken concept, and it usually brings up >> growth issues and DoS attacks, so this patch is trying to remove it from >> the tuntap driver, and insteadly use a simpler way for its flow control. >> > Yes , in it's current state it's broken. But maybe we can try to fix > it instead of arbitrarily removing it. Please see my patches on > plumbing RFS into tuntap which may start to make it useful. Do you mean your patch [5/5] tun: Added support for RFS on tun flows? Sorry, could you explain in more detail? > > Tom > >> Signed-off-by: Zhi Yong Wu <wuzhy@...ux.vnet.ibm.com> >> --- >> drivers/net/tun.c | 208 +++-------------------------------------------------- >> 1 files changed, 10 insertions(+), 198 deletions(-) >> >> diff --git a/drivers/net/tun.c b/drivers/net/tun.c >> index 7c8343a..7c27fdc 100644 >> --- a/drivers/net/tun.c >> +++ b/drivers/net/tun.c >> @@ -32,12 +32,15 @@ >> * >> * Daniel Podlejski <underley@...erley.eu.org> >> * Modifications for 2.3.99-pre5 kernel. >> + * >> + * Zhi Yong Wu <wuzhy@...ux.vnet.ibm.com> >> + * Remove the flow cache. 
>> */ >> >> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt >> >> #define DRV_NAME "tun" >> -#define DRV_VERSION "1.6" >> +#define DRV_VERSION "1.7" >> #define DRV_DESCRIPTION "Universal TUN/TAP device driver" >> #define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky <maxk@...lcomm.com>" >> >> @@ -146,18 +149,6 @@ struct tun_file { >> struct tun_struct *detached; >> }; >> >> -struct tun_flow_entry { >> - struct hlist_node hash_link; >> - struct rcu_head rcu; >> - struct tun_struct *tun; >> - >> - u32 rxhash; >> - int queue_index; >> - unsigned long updated; >> -}; >> - >> -#define TUN_NUM_FLOW_ENTRIES 1024 >> - >> /* Since the socket were moved to tun_file, to preserve the behavior of persist >> * device, socket filter, sndbuf and vnet header size were restore when the >> * file were attached to a persist device. >> @@ -184,163 +175,11 @@ struct tun_struct { >> int debug; >> #endif >> spinlock_t lock; >> - struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; >> - struct timer_list flow_gc_timer; >> - unsigned long ageing_time; >> unsigned int numdisabled; >> struct list_head disabled; >> void *security; >> - u32 flow_count; >> }; >> >> -static inline u32 tun_hashfn(u32 rxhash) >> -{ >> - return rxhash & 0x3ff; >> -} >> - >> -static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) >> -{ >> - struct tun_flow_entry *e; >> - >> - hlist_for_each_entry_rcu(e, head, hash_link) { >> - if (e->rxhash == rxhash) >> - return e; >> - } >> - return NULL; >> -} >> - >> -static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, >> - struct hlist_head *head, >> - u32 rxhash, u16 queue_index) >> -{ >> - struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC); >> - >> - if (e) { >> - tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n", >> - rxhash, queue_index); >> - e->updated = jiffies; >> - e->rxhash = rxhash; >> - e->queue_index = queue_index; >> - e->tun = tun; >> - hlist_add_head_rcu(&e->hash_link, head); >> - ++tun->flow_count; >> - } >> - 
return e; >> -} >> - >> -static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) >> -{ >> - tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", >> - e->rxhash, e->queue_index); >> - hlist_del_rcu(&e->hash_link); >> - kfree_rcu(e, rcu); >> - --tun->flow_count; >> -} >> - >> -static void tun_flow_flush(struct tun_struct *tun) >> -{ >> - int i; >> - >> - spin_lock_bh(&tun->lock); >> - for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { >> - struct tun_flow_entry *e; >> - struct hlist_node *n; >> - >> - hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) >> - tun_flow_delete(tun, e); >> - } >> - spin_unlock_bh(&tun->lock); >> -} >> - >> -static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) >> -{ >> - int i; >> - >> - spin_lock_bh(&tun->lock); >> - for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { >> - struct tun_flow_entry *e; >> - struct hlist_node *n; >> - >> - hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { >> - if (e->queue_index == queue_index) >> - tun_flow_delete(tun, e); >> - } >> - } >> - spin_unlock_bh(&tun->lock); >> -} >> - >> -static void tun_flow_cleanup(unsigned long data) >> -{ >> - struct tun_struct *tun = (struct tun_struct *)data; >> - unsigned long delay = tun->ageing_time; >> - unsigned long next_timer = jiffies + delay; >> - unsigned long count = 0; >> - int i; >> - >> - tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n"); >> - >> - spin_lock_bh(&tun->lock); >> - for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { >> - struct tun_flow_entry *e; >> - struct hlist_node *n; >> - >> - hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { >> - unsigned long this_timer; >> - count++; >> - this_timer = e->updated + delay; >> - if (time_before_eq(this_timer, jiffies)) >> - tun_flow_delete(tun, e); >> - else if (time_before(this_timer, next_timer)) >> - next_timer = this_timer; >> - } >> - } >> - >> - if (count) >> - mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer)); >> - 
spin_unlock_bh(&tun->lock); >> -} >> - >> -static void tun_flow_update(struct tun_struct *tun, u32 rxhash, >> - struct tun_file *tfile) >> -{ >> - struct hlist_head *head; >> - struct tun_flow_entry *e; >> - unsigned long delay = tun->ageing_time; >> - u16 queue_index = tfile->queue_index; >> - >> - if (!rxhash) >> - return; >> - else >> - head = &tun->flows[tun_hashfn(rxhash)]; >> - >> - rcu_read_lock(); >> - >> - /* We may get a very small possibility of OOO during switching, not >> - * worth to optimize.*/ >> - if (tun->numqueues == 1 || tfile->detached) >> - goto unlock; >> - >> - e = tun_flow_find(head, rxhash); >> - if (likely(e)) { >> - /* TODO: keep queueing to old queue until it's empty? */ >> - e->queue_index = queue_index; >> - e->updated = jiffies; >> - } else { >> - spin_lock_bh(&tun->lock); >> - if (!tun_flow_find(head, rxhash) && >> - tun->flow_count < MAX_TAP_FLOWS) >> - tun_flow_create(tun, head, rxhash, queue_index); >> - >> - if (!timer_pending(&tun->flow_gc_timer)) >> - mod_timer(&tun->flow_gc_timer, >> - round_jiffies_up(jiffies + delay)); >> - spin_unlock_bh(&tun->lock); >> - } >> - >> -unlock: >> - rcu_read_unlock(); >> -} >> - >> /* We try to identify a flow through its rxhash first. The reason that >> * we do not check rxq no. is becuase some cards(e.g 82599), chooses >> * the rxq based on the txq where the last packet of the flow comes. 
As >> @@ -351,7 +190,6 @@ unlock: >> static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) >> { >> struct tun_struct *tun = netdev_priv(dev); >> - struct tun_flow_entry *e; >> u32 txq = 0; >> u32 numqueues = 0; >> >> @@ -360,12 +198,11 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) >> >> txq = skb_get_rxhash(skb); >> if (txq) { >> - e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); >> - if (e) >> - txq = e->queue_index; >> - else >> + txq = skb_get_queue_mapping(skb); >> + if (!txq) { >> /* use multiply and shift instead of expensive divide */ >> txq = ((u64)txq * numqueues) >> 32; >> + } >> } else if (likely(skb_rx_queue_recorded(skb))) { >> txq = skb_get_rx_queue(skb); >> while (unlikely(txq >= numqueues)) >> @@ -439,7 +276,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) >> tun_disable_queue(tun, tfile); >> >> synchronize_net(); >> - tun_flow_delete_by_queue(tun, tun->numqueues + 1); >> /* Drop read queue */ >> tun_queue_purge(tfile); >> tun_set_real_num_queues(tun); >> @@ -858,25 +694,6 @@ static const struct net_device_ops tap_netdev_ops = { >> #endif >> }; >> >> -static void tun_flow_init(struct tun_struct *tun) >> -{ >> - int i; >> - >> - for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) >> - INIT_HLIST_HEAD(&tun->flows[i]); >> - >> - tun->ageing_time = TUN_FLOW_EXPIRE; >> - setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun); >> - mod_timer(&tun->flow_gc_timer, >> - round_jiffies_up(jiffies + tun->ageing_time)); >> -} >> - >> -static void tun_flow_uninit(struct tun_struct *tun) >> -{ >> - del_timer_sync(&tun->flow_gc_timer); >> - tun_flow_flush(tun); >> -} >> - >> /* Initialize net device. 
*/ >> static void tun_net_init(struct net_device *dev) >> { >> @@ -986,7 +803,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, >> int copylen; >> bool zerocopy = false; >> int err; >> - u32 rxhash; >> >> if (!(tun->flags & TUN_NO_PI)) { >> if (len < sizeof(pi)) >> @@ -1146,13 +962,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, >> skb_reset_network_header(skb); >> skb_probe_transport_header(skb, 0); >> >> - rxhash = skb_get_rxhash(skb); >> + skb_set_queue_mapping(skb, tfile->queue_index); >> netif_rx_ni(skb); >> >> tun->dev->stats.rx_packets++; >> tun->dev->stats.rx_bytes += len; >> >> - tun_flow_update(tun, rxhash, tfile); >> return total_len; >> } >> >> @@ -1370,7 +1185,6 @@ static void tun_free_netdev(struct net_device *dev) >> struct tun_struct *tun = netdev_priv(dev); >> >> BUG_ON(!(list_empty(&tun->disabled))); >> - tun_flow_uninit(tun); >> security_tun_dev_free_security(tun->security); >> free_netdev(dev); >> } >> @@ -1644,7 +1458,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) >> goto err_free_dev; >> >> tun_net_init(dev); >> - tun_flow_init(tun); >> >> dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | >> TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | >> @@ -1655,7 +1468,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) >> INIT_LIST_HEAD(&tun->disabled); >> err = tun_attach(tun, file, false); >> if (err < 0) >> - goto err_free_flow; >> + goto err_free_security; >> >> err = register_netdevice(tun->dev); >> if (err < 0) >> @@ -1705,8 +1518,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) >> >> err_detach: >> tun_detach_all(dev); >> -err_free_flow: >> - tun_flow_uninit(tun); >> +err_free_security: >> security_tun_dev_free_security(tun->security); >> err_free_dev: >> free_netdev(dev); >> -- >> 1.7.6.5 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe netdev" in >> the body of 
a message to majordomo@...r.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html -- Regards, Zhi Yong Wu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists