[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAGsizzLUwA_=fZFX-4ZnYU4K-nj1A2DV_BZKOMKxgQDh2Rs54g@mail.gmail.com>
Date: Mon, 23 Jan 2012 14:12:48 +0100
From: Štefan Gula <steweg@...t.sk>
To: Alexey Kuznetsov <kuznet@....inr.ac.ru>,
"David S. Miller" <davem@...emloft.net>,
James Morris <jmorris@...ei.org>,
Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
Patrick McHardy <kaber@...sh.net>
Cc: netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [patch v4, kernel version 3.2.1] net/ipv4/ip_gre: Ethernet
multipoint GRE over IP
2012/1/18 Stefan Gula <steweg@...t.sk>:
> From: Stefan Gula <steweg@...il.com>
>
> This patch is an extension for current Ethernet over GRE
> implementation, which allows user to create virtual bridge (multipoint
> VPN) and forward traffic based on Ethernet MAC address information in
> it. It simulates the Bridge behavior learning mechanism, but instead
> of learning port ID from which given MAC address comes, it learns IP
> address of peer which encapsulated given packet. Multicast, Broadcast
> and unknown-unicast traffic is sent over the network as multicast
> encapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be
> represented as one single virtual switch on logical level and be also
> represented as one multicast IPv4 address on network level.
>
> Signed-off-by: Stefan Gula <steweg@...il.com>
>
> ---
>
> Code was merged with Eric Dumazet's proposal (everything except the reordering of orig_source, since that needs to be the previous value), tested, and fixed with additional lines in the ipgre_tap_netdev_ops struct; the orig_source line was moved before pskb_may_pull.
>
> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h
> --- linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100
> +++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100
> @@ -27,6 +27,14 @@ struct ip_tunnel {
> __u32 o_seqno; /* The last output seqno */
> int hlen; /* Precalculated GRE header length */
> int mlink;
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> +#define GRETAP_BR_HASH_BITS 8
> +#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS)
> + struct hlist_head hash[GRETAP_BR_HASH_SIZE];
> + spinlock_t hash_lock;
> + unsigned long ageing_time;
> + struct timer_list gc_timer;
> +#endif
>
> struct ip_tunnel_parm parms;
>
> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig
> --- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100
> +++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100
> @@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST
> Network), but can be distributed all over the Internet. If you want
> to do that, say Y here and to "IP multicast routing" below.
>
> +config NET_IPGRE_BRIDGE
> + bool "IP: Ethernet over multipoint GRE over IP"
> + depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST
> + help
> + Allows you to use multipoint GRE VPN as virtual switch and interconnect
> + several L2 endpoints over L3 routed infrastructure. It is useful for
> + creating multipoint L2 VPNs which can be later used inside bridge
> + interfaces. If you want to use the GRE multipoint L2 VPN feature, say Y.
> +
> config IP_MROUTE
> bool "IP: multicast routing"
> depends on IP_MULTICAST
> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c
> --- linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100
> +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-18 00:33:55.000000000 +0100
> @@ -52,6 +52,11 @@
> #include <net/ip6_route.h>
> #endif
>
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> +#include <linux/jhash.h>
> +#include <asm/unaligned.h>
> +#endif
> +
> /*
> Problems & solutions
> --------------------
> @@ -134,6 +139,172 @@ struct ipgre_net {
> struct net_device *fb_tunnel_dev;
> };
>
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + /*
> + * This part of the code enables L2 Ethernet switch
> + * virtualization over an IP-routed infrastructure,
> + * using multicast-capable endpoints and Ethernet
> + * over GRE.
> + *
> + * Author: Stefan Gula
> + * Signed-off-by: Stefan Gula <steweg@...il.com>
> + */
> +struct ipgre_tap_bridge_entry {
> + struct hlist_node hlist;
> + __be32 raddr;
> + unsigned char addr[ETH_ALEN];
> + unsigned long updated;
> + struct rcu_head rcu;
> +};
> +
> +static u32 ipgre_salt __read_mostly;
> +
> +static inline int ipgre_tap_bridge_hash(const unsigned char *mac)
> +{
> + u32 key = get_unaligned((u32 *)(mac + 2));
> +
> + return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1);
> +}
> +
> +static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel,
> + const struct ipgre_tap_bridge_entry *entry)
> +{
> + return time_before_eq(entry->updated + tunnel->ageing_time,
> + jiffies);
> +}
> +
> +static inline void ipgre_tap_bridge_delete(struct ipgre_tap_bridge_entry *entry)
> +{
> + hlist_del_rcu(&entry->hlist);
> + kfree_rcu(entry, rcu);
> +}
> +
> +static void ipgre_tap_bridge_cleanup(unsigned long _data)
> +{
> + struct ip_tunnel *tunnel = (struct ip_tunnel *)_data;
> + unsigned long delay = tunnel->ageing_time;
> + unsigned long next_timer = jiffies + tunnel->ageing_time;
> + int i;
> +
> + spin_lock(&tunnel->hash_lock);
> + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) {
> + struct ipgre_tap_bridge_entry *entry;
> + struct hlist_node *h, *n;
> +
> + hlist_for_each_entry_safe(entry, h, n,
> + &tunnel->hash[i], hlist)
> + {
> + unsigned long this_timer;
> + this_timer = entry->updated + delay;
> + if (time_before_eq(this_timer, jiffies))
> + ipgre_tap_bridge_delete(entry);
> + else if (time_before(this_timer, next_timer))
> + next_timer = this_timer;
> + }
> + }
> + spin_unlock(&tunnel->hash_lock);
> + mod_timer(&tunnel->gc_timer, round_jiffies_up(next_timer));
> +}
> +
> +static void ipgre_tap_bridge_flush(struct ip_tunnel *tunnel)
> +{
> + int i;
> +
> + spin_lock_bh(&tunnel->hash_lock);
> + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) {
> + struct ipgre_tap_bridge_entry *entry;
> + struct hlist_node *h, *n;
> +
> + hlist_for_each_entry_safe(entry, h, n,
> + &tunnel->hash[i], hlist)
> + {
> + ipgre_tap_bridge_delete(entry);
> + }
> + }
> + spin_unlock_bh(&tunnel->hash_lock);
> +}
> +
> +static struct ipgre_tap_bridge_entry *__ipgre_tap_bridge_get(
> + struct ip_tunnel *tunnel, const unsigned char *addr)
> +{
> + struct hlist_node *h;
> + struct ipgre_tap_bridge_entry *entry;
> +
> + hlist_for_each_entry_rcu(entry, h,
> + &tunnel->hash[ipgre_tap_bridge_hash(addr)], hlist) {
> + if (!compare_ether_addr(entry->addr, addr)) {
> + if (unlikely(ipgre_tap_bridge_has_expired(tunnel,
> + entry)))
> + break;
> + return entry;
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find(
> + struct hlist_head *head,
> + const unsigned char *addr)
> +{
> + struct hlist_node *h;
> + struct ipgre_tap_bridge_entry *entry;
> +
> + hlist_for_each_entry(entry, h, head, hlist) {
> + if (!compare_ether_addr(entry->addr, addr))
> + return entry;
> + }
> + return NULL;
> +}
> +
> +
> +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find_rcu(
> + struct hlist_head *head,
> + const unsigned char *addr)
> +{
> + struct hlist_node *h;
> + struct ipgre_tap_bridge_entry *entry;
> +
> + hlist_for_each_entry_rcu(entry, h, head, hlist) {
> + if (!compare_ether_addr(entry->addr, addr))
> + return entry;
> + }
> + return NULL;
> +}
> +
> +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create(
> + struct hlist_head *head,
> + __be32 source,
> + const unsigned char *addr)
> +{
> + struct ipgre_tap_bridge_entry *entry;
> +
> + entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
> + if (entry) {
> + memcpy(entry->addr, addr, ETH_ALEN);
> + entry->raddr = source;
> + entry->updated = jiffies;
> + hlist_add_head_rcu(&entry->hlist, head);
> + }
> + return entry;
> +}
> +
> +static __be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
> + const unsigned char *addr)
> +{
> + __be32 raddr = 0;
> + struct ipgre_tap_bridge_entry *entry;
> +
> + rcu_read_lock();
> + entry = __ipgre_tap_bridge_get(tunnel, addr);
> + if (entry)
> + raddr = entry->raddr;
> + rcu_read_unlock();
> +
> + return raddr;
> +}
> +
> +#endif
> /* Tunnel hash table */
>
> /*
> @@ -562,6 +733,12 @@ static int ipgre_rcv(struct sk_buff *skb
> struct ip_tunnel *tunnel;
> int offset = 4;
> __be16 gre_proto;
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + __be32 orig_source;
> + struct hlist_head *head;
> + struct ipgre_tap_bridge_entry *entry;
> + const struct ethhdr *tethhdr;
> +#endif
>
> if (!pskb_may_pull(skb, 16))
> goto drop_nolock;
> @@ -654,6 +831,9 @@ static int ipgre_rcv(struct sk_buff *skb
>
> /* Warning: All skb pointers will be invalidated! */
> if (tunnel->dev->type == ARPHRD_ETHER) {
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + orig_source = iph->saddr;
> +#endif
> if (!pskb_may_pull(skb, ETH_HLEN)) {
> tunnel->dev->stats.rx_length_errors++;
> tunnel->dev->stats.rx_errors++;
> @@ -663,6 +843,32 @@ static int ipgre_rcv(struct sk_buff *skb
> iph = ip_hdr(skb);
> skb->protocol = eth_type_trans(skb, tunnel->dev);
> skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + if (ipv4_is_multicast(tunnel->parms.iph.daddr)) {
> + tethhdr = eth_hdr(skb);
> + if (!is_multicast_ether_addr(
> + tethhdr->h_source)) {
> + head = &tunnel->hash[
> + ipgre_tap_bridge_hash(
> + tethhdr->h_source)];
> + entry = ipgre_tap_bridge_find_rcu(head,
> + tethhdr->h_source);
> + if (likely(entry)) {
> + entry->raddr = orig_source;
> + entry->updated = jiffies;
> + } else {
> + spin_lock(&tunnel->hash_lock);
> + if (!ipgre_tap_bridge_find(head,
> + tethhdr->h_source))
> + ipgre_tap_bridge_create(
> + head,
> + orig_source,
> + tethhdr->h_source);
> + spin_unlock(&tunnel->hash_lock);
> + }
> + }
> + }
> +#endif
> }
>
> tstats = this_cpu_ptr(tunnel->dev->tstats);
> @@ -702,7 +908,7 @@ static netdev_tx_t ipgre_tunnel_xmit(str
> struct iphdr *iph; /* Our new IP header */
> unsigned int max_headroom; /* The extra header space needed */
> int gre_hlen;
> - __be32 dst;
> + __be32 dst = 0;
> int mtu;
>
> if (dev->type == ARPHRD_ETHER)
> @@ -716,7 +922,15 @@ static netdev_tx_t ipgre_tunnel_xmit(str
> tiph = &tunnel->parms.iph;
> }
>
> - if ((dst = tiph->daddr) == 0) {
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + if ((dev->type == ARPHRD_ETHER) &&
> + ipv4_is_multicast(tunnel->parms.iph.daddr))
> + dst = ipgre_tap_bridge_get_raddr(tunnel,
> + ((struct ethhdr *)skb->data)->h_dest);
> +#endif
> + if (dst == 0)
> + dst = tiph->daddr;
> + if (dst == 0) {
> /* NBMA tunnel */
>
> if (skb_dst(skb) == NULL) {
> @@ -1209,6 +1423,16 @@ static int ipgre_open(struct net_device
> return -EADDRNOTAVAIL;
> t->mlink = dev->ifindex;
> ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + if (t->dev->type == ARPHRD_ETHER) {
> + INIT_HLIST_HEAD(t->hash);
> + spin_lock_init(&t->hash_lock);
> + t->ageing_time = 300 * HZ;
> + setup_timer(&t->gc_timer, ipgre_tap_bridge_cleanup,
> + (unsigned long) t);
> + mod_timer(&t->gc_timer, jiffies + t->ageing_time);
> + }
> +#endif
> }
> return 0;
> }
> @@ -1219,6 +1443,12 @@ static int ipgre_close(struct net_device
>
> if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
> struct in_device *in_dev;
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + if (t->dev->type == ARPHRD_ETHER) {
> + ipgre_tap_bridge_flush(t);
> + del_timer_sync(&t->gc_timer);
> + }
> +#endif
> in_dev = inetdev_by_index(dev_net(dev), t->mlink);
> if (in_dev)
> ip_mc_dec_group(in_dev, t->parms.iph.daddr);
> @@ -1488,6 +1718,10 @@ static int ipgre_tap_init(struct net_dev
> static const struct net_device_ops ipgre_tap_netdev_ops = {
> .ndo_init = ipgre_tap_init,
> .ndo_uninit = ipgre_tunnel_uninit,
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + .ndo_open = ipgre_open,
> + .ndo_stop = ipgre_close,
> +#endif
> .ndo_start_xmit = ipgre_tunnel_xmit,
> .ndo_set_mac_address = eth_mac_addr,
> .ndo_validate_addr = eth_validate_addr,
> @@ -1705,6 +1939,9 @@ static int __init ipgre_init(void)
>
> printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
>
> +#ifdef CONFIG_NET_IPGRE_BRIDGE
> + get_random_bytes(&ipgre_salt, sizeof(ipgre_salt));
> +#endif
> err = register_pernet_device(&ipgre_net_ops);
> if (err < 0)
> return err;
Is there anything else needed from my side to get this code into the
kernel, or should I just wait for the maintainers to review it?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists