[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1410406193-6185-3-git-send-email-azhou@nicira.com>
Date: Wed, 10 Sep 2014 20:29:52 -0700
From: Andy Zhou <azhou@...ira.com>
To: davem@...emloft.net
Cc: netdev@...r.kernel.org, Andy Zhou <azhou@...ira.com>
Subject: [net-next v5 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.
Signed-off-by: Andy Zhou <azhou@...ira.com>
---
drivers/net/vxlan.c | 174 +++++++++++++++++------------------------
include/net/vxlan.h | 17 ++--
net/openvswitch/vport-vxlan.c | 6 +-
3 files changed, 84 insertions(+), 113 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 53c3ec1..d915669 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
#include <net/netns/generic.h>
#include <net/vxlan.h>
#include <net/protocol.h>
+#include <net/udp_tunnel.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -280,7 +281,7 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
struct vxlan_sock *vs;
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
- if (inet_sk(vs->sock->sk)->inet_sport == port)
+ if (inet_sk(vs->uts.sock->sk)->inet_sport == port)
return vs;
}
return NULL;
@@ -636,7 +637,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
{
struct net_device *dev;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport;
@@ -661,7 +662,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
{
struct net_device *dev;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport;
@@ -1053,7 +1054,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
void vxlan_sock_release(struct vxlan_sock *vs)
{
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
struct net *net = sock_net(sk);
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -1062,7 +1063,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
- rcu_assign_sk_user_data(vs->sock->sk, NULL);
vxlan_notify_del_rx_port(vs);
spin_unlock(&vn->sock_lock);
@@ -1078,7 +1078,7 @@ static void vxlan_igmp_join(struct work_struct *work)
{
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
struct vxlan_sock *vs = vxlan->vn_sock;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
@@ -1107,7 +1107,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
{
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
struct vxlan_sock *vs = vxlan->vn_sock;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
@@ -1336,7 +1336,6 @@ out:
}
#if IS_ENABLED(CONFIG_IPV6)
-
static struct sk_buff *vxlan_na_create(struct sk_buff *request,
struct neighbour *n, bool isrouter)
{
@@ -1570,13 +1569,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
-static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
- bool udp_csum)
-{
- int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- return iptunnel_handle_offloads(skb, udp_csum, type);
-}
-
#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb,
@@ -1585,13 +1577,12 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
__be16 src_port, __be16 dst_port, __be32 vni,
bool xnet)
{
- struct ipv6hdr *ip6h;
struct vxlanhdr *vxh;
- struct udphdr *uh;
int min_headroom;
int err;
+ bool udp_sum = !udp_get_no_check6_tx(vs->uts.sock->sk);
- skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+ skb = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb))
return -EINVAL;
@@ -1619,38 +1610,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
-
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
- IPSKB_REROUTED);
- skb_dst_set(skb, dst);
-
- udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
- saddr, daddr, skb->len);
-
- __skb_push(skb, sizeof(*ip6h));
- skb_reset_network_header(skb);
- ip6h = ipv6_hdr(skb);
- ip6h->version = 6;
- ip6h->priority = prio;
- ip6h->flow_lbl[0] = 0;
- ip6h->flow_lbl[1] = 0;
- ip6h->flow_lbl[2] = 0;
- ip6h->payload_len = htons(skb->len);
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = ttl;
- ip6h->daddr = *daddr;
- ip6h->saddr = *saddr;
-
- ip6tunnel_xmit(skb, dev);
+ udp_tunnel6_xmit_skb(&vs->uts, dst, skb, dev, saddr, daddr, prio,
+ ttl, src_port, dst_port);
return 0;
}
#endif
@@ -1661,11 +1622,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
{
struct vxlanhdr *vxh;
- struct udphdr *uh;
int min_headroom;
int err;
+ bool udp_sum = !vs->uts.sock->sk->sk_no_check_tx;
- skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+ skb = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb))
return -EINVAL;
@@ -1691,20 +1652,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
-
- udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
- src, dst, skb->len);
-
- return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
- tos, ttl, df, xnet);
+ return udp_tunnel_xmit_skb(&vs->uts, rt, skb, src, dst, tos,
+ ttl, df, src_port, dst_port, xnet);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
@@ -1829,18 +1778,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
- fl4.saddr, dst->sin.sin_addr.s_addr,
- tos, ttl, df, src_port, dst_port,
- htonl(vni << 8),
- !net_eq(vxlan->net, dev_net(vxlan->dev)));
+ err = udp_tunnel_xmit_skb(&vxlan->vn_sock->uts, rt, skb,
+ fl4.saddr, dst->sin.sin_addr.s_addr,
+ tos, ttl, df, src_port, dst_port,
+ !net_eq(vxlan->net,
+ dev_net(vxlan->dev)));
if (err < 0)
goto rt_tx_error;
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- struct sock *sk = vxlan->vn_sock->sock->sk;
+ struct sock *sk = vxlan->vn_sock->uts.sock->sk;
struct dst_entry *ndst;
struct flowi6 fl6;
u32 flags;
@@ -2202,8 +2151,8 @@ void vxlan_get_rx_port(struct net_device *dev)
spin_lock(&vn->sock_lock);
for (i = 0; i < PORT_HASH_SIZE; ++i) {
hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
- port = inet_sk(vs->sock->sk)->inet_sport;
- sa_family = vs->sock->sk->sk_family;
+ port = inet_sk(vs->uts.sock->sk)->inet_sport;
+ sa_family = vs->uts.sock->sk->sk_family;
dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
port);
}
@@ -2330,12 +2279,18 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
.get_link = ethtool_op_get_link,
};
+static void free_vxlan_sock_rcu(struct rcu_head *rcu)
+{
+ struct vxlan_sock *vs = container_of(rcu, struct vxlan_sock, rcu);
+
+ udp_tunnel_sock_free(&vs->uts);
+}
+
static void vxlan_del_work(struct work_struct *work)
{
struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
-
- sk_release_kernel(vs->sock->sk);
- kfree_rcu(vs, rcu);
+ udp_tunnel_sock_release(&vs->uts);
+ call_rcu(&vs->rcu, free_vxlan_sock_rcu);
}
static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
@@ -2375,39 +2330,60 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
return sock;
}
+static struct vxlan_sock *vxlan_create_tunnel_sock(struct net *net,
+ struct socket *sock)
+{
+ struct udp_tunnel_sock_cfg cfg;
+ struct udp_tunnel_sock *uts;
+ struct vxlan_sock *vs;
+
+ cfg.sock = sock;
+ cfg.encap_type = 1;
+ cfg.encap_rcv = vxlan_udp_encap_recv;
+ cfg.encap_destroy = NULL;
+
+ uts = create_udp_tunnel_sock(net, sizeof(*vs), &cfg);
+
+ if (IS_ERR(uts))
+ vs = ERR_CAST(uts);
+ else
+ vs = container_of(uts, struct vxlan_sock, uts);
+
+ return vs;
+}
+
/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
- vxlan_rcv_t *rcv, void *data,
+ vxlan_rcv_t rcv, void *data,
u32 flags)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct socket *sock;
- struct sock *sk;
- unsigned int h;
bool ipv6 = !!(flags & VXLAN_F_IPV6);
+ unsigned int h;
- vs = kzalloc(sizeof(*vs), GFP_KERNEL);
- if (!vs)
- return ERR_PTR(-ENOMEM);
+ sock = vxlan_create_sock(net, ipv6, port, flags);
+ if (IS_ERR(sock))
+ return ERR_CAST(sock);
+
+ vs = vxlan_create_tunnel_sock(net, sock);
+ if (IS_ERR(vs))
+ return vs;
for (h = 0; h < VNI_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vs->vni_list[h]);
- INIT_WORK(&vs->del_work, vxlan_del_work);
+ spin_lock(&vn->sock_lock);
+ list_add(&vs->next, &vn->vxlan_list);
+ spin_unlock(&vn->sock_lock);
- sock = vxlan_create_sock(net, ipv6, port, flags);
- if (IS_ERR(sock)) {
- kfree(vs);
- return ERR_CAST(sock);
- }
+ INIT_WORK(&vs->del_work, vxlan_del_work);
- vs->sock = sock;
- sk = sock->sk;
atomic_set(&vs->refcnt, 1);
+
vs->rcv = rcv;
- vs->data = data;
- rcu_assign_sk_user_data(vs->sock->sk, vs);
+ vs->rcv_data = data;
/* Initialize the vxlan udp offloads structure */
vs->udp_offloads.port = port;
@@ -2419,21 +2395,11 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
vxlan_notify_add_rx_port(vs);
spin_unlock(&vn->sock_lock);
- /* Mark socket as an encapsulation socket. */
- udp_sk(sk)->encap_type = 1;
- udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
-#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6)
- ipv6_stub->udpv6_encap_enable();
- else
-#endif
- udp_encap_enable();
-
return vs;
}
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
- vxlan_rcv_t *rcv, void *data,
+ vxlan_rcv_t rcv, void *data,
bool no_share, u32 flags)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index d5f59f3..b1dd547 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -4,23 +4,27 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>
+#include <net/udp_tunnel.h>
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
-/* per UDP socket information */
+typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
+ __be32 key);
+
+/* per vxlan socket information */
struct vxlan_sock {
+ struct udp_tunnel_sock uts; /* Must be the first member */
struct hlist_node hlist;
- vxlan_rcv_t *rcv;
- void *data;
+ struct list_head next;
struct work_struct del_work;
- struct socket *sock;
struct rcu_head rcu;
struct hlist_head vni_list[VNI_HASH_SIZE];
atomic_t refcnt;
+ vxlan_rcv_t rcv;
+ void *rcv_data;
struct udp_offload udp_offloads;
};
@@ -35,7 +39,7 @@ struct vxlan_sock {
#define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
- vxlan_rcv_t *rcv, void *data,
+ vxlan_rcv_t rcv, void *data,
bool no_share, u32 flags);
void vxlan_sock_release(struct vxlan_sock *vs);
@@ -57,4 +61,5 @@ static inline void vxlan_get_rx_port(struct net_device *netdev)
{
}
#endif
+
#endif
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index d8b7e24..7599efd 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
{
struct ovs_key_ipv4_tunnel tun_key;
- struct vport *vport = vs->data;
+ struct vport *vport = vs->rcv_data;
struct iphdr *iph;
__be64 key;
@@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
return -EMSGSIZE;
@@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
struct rtable *rt;
struct flowi4 fl;
__be16 src_port;
--
1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists