[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACzMAJKn4zR8q1dQsiV4ShxbvamKYi2bCFrCyiVm-R8rM4Np2w@mail.gmail.com>
Date: Fri, 12 Sep 2014 13:42:07 -0700
From: Andy Zhou <azhou@...ira.com>
To: Tom Herbert <therbert@...gle.com>
Cc: David Miller <davem@...emloft.net>,
Linux Netdev List <netdev@...r.kernel.org>
Subject: Re: [net-next v5 1/3] udp-tunnel: Expand UDP tunnel APIs
On Thu, Sep 11, 2014 at 2:04 PM, Tom Herbert <therbert@...gle.com> wrote:
> On Wed, Sep 10, 2014 at 8:29 PM, Andy Zhou <azhou@...ira.com> wrote:
>> Added common udp tunnel socket creation, and packet transmission APIs
>> API that can be used by other UDP based tunneling protocol
>> implementation.
>>
>> Signed-off-by: Andy Zhou <azhou@...ira.com>
>> ---
>> include/net/udp_tunnel.h | 73 +++++++++++++++++++++++++++
>> net/ipv4/Kconfig | 1 +
>> net/ipv4/udp_tunnel.c | 108 ++++++++++++++++++++++++++--------------
>> net/ipv6/Makefile | 1 +
>> net/ipv6/ip6_udp_tunnel.c | 121 +++++++++++++++++++++++++++++++++++++++++++++
>> 5 files changed, 268 insertions(+), 36 deletions(-)
>> create mode 100644 net/ipv6/ip6_udp_tunnel.c
>>
>> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
>> index ffd69cb..e9dcf83 100644
>> --- a/include/net/udp_tunnel.h
>> +++ b/include/net/udp_tunnel.h
>> @@ -1,6 +1,14 @@
>> #ifndef __NET_UDP_TUNNEL_H
>> #define __NET_UDP_TUNNEL_H
>>
>> +#include <net/ip_tunnels.h>
>> +#include <net/udp.h>
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +#include <net/ipv6.h>
>> +#include <net/addrconf.h>
>> +#endif
>> +
>> struct udp_port_cfg {
>> u8 family;
>>
>> @@ -29,4 +37,69 @@ struct udp_port_cfg {
>> int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>> struct socket **sockp);
>>
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>> + struct socket **sockp);
>> +#else
>> +static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>> + struct socket **sockp)
>> +{
>> + return 0;
>> +}
>> +#endif
>> +
>> +struct udp_tunnel_sock;
>> +
>> +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
>> +typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
>> +
>> +struct udp_tunnel_sock_cfg {
>> + struct socket *sock; /* The socket UDP tunnel will attach to */
>> + /* Used for setting up udp_sock fields, see udp.h for details */
>> + __u8 encap_type;
>> + udp_tunnel_encap_rcv_t encap_rcv;
>> + udp_tunnel_encap_destroy_t encap_destroy;
>> +};
>> +
>> +struct udp_tunnel_sock {
>> + struct socket *sock;
>> +};
>> +
>> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
>> + struct udp_tunnel_sock_cfg
>> + *sock_cfg);
>> +
>> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
>> + struct sk_buff *skb, __be32 src, __be32 dst,
>> + __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
>> + __be16 dst_port, bool xnet);
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
>> + struct sk_buff *skb, struct net_device *dev,
>> + struct in6_addr *saddr, struct in6_addr *daddr,
>> + __u8 prio, __u8 ttl, __be16 src_port,
>> + __be16 dst_port);
>> +#endif
>> +
>> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
>> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts);
>> +
>> +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
>> + bool udp_csum)
>> +{
>> + int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
>> +
>> + return iptunnel_handle_offloads(skb, udp_csum, type);
>> +}
>> +
>> +static inline void udp_tunnel_encap_enable(struct socket *sock)
>> +{
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + if (sock->sk->sk_family == PF_INET6)
>> + ipv6_stub->udpv6_encap_enable();
>> + else
>> +#endif
>> + udp_encap_enable();
>> +}
>> #endif
>> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
>> index dbc10d8..cccb95f 100644
>> --- a/net/ipv4/Kconfig
>> +++ b/net/ipv4/Kconfig
>> @@ -308,6 +308,7 @@ config NET_IPVTI
>> on top.
>>
>> config NET_UDP_TUNNEL
>> + depends on (IPV6 || IPV6=n)
>> tristate
>> default n
>>
>> diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
>> index 61ec1a6..d60c1a0 100644
>> --- a/net/ipv4/udp_tunnel.c
>> +++ b/net/ipv4/udp_tunnel.c
>> @@ -14,42 +14,9 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>> int err = -EINVAL;
>> struct socket *sock = NULL;
>>
>> -#if IS_ENABLED(CONFIG_IPV6)
>> if (cfg->family == AF_INET6) {
>> - struct sockaddr_in6 udp6_addr;
>> -
>> - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
>> - if (err < 0)
>> - goto error;
>> -
>> - sk_change_net(sock->sk, net);
>> -
>> - udp6_addr.sin6_family = AF_INET6;
>> - memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
>> - sizeof(udp6_addr.sin6_addr));
>> - udp6_addr.sin6_port = cfg->local_udp_port;
>> - err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
>> - sizeof(udp6_addr));
>> - if (err < 0)
>> - goto error;
>> -
>> - if (cfg->peer_udp_port) {
>> - udp6_addr.sin6_family = AF_INET6;
>> - memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
>> - sizeof(udp6_addr.sin6_addr));
>> - udp6_addr.sin6_port = cfg->peer_udp_port;
>> - err = kernel_connect(sock,
>> - (struct sockaddr *)&udp6_addr,
>> - sizeof(udp6_addr), 0);
>> - }
>> - if (err < 0)
>> - goto error;
>> -
>> - udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
>> - udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
>> - } else
>> -#endif
>> - if (cfg->family == AF_INET) {
>> + return udp_sock_create6(net, cfg, sockp);
>> + } else if (cfg->family == AF_INET) {
>> struct sockaddr_in udp_addr;
>>
>> err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
>> @@ -82,7 +49,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>> return -EPFNOSUPPORT;
>> }
>>
>> -
>> *sockp = sock;
>>
>> return 0;
>> @@ -97,4 +63,74 @@ error:
>> }
>> EXPORT_SYMBOL(udp_sock_create);
>>
>> +struct udp_tunnel_sock *
>> +create_udp_tunnel_sock(struct net *net, size_t size,
>> + struct udp_tunnel_sock_cfg *cfg)
>> +{
>> + struct udp_tunnel_sock *uts;
>> + struct sock *sk;
>> + struct socket *sock = cfg->sock;
>> +
>> + uts = kzalloc(size, GFP_KERNEL);
>> + if (!uts)
>> + return ERR_PTR(-ENOMEM);
>> +
> Allocating memory for the caller seems like overkill to me and there's
> no guarantee that is what caller wants anyway (maybe they are using
> array of static structures for instance). Seems like it would be just
> as easy to return the sock and let caller allocate whatever else it
> needs on its own (only field in udp_tunnel_sock is the sock anyway).
O.K. udp_tunnel layer does not need to allocate or maintain memory. It
will further simplify this layer.
>
>> + sk = sock->sk;
>> +
>> + /* Disable multicast loopback */
>> + inet_sk(sk)->mc_loop = 0;
>> +
> Probably want to enable checksum unnecessary conversions also.
Right. Will do.
>
>> + rcu_assign_sk_user_data(sk, uts);
>> +
>> + udp_sk(sk)->encap_type = cfg->encap_type;
>> + udp_sk(sk)->encap_rcv = cfg->encap_rcv;
>> + udp_sk(sk)->encap_destroy = cfg->encap_destroy;
>> +
>> + uts->sock = sock;
>> +
>> + udp_tunnel_encap_enable(sock);
>> +
>> + return uts;
>> +}
>> +EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
>> +
>> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
>> + struct sk_buff *skb, __be32 src, __be32 dst,
>> + __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
>> + __be16 dst_port, bool xnet)
>> +{
>> + struct udphdr *uh;
>> + struct socket *sock = uts->sock;
>> +
>> + __skb_push(skb, sizeof(*uh));
>> + skb_reset_transport_header(skb);
>> + uh = udp_hdr(skb);
>> +
>> + uh->dest = dst_port;
>> + uh->source = src_port;
>> + uh->len = htons(skb->len);
>> +
>> + udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
>> +
>> + return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
>> + tos, ttl, df, xnet);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
>> +
>> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
>> +{
>> + struct sock *sk = uts->sock->sk;
>> +
>> + rcu_assign_sk_user_data(uts->sock->sk, NULL);
>> + kernel_sock_shutdown(uts->sock, SHUT_RDWR);
>> + sk_release_kernel(sk);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
>> +
>> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts)
>> +{
>> + kfree(uts);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_free);
>> +
>> MODULE_LICENSE("GPL");
>> diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
>> index 2fe6836..45f830e 100644
>> --- a/net/ipv6/Makefile
>> +++ b/net/ipv6/Makefile
>
> Probably good to make IPv4 and v6 stuff in separate patches.
Is this really necessary? Removing dependency may be tricky..
>
>> @@ -35,6 +35,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
>> obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
>> obj-$(CONFIG_IPV6_MIP6) += mip6.o
>> obj-$(CONFIG_NETFILTER) += netfilter/
>> +obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
>>
>> obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
>> obj-$(CONFIG_IPV6_SIT) += sit.o
>> diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
>> new file mode 100644
>> index 0000000..5109f46
>> --- /dev/null
>> +++ b/net/ipv6/ip6_udp_tunnel.c
>> @@ -0,0 +1,121 @@
>> +#include <linux/module.h>
>> +#include <linux/errno.h>
>> +#include <linux/socket.h>
>> +#include <linux/udp.h>
>> +#include <linux/types.h>
>> +#include <linux/kernel.h>
>> +#include <linux/in6.h>
>> +#include <net/udp.h>
>> +#include <net/udp_tunnel.h>
>> +#include <net/net_namespace.h>
>> +#include <net/netns/generic.h>
>> +#include <net/ip6_tunnel.h>
>> +#include <net/ip6_checksum.h>
>> +
>> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>> + struct socket **sockp)
>> +{
>> + struct sockaddr_in6 udp6_addr;
>> + int err = -EINVAL;
>> + struct socket *sock = NULL;
>> +
>> + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
>> + if (err < 0)
>> + goto error;
>> +
>> + sk_change_net(sock->sk, net);
>> +
>> + udp6_addr.sin6_family = AF_INET6;
>> + memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
>> + sizeof(udp6_addr.sin6_addr));
>> + udp6_addr.sin6_port = cfg->local_udp_port;
>> + err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
>> + sizeof(udp6_addr));
>> + if (err < 0)
>> + goto error;
>> +
>> + if (cfg->peer_udp_port) {
>> + udp6_addr.sin6_family = AF_INET6;
>> + memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
>> + sizeof(udp6_addr.sin6_addr));
>> + udp6_addr.sin6_port = cfg->peer_udp_port;
>> + err = kernel_connect(sock,
>> + (struct sockaddr *)&udp6_addr,
>> + sizeof(udp6_addr), 0);
>> + }
>> + if (err < 0)
>> + goto error;
>> +
>> + udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
>> + udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
>> +
>> + *sockp = sock;
>> + return 0;
>> +
>> +error:
>> + if (sock) {
>> + kernel_sock_shutdown(sock, SHUT_RDWR);
>> + sk_release_kernel(sock->sk);
>> + }
>> + *sockp = NULL;
>> + return err;
>> +}
>> +EXPORT_SYMBOL_GPL(udp_sock_create6);
>> +
>> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
>> + struct sk_buff *skb, struct net_device *dev,
>> + struct in6_addr *saddr, struct in6_addr *daddr,
>> + __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
>> +{
>> + struct udphdr *uh;
>> + struct ipv6hdr *ip6h;
>> +
>> + __skb_push(skb, sizeof(*uh));
>> + skb_reset_transport_header(skb);
>> + uh = udp_hdr(skb);
>> +
>> + uh->dest = dst_port;
>> + uh->source = src_port;
>> +
>> + uh->len = htons(skb->len);
>> + uh->check = 0;
>> +
>> + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
>> + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
>> + | IPSKB_REROUTED);
>> + skb_dst_set(skb, dst);
>> +
>> + if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
>> + __wsum csum = skb_checksum(skb, 0, skb->len, 0);
>> +
>> + skb->ip_summed = CHECKSUM_UNNECESSARY;
>> + uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
>> + IPPROTO_UDP, csum);
>> + if (uh->check == 0)
>> + uh->check = CSUM_MANGLED_0;
>> + } else {
>> + skb->ip_summed = CHECKSUM_PARTIAL;
>> + skb->csum_start = skb_transport_header(skb) - skb->head;
>> + skb->csum_offset = offsetof(struct udphdr, check);
>> + uh->check = ~csum_ipv6_magic(saddr, daddr,
>> + skb->len, IPPROTO_UDP, 0);
>> + }
>
> Can't we call udp6_set_csum for this?
Thanks for pointing it out. Will do.
>
>> +
>> + __skb_push(skb, sizeof(*ip6h));
>> + skb_reset_network_header(skb);
>> + ip6h = ipv6_hdr(skb);
>> + ip6h->version = 6;
>> + ip6h->priority = prio;
>> + ip6h->flow_lbl[0] = 0;
>> + ip6h->flow_lbl[1] = 0;
>> + ip6h->flow_lbl[2] = 0;
>> + ip6h->payload_len = htons(skb->len);
>> + ip6h->nexthdr = IPPROTO_UDP;
>> + ip6h->hop_limit = ttl;
>> + ip6h->daddr = *daddr;
>> + ip6h->saddr = *saddr;
>> +
>> + ip6tunnel_xmit(skb, dev);
>
> So iptunnel_xmit creates the IP header, but ip6tunnel_xmit doesn't. It
> should be on the TODO list to make this consistent!
Agreed.
>
>> + return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
>> --
>> 1.7.9.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@...r.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists