[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAG-2HqVzWhDa8UMbwT+r+7MZhzSkXiqi-My1jHxEbvNJn=cqOA@mail.gmail.com>
Date: Thu, 10 Jan 2019 15:39:16 +0100
From: Tom Gundersen <teg@...m.no>
To: David Herrmann <dh.herrmann@...il.com>
Cc: netdev <netdev@...r.kernel.org>, David Miller <davem@...emloft.net>
Subject: Re: [PATCH 1/3] net: introduce SO_BINDTOIF sockopt
On Thu, Jan 10, 2019 at 3:25 PM David Herrmann <dh.herrmann@...il.com> wrote:
>
> This introduces a new generic SOL_SOCKET-level socket option called
> SO_BINDTOIF. It behaves similar to SO_BINDTODEVICE, but takes a network
> interface index as argument, rather than the network interface name.
>
> User-space often refers to network-interfaces via their index, but has
> to temporarily resolve it to a name for a call into SO_BINDTODEVICE.
> This might pose problems when the network-device is renamed
> asynchronously by other parts of the system. When this happens, the
> SO_BINDTODEVICE might either fail, or worse, it might bind to the wrong
> device.
>
> In most cases user-space only ever operates on devices which they
> either manage themselves, or otherwise have a guarantee that the device
> name will not change (e.g., devices that are UP cannot be renamed).
> However, particularly in libraries this guarantee is non-obvious and it
> would be nice if that race-condition would simply not exist. It would
> make it easier for those libraries to operate even in situations where
> the device-name might change under the hood.
>
> A real use-case that we recently hit is trying to start the network
> stack early in the initrd but make it survive into the real system.
> Existing distributions rename network-interfaces during the transition
> from initrd into the real system. This, obviously, cannot affect
> devices that are up and running (unless you also consider moving them
> between network-namespaces). However, the network manager now has to
> make sure its management engine for dormant devices will not run in
> parallel to these renames. Particularly, when you offload operations
> like DHCP into separate processes, these might setup their sockets
> early, and thus have to resolve the device-name possibly running into
> this race-condition.
>
> By avoiding a call to resolve the device-name, we no longer depend on
> the name and can run network setup of dormant devices in parallel to
> the transition off the initrd. The SO_BINDTOIF ioctl plugs this race.
>
> Signed-off-by: David Herrmann <dh.herrmann@...il.com>
Reviewed-by: Tom Gundersen <teg@...m.no>
> ---
> arch/alpha/include/uapi/asm/socket.h | 2 ++
> arch/ia64/include/uapi/asm/socket.h | 2 ++
> arch/mips/include/uapi/asm/socket.h | 2 ++
> arch/parisc/include/uapi/asm/socket.h | 2 ++
> arch/s390/include/uapi/asm/socket.h | 2 ++
> arch/sparc/include/uapi/asm/socket.h | 2 ++
> arch/xtensa/include/uapi/asm/socket.h | 2 ++
> include/uapi/asm-generic/socket.h | 2 ++
> net/core/sock.c | 46 +++++++++++++++++++++------
> 9 files changed, 52 insertions(+), 10 deletions(-)
>
> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
> index 065fb372e355..6e346e51eec7 100644
> --- a/arch/alpha/include/uapi/asm/socket.h
> +++ b/arch/alpha/include/uapi/asm/socket.h
> @@ -115,4 +115,6 @@
> #define SO_TXTIME 61
> #define SCM_TXTIME SO_TXTIME
>
> +#define SO_BINDTOIF 62
> +
> #endif /* _UAPI_ASM_SOCKET_H */
> diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
> index c872c4e6bafb..ece83ba17b9d 100644
> --- a/arch/ia64/include/uapi/asm/socket.h
> +++ b/arch/ia64/include/uapi/asm/socket.h
> @@ -117,4 +117,6 @@
> #define SO_TXTIME 61
> #define SCM_TXTIME SO_TXTIME
>
> +#define SO_BINDTOIF 62
> +
> #endif /* _ASM_IA64_SOCKET_H */
> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
> index 71370fb3ceef..27f7f761ace5 100644
> --- a/arch/mips/include/uapi/asm/socket.h
> +++ b/arch/mips/include/uapi/asm/socket.h
> @@ -126,4 +126,6 @@
> #define SO_TXTIME 61
> #define SCM_TXTIME SO_TXTIME
>
> +#define SO_BINDTOIF 62
> +
> #endif /* _UAPI_ASM_SOCKET_H */
> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
> index 061b9cf2a779..efd3917f23e1 100644
> --- a/arch/parisc/include/uapi/asm/socket.h
> +++ b/arch/parisc/include/uapi/asm/socket.h
> @@ -107,4 +107,6 @@
> #define SO_TXTIME 0x4036
> #define SCM_TXTIME SO_TXTIME
>
> +#define SO_BINDTOIF 0x4037
> +
> #endif /* _UAPI_ASM_SOCKET_H */
> diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
> index 39d901476ee5..c8ba542e69e6 100644
> --- a/arch/s390/include/uapi/asm/socket.h
> +++ b/arch/s390/include/uapi/asm/socket.h
> @@ -114,4 +114,6 @@
> #define SO_TXTIME 61
> #define SCM_TXTIME SO_TXTIME
>
> +#define SO_BINDTOIF 62
> +
> #endif /* _ASM_SOCKET_H */
> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
> index 7ea35e5601b6..50006bde7dc0 100644
> --- a/arch/sparc/include/uapi/asm/socket.h
> +++ b/arch/sparc/include/uapi/asm/socket.h
> @@ -104,6 +104,8 @@
> #define SO_TXTIME 0x003f
> #define SCM_TXTIME SO_TXTIME
>
> +#define SO_BINDTOIF 0x0040
> +
> /* Security levels - as per NRL IPv6 - don't actually do anything */
> #define SO_SECURITY_AUTHENTICATION 0x5001
> #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
> diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
> index 1de07a7f7680..a36241ffbd86 100644
> --- a/arch/xtensa/include/uapi/asm/socket.h
> +++ b/arch/xtensa/include/uapi/asm/socket.h
> @@ -119,4 +119,6 @@
> #define SO_TXTIME 61
> #define SCM_TXTIME SO_TXTIME
>
> +#define SO_BINDTOIF 62
> +
> #endif /* _XTENSA_SOCKET_H */
> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> index a12692e5f7a8..31fb8414ea4c 100644
> --- a/include/uapi/asm-generic/socket.h
> +++ b/include/uapi/asm-generic/socket.h
> @@ -110,4 +110,6 @@
> #define SO_TXTIME 61
> #define SCM_TXTIME SO_TXTIME
>
> +#define SO_BINDTOIF 62
> +
> #endif /* __ASM_GENERIC_SOCKET_H */
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 6aa2e7e0b4fb..df8f83bc22b3 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -520,20 +520,43 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
> }
> EXPORT_SYMBOL(sk_dst_check);
>
> -static int sock_setbindtodevice(struct sock *sk, char __user *optval,
> - int optlen)
> +static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
> {
> int ret = -ENOPROTOOPT;
> #ifdef CONFIG_NETDEVICES
> struct net *net = sock_net(sk);
> - char devname[IFNAMSIZ];
> - int index;
>
> /* Sorry... */
> ret = -EPERM;
> if (!ns_capable(net->user_ns, CAP_NET_RAW))
> goto out;
>
> + ret = -EINVAL;
> + if (ifindex < 0)
> + goto out;
> +
> + sk->sk_bound_dev_if = ifindex;
> + if (sk->sk_prot->rehash)
> + sk->sk_prot->rehash(sk);
> + sk_dst_reset(sk);
> +
> + ret = 0;
> +
> +out:
> +#endif
> +
> + return ret;
> +}
> +
> +static int sock_setbindtodevice(struct sock *sk, char __user *optval,
> + int optlen)
> +{
> + int ret = -ENOPROTOOPT;
> +#ifdef CONFIG_NETDEVICES
> + struct net *net = sock_net(sk);
> + char devname[IFNAMSIZ];
> + int index;
> +
> ret = -EINVAL;
> if (optlen < 0)
> goto out;
> @@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
> }
>
> lock_sock(sk);
> - sk->sk_bound_dev_if = index;
> - if (sk->sk_prot->rehash)
> - sk->sk_prot->rehash(sk);
> - sk_dst_reset(sk);
> + ret = sock_setbindtodevice_locked(sk, index);
> release_sock(sk);
>
> - ret = 0;
> -
> out:
> #endif
>
> @@ -1055,6 +1073,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
> }
> break;
>
> + case SO_BINDTOIF:
> + ret = sock_setbindtodevice_locked(sk, val);
> + break;
> +
> default:
> ret = -ENOPROTOOPT;
> break;
> @@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
> SOF_TXTIME_REPORT_ERRORS : 0;
> break;
>
> + case SO_BINDTOIF:
> + v.val = sk->sk_bound_dev_if;
> + break;
> +
> default:
> /* We implement the SO_SNDLOWAT etc to not be settable
> * (1003.1g 7).
> --
> 2.20.1
>
Powered by blists - more mailing lists