lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <158141590610140@mail.yandex-team.ru>
Date:   Wed, 27 May 2020 23:17:59 +0300
From:   Dmitry Yakunin <zeil@...dex-team.ru>
To:     Eric Dumazet <eric.dumazet@...il.com>,
        "davem@...emloft.net" <davem@...emloft.net>,
        "brakmo@...com" <brakmo@...com>
Cc:     "bpf@...r.kernel.org" <bpf@...r.kernel.org>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: Re: [PATCH bpf-next] bpf: add SO_KEEPALIVE and related options to bpf_setsockopt



27.05.2020, 19:43, "Eric Dumazet" <eric.dumazet@...il.com>:
> On 5/27/20 8:05 AM, Dmitry Yakunin wrote:
>>  This patch adds support of SO_KEEPALIVE flag and TCP related options
>>  to bpf_setsockopt() routine. This is helpful if we want to enable or tune
>>  TCP keepalive for applications which don't do it in the userspace code.
>>  In order to avoid copy-paste, common code from classic setsockopt was moved
>>  to auxiliary functions in the headers.
>
> Please split this in two patches :
> - one adding the helpers, a pure TCP patch.
> - one for BPF additions.

Thanks for your comment.
I've split this into three patches (sock, tcp and bpf) and resent.

>>  Signed-off-by: Dmitry Yakunin <zeil@...dex-team.ru>
>>  ---
>>   include/net/sock.h | 9 +++++++++
>>   include/net/tcp.h | 18 ++++++++++++++++++
>>   net/core/filter.c | 39 ++++++++++++++++++++++++++++++++++++++-
>>   net/core/sock.c | 9 ---------
>>   net/ipv4/tcp.c | 15 ++-------------
>>   5 files changed, 67 insertions(+), 23 deletions(-)
>>
>>  diff --git a/include/net/sock.h b/include/net/sock.h
>>  index 3e8c6d4..ee35dea 100644
>>  --- a/include/net/sock.h
>>  +++ b/include/net/sock.h
>>  @@ -879,6 +879,15 @@ static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag)
>>           __clear_bit(flag, &sk->sk_flags);
>>   }
>>
>>  +static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
>>  + int valbool)
>>  +{
>>  + if (valbool)
>>  + sock_set_flag(sk, bit);
>>  + else
>>  + sock_reset_flag(sk, bit);
>>  +}
>>  +
>>   static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
>>   {
>>           return test_bit(flag, &sk->sk_flags);
>>  diff --git a/include/net/tcp.h b/include/net/tcp.h
>>  index b681338..ae6a495 100644
>>  --- a/include/net/tcp.h
>>  +++ b/include/net/tcp.h
>>  @@ -1465,6 +1465,24 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
>>                             tcp_jiffies32 - tp->rcv_tstamp);
>>   }
>>
>>  +/* val must be validated at the top level function */
>>  +static inline void keepalive_time_set(struct tcp_sock *tp, int val)
>>  +{
>>  + struct sock *sk = (struct sock *)tp;
>
> We prefer the other way to avoid a cast unless really needed :
>
> static inline tcp_keepalive_time_set(struct sock *sk, int val)
> {
>       stuct tcp_sock *tp = tcp_sk(sk);
>
>>  +
>>  + tp->keepalive_time = val * HZ;
>>  + if (sock_flag(sk, SOCK_KEEPOPEN) &&
>>  + !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
>>  + u32 elapsed = keepalive_time_elapsed(tp);
>>  +
>>  + if (tp->keepalive_time > elapsed)
>>  + elapsed = tp->keepalive_time - elapsed;
>>  + else
>>  + elapsed = 0;
>>  + inet_csk_reset_keepalive_timer(sk, elapsed);
>>  + }
>>  +}
>>  +
>>   static inline int tcp_fin_time(const struct sock *sk)
>>   {
>>           int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
>>  diff --git a/net/core/filter.c b/net/core/filter.c
>>  index a6fc234..1035e43 100644
>>  --- a/net/core/filter.c
>>  +++ b/net/core/filter.c
>>  @@ -4248,8 +4248,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
>>   static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>>                              char *optval, int optlen, u32 flags)
>>   {
>>  + int val, valbool;
>>           int ret = 0;
>>  - int val;
>>
>>           if (!sk_fullsock(sk))
>>                   return -EINVAL;
>>  @@ -4260,6 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>>                   if (optlen != sizeof(int))
>>                           return -EINVAL;
>>                   val = *((int *)optval);
>>  + valbool = val ? 1 : 0;
>>
>>                   /* Only some socketops are supported */
>>                   switch (optname) {
>>  @@ -4298,6 +4299,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>>                                   sk_dst_reset(sk);
>>                           }
>>                           break;
>>  + case SO_KEEPALIVE:
>>  + if (sk->sk_prot->keepalive)
>>  + sk->sk_prot->keepalive(sk, valbool);
>>  + sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
>>  + break;
>>                   default:
>>                           ret = -EINVAL;
>>                   }
>>  @@ -4358,6 +4364,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>>                           ret = tcp_set_congestion_control(sk, name, false,
>>                                                            reinit, true);
>>                   } else {
>>  + struct inet_connection_sock *icsk = inet_csk(sk);
>>                           struct tcp_sock *tp = tcp_sk(sk);
>>
>>                           if (optlen != sizeof(int))
>>  @@ -4386,6 +4393,36 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>>                                   else
>>                                           tp->save_syn = val;
>>                                   break;
>>  + case TCP_KEEPIDLE:
>>  + if (val < 1 || val > MAX_TCP_KEEPIDLE)
>>  + ret = -EINVAL;
>>  + else
>>  + keepalive_time_set(tp, val);
>>  + break;
>>  + case TCP_KEEPINTVL:
>>  + if (val < 1 || val > MAX_TCP_KEEPINTVL)
>>  + ret = -EINVAL;
>>  + else
>>  + tp->keepalive_intvl = val * HZ;
>>  + break;
>>  + case TCP_KEEPCNT:
>>  + if (val < 1 || val > MAX_TCP_KEEPCNT)
>>  + ret = -EINVAL;
>>  + else
>>  + tp->keepalive_probes = val;
>>  + break;
>>  + case TCP_SYNCNT:
>>  + if (val < 1 || val > MAX_TCP_SYNCNT)
>>  + ret = -EINVAL;
>>  + else
>>  + icsk->icsk_syn_retries = val;
>>  + break;
>>  + case TCP_USER_TIMEOUT:
>>  + if (val < 0)
>>  + ret = -EINVAL;
>>  + else
>>  + icsk->icsk_user_timeout = val;
>>  + break;
>>                           default:
>>                                   ret = -EINVAL;
>>                           }
>>  diff --git a/net/core/sock.c b/net/core/sock.c
>>  index fd85e65..9836b01 100644
>>  --- a/net/core/sock.c
>>  +++ b/net/core/sock.c
>>  @@ -684,15 +684,6 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
>>           return ret;
>>   }
>>
>>  -static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
>>  - int valbool)
>>  -{
>>  - if (valbool)
>>  - sock_set_flag(sk, bit);
>>  - else
>>  - sock_reset_flag(sk, bit);
>>  -}
>>  -
>>   bool sk_mc_loop(struct sock *sk)
>>   {
>>           if (dev_recursion_level())
>>  diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>>  index 9700649..7b239e8 100644
>>  --- a/net/ipv4/tcp.c
>>  +++ b/net/ipv4/tcp.c
>>  @@ -3003,19 +3003,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
>>           case TCP_KEEPIDLE:
>>                   if (val < 1 || val > MAX_TCP_KEEPIDLE)
>>                           err = -EINVAL;
>>  - else {
>>  - tp->keepalive_time = val * HZ;
>>  - if (sock_flag(sk, SOCK_KEEPOPEN) &&
>>  - !((1 << sk->sk_state) &
>>  - (TCPF_CLOSE | TCPF_LISTEN))) {
>>  - u32 elapsed = keepalive_time_elapsed(tp);
>>  - if (tp->keepalive_time > elapsed)
>>  - elapsed = tp->keepalive_time - elapsed;
>>  - else
>>  - elapsed = 0;
>>  - inet_csk_reset_keepalive_timer(sk, elapsed);
>>  - }
>>  - }
>>  + else
>>  + keepalive_time_set(tp, val);
>>                   break;
>>           case TCP_KEEPINTVL:
>>                   if (val < 1 || val > MAX_TCP_KEEPINTVL)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ