lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Fri, 17 Mar 2023 16:07:22 -0700
From:   Kui-Feng Lee <sinquersw@...il.com>
To:     Daniel Borkmann <daniel@...earbox.net>,
        Kui-Feng Lee <kuifeng@...a.com>, bpf@...r.kernel.org,
        ast@...nel.org, martin.lau@...ux.dev, song@...nel.org,
        kernel-team@...a.com, andrii@...nel.org, sdf@...gle.com
Cc:     netdev@...r.kernel.org, Eric Dumazet <edumazet@...gle.com>
Subject: Re: [PATCH bpf-next v7 2/8] net: Update an existing TCP congestion
 control algorithm.



On 3/17/23 08:23, Daniel Borkmann wrote:
> On 3/16/23 3:36 AM, Kui-Feng Lee wrote:
>> This feature lets you immediately transition to another congestion
>> control algorithm or implementation with the same name.  Once a name
>> is updated, new connections will apply this new algorithm.
>>
>> The purpose is to update a customized algorithm implemented in BPF
>> struct_ops with a new version on the flight.  The following is an
>> example of using the userspace API implemented in later BPF patches.
>>
>>     link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
>>     .......
>>     err = bpf_link__update_map(link, skel->maps.ca_update_2);
>>
>> We first load and register an algorithm implemented in BPF struct_ops,
>> then swap it out with a new one using the same name. After that, newly
>> created connections will apply the updated algorithm, while older ones
>> retain the previous version already applied.
>>
>> This patch also takes this chance to refactor the ca validation into
>> the new tcp_validate_congestion_control() function.
>>
>> Cc: netdev@...r.kernel.org, Eric Dumazet <edumazet@...gle.com>
>> Signed-off-by: Kui-Feng Lee <kuifeng@...a.com>
>> ---
>>   include/net/tcp.h   |  3 +++
>>   net/ipv4/tcp_cong.c | 60 +++++++++++++++++++++++++++++++++++++++------
>>   2 files changed, 56 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/net/tcp.h b/include/net/tcp.h
>> index db9f828e9d1e..2abb755e6a3a 100644
>> --- a/include/net/tcp.h
>> +++ b/include/net/tcp.h
>> @@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
>>   int tcp_register_congestion_control(struct tcp_congestion_ops *type);
>>   void tcp_unregister_congestion_control(struct tcp_congestion_ops 
>> *type);
>> +int tcp_update_congestion_control(struct tcp_congestion_ops *type,
>> +                  struct tcp_congestion_ops *old_type);
>> +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
>>   void tcp_assign_congestion_control(struct sock *sk);
>>   void tcp_init_congestion_control(struct sock *sk);
>> diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
>> index db8b4b488c31..c90791ae8389 100644
>> --- a/net/ipv4/tcp_cong.c
>> +++ b/net/ipv4/tcp_cong.c
>> @@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
>>       return NULL;
>>   }
>> -/*
>> - * Attach new congestion control algorithm to the list
>> - * of available options.
>> - */
>> -int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
>> +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
>>   {
>> -    int ret = 0;
>> -
>>       /* all algorithms must implement these */
>>       if (!ca->ssthresh || !ca->undo_cwnd ||
>>           !(ca->cong_avoid || ca->cong_control)) {
>> @@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct 
>> tcp_congestion_ops *ca)
>>           return -EINVAL;
>>       }
>> +    return 0;
>> +}
>> +
>> +/* Attach new congestion control algorithm to the list
>> + * of available options.
>> + */
>> +int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
>> +{
>> +    int ret;
>> +
>> +    ret = tcp_validate_congestion_control(ca);
>> +    if (ret)
>> +        return ret;
>> +
>>       ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
>>       spin_lock(&tcp_cong_list_lock);
>> @@ -130,6 +138,44 @@ void tcp_unregister_congestion_control(struct 
>> tcp_congestion_ops *ca)
>>   }
>>   EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
>> +/* Replace a registered old ca with a new one.
>> + *
>> + * The new ca must have the same name as the old one, that has been
>> + * registered.
>> + */
>> +int tcp_update_congestion_control(struct tcp_congestion_ops *ca, 
>> struct tcp_congestion_ops *old_ca)
>> +{
>> +    struct tcp_congestion_ops *existing;
>> +    int ret;
>> +
>> +    ret = tcp_validate_congestion_control(ca);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
>> +
>> +    spin_lock(&tcp_cong_list_lock);
>> +    existing = tcp_ca_find_key(old_ca->key);
>> +    if (ca->key == TCP_CA_UNSPEC || !existing || 
>> strcmp(existing->name, ca->name)) {
>> +        pr_notice("%s not registered or non-unique key\n",
>> +              ca->name);
>> +        ret = -EINVAL;
>> +    } else if (existing != old_ca) {
>> +        pr_notice("invalid old congestion control algorithm to 
>> replace\n");
>> +        ret = -EINVAL;
>> +    } else {
>> +        /* Add the new one before removing the old one to keep
>> +         * one implementation available all the time.
>> +         */
>> +        list_add_tail_rcu(&ca->list, &tcp_cong_list);
>> +        list_del_rcu(&existing->list);
>> +        pr_debug("%s updated\n", ca->name);
>> +    }
>> +    spin_unlock(&tcp_cong_list_lock);
>> +
>> +    return ret;
>> +}
> 
> Was wondering if we could have tcp_register_congestion_control and 
> tcp_update_congestion_control
> could be refactored for reuse. Maybe like below. From the function 
> itself what is not clear whether
> callers that replace an existing one should do the synchronize_rcu() 
> themselves or if this should
> be part of tcp_update_congestion_control?
> 
> int tcp_check_congestion_control(struct tcp_congestion_ops *ca)
> {
>      /* All algorithms must implement these. */
>      if (!ca->ssthresh || !ca->undo_cwnd ||
>          !(ca->cong_avoid || ca->cong_control)) {
>          pr_err("%s does not implement required ops\n", ca->name);
>          return -EINVAL;
>      }
>      if (ca->key == TCP_CA_UNSPEC)
>          ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
>      if (ca->key == TCP_CA_UNSPEC) {
>          pr_notice("%s results in zero key\n", ca->name);
>          return -EEXIST;
>      }
>      return 0;
> }
> 
> /* Attach new congestion control algorithm to the list of available
>   * options or replace an existing one if old is non-NULL.
>   */
> int tcp_update_congestion_control(struct tcp_congestion_ops *new,
>                    struct tcp_congestion_ops *old)
> {
>      struct tcp_congestion_ops *found;
>      int ret;
> 
>      ret = tcp_check_congestion_control(new);
>      if (ret)
>          return ret;
>      if (old &&
>          (old->key != new->key ||
>           strcmp(old->name, new->name))) {
>          pr_notice("%s & %s have non-matching congestion control names\n",
>                old->name, new->name);
>          return -EINVAL;
>      }
>      spin_lock(&tcp_cong_list_lock);
>      found = tcp_ca_find_key(new->key); >      if (old) {
>          if (found == old) {
>              list_add_tail_rcu(&new->list, &tcp_cong_list);
>              list_del_rcu(&old->list);
>          } else {
>              pr_notice("%s not registered\n", old->name);
>              ret = -EINVAL;
>          }
>      } else {
>          if (found) {
>              pr_notice("%s already registered\n", new->name);
>              ret = -EEXIST;
>          } else {
>              list_add_tail_rcu(&new->list, &tcp_cong_list);
>          }
>      }
>      spin_unlock(&tcp_cong_list_lock);
>      return ret;
> }

After trying to do this refactoring, I found it just shares a few lines
of code, but make thing complicated by adding more checks.  So, I prefer
to keep it as it is.  How do you think?


> 
> int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
> {
>      return tcp_update_congestion_control(ca, NULL);
> }
> EXPORT_SYMBOL_GPL(tcp_register_congestion_control);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ