[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <4271f277-ec08-ea15-4c7b-8c5147db6308@gmail.com>
Date: Fri, 17 Mar 2023 16:07:22 -0700
From: Kui-Feng Lee <sinquersw@...il.com>
To: Daniel Borkmann <daniel@...earbox.net>,
Kui-Feng Lee <kuifeng@...a.com>, bpf@...r.kernel.org,
ast@...nel.org, martin.lau@...ux.dev, song@...nel.org,
kernel-team@...a.com, andrii@...nel.org, sdf@...gle.com
Cc: netdev@...r.kernel.org, Eric Dumazet <edumazet@...gle.com>
Subject: Re: [PATCH bpf-next v7 2/8] net: Update an existing TCP congestion
control algorithm.
On 3/17/23 08:23, Daniel Borkmann wrote:
> On 3/16/23 3:36 AM, Kui-Feng Lee wrote:
>> This feature lets you immediately transition to another congestion
>> control algorithm or implementation with the same name. Once a name
>> is updated, new connections will apply this new algorithm.
>>
>> The purpose is to update a customized algorithm implemented in BPF
>> struct_ops with a new version on the flight. The following is an
>> example of using the userspace API implemented in later BPF patches.
>>
>> link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
>> .......
>> err = bpf_link__update_map(link, skel->maps.ca_update_2);
>>
>> We first load and register an algorithm implemented in BPF struct_ops,
>> then swap it out with a new one using the same name. After that, newly
>> created connections will apply the updated algorithm, while older ones
>> retain the previous version already applied.
>>
>> This patch also takes this chance to refactor the ca validation into
>> the new tcp_validate_congestion_control() function.
>>
>> Cc: netdev@...r.kernel.org, Eric Dumazet <edumazet@...gle.com>
>> Signed-off-by: Kui-Feng Lee <kuifeng@...a.com>
>> ---
>> include/net/tcp.h | 3 +++
>> net/ipv4/tcp_cong.c | 60 +++++++++++++++++++++++++++++++++++++++------
>> 2 files changed, 56 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/net/tcp.h b/include/net/tcp.h
>> index db9f828e9d1e..2abb755e6a3a 100644
>> --- a/include/net/tcp.h
>> +++ b/include/net/tcp.h
>> @@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
>> int tcp_register_congestion_control(struct tcp_congestion_ops *type);
>> void tcp_unregister_congestion_control(struct tcp_congestion_ops
>> *type);
>> +int tcp_update_congestion_control(struct tcp_congestion_ops *type,
>> + struct tcp_congestion_ops *old_type);
>> +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
>> void tcp_assign_congestion_control(struct sock *sk);
>> void tcp_init_congestion_control(struct sock *sk);
>> diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
>> index db8b4b488c31..c90791ae8389 100644
>> --- a/net/ipv4/tcp_cong.c
>> +++ b/net/ipv4/tcp_cong.c
>> @@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
>> return NULL;
>> }
>> -/*
>> - * Attach new congestion control algorithm to the list
>> - * of available options.
>> - */
>> -int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
>> +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
>> {
>> - int ret = 0;
>> -
>> /* all algorithms must implement these */
>> if (!ca->ssthresh || !ca->undo_cwnd ||
>> !(ca->cong_avoid || ca->cong_control)) {
>> @@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct
>> tcp_congestion_ops *ca)
>> return -EINVAL;
>> }
>> + return 0;
>> +}
>> +
>> +/* Attach new congestion control algorithm to the list
>> + * of available options.
>> + */
>> +int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
>> +{
>> + int ret;
>> +
>> + ret = tcp_validate_congestion_control(ca);
>> + if (ret)
>> + return ret;
>> +
>> ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
>> spin_lock(&tcp_cong_list_lock);
>> @@ -130,6 +138,44 @@ void tcp_unregister_congestion_control(struct
>> tcp_congestion_ops *ca)
>> }
>> EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
>> +/* Replace a registered old ca with a new one.
>> + *
>> + * The new ca must have the same name as the old one, that has been
>> + * registered.
>> + */
>> +int tcp_update_congestion_control(struct tcp_congestion_ops *ca,
>> struct tcp_congestion_ops *old_ca)
>> +{
>> + struct tcp_congestion_ops *existing;
>> + int ret;
>> +
>> + ret = tcp_validate_congestion_control(ca);
>> + if (ret)
>> + return ret;
>> +
>> + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
>> +
>> + spin_lock(&tcp_cong_list_lock);
>> + existing = tcp_ca_find_key(old_ca->key);
>> + if (ca->key == TCP_CA_UNSPEC || !existing ||
>> strcmp(existing->name, ca->name)) {
>> + pr_notice("%s not registered or non-unique key\n",
>> + ca->name);
>> + ret = -EINVAL;
>> + } else if (existing != old_ca) {
>> + pr_notice("invalid old congestion control algorithm to
>> replace\n");
>> + ret = -EINVAL;
>> + } else {
>> + /* Add the new one before removing the old one to keep
>> + * one implementation available all the time.
>> + */
>> + list_add_tail_rcu(&ca->list, &tcp_cong_list);
>> + list_del_rcu(&existing->list);
>> + pr_debug("%s updated\n", ca->name);
>> + }
>> + spin_unlock(&tcp_cong_list_lock);
>> +
>> + return ret;
>> +}
>
> Was wondering if we could have tcp_register_congestion_control and
> tcp_update_congestion_control
> could be refactored for reuse. Maybe like below. From the function
> itself what is not clear whether
> callers that replace an existing one should do the synchronize_rcu()
> themselves or if this should
> be part of tcp_update_congestion_control?
>
> int tcp_check_congestion_control(struct tcp_congestion_ops *ca)
> {
> /* All algorithms must implement these. */
> if (!ca->ssthresh || !ca->undo_cwnd ||
> !(ca->cong_avoid || ca->cong_control)) {
> pr_err("%s does not implement required ops\n", ca->name);
> return -EINVAL;
> }
> if (ca->key == TCP_CA_UNSPEC)
> ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
> if (ca->key == TCP_CA_UNSPEC) {
> pr_notice("%s results in zero key\n", ca->name);
> return -EEXIST;
> }
> return 0;
> }
>
> /* Attach new congestion control algorithm to the list of available
> * options or replace an existing one if old is non-NULL.
> */
> int tcp_update_congestion_control(struct tcp_congestion_ops *new,
> struct tcp_congestion_ops *old)
> {
> struct tcp_congestion_ops *found;
> int ret;
>
> ret = tcp_check_congestion_control(new);
> if (ret)
> return ret;
> if (old &&
> (old->key != new->key ||
> strcmp(old->name, new->name))) {
> pr_notice("%s & %s have non-matching congestion control names\n",
> old->name, new->name);
> return -EINVAL;
> }
> spin_lock(&tcp_cong_list_lock);
> found = tcp_ca_find_key(new->key); > if (old) {
> if (found == old) {
> list_add_tail_rcu(&new->list, &tcp_cong_list);
> list_del_rcu(&old->list);
> } else {
> pr_notice("%s not registered\n", old->name);
> ret = -EINVAL;
> }
> } else {
> if (found) {
> pr_notice("%s already registered\n", new->name);
> ret = -EEXIST;
> } else {
> list_add_tail_rcu(&new->list, &tcp_cong_list);
> }
> }
> spin_unlock(&tcp_cong_list_lock);
> return ret;
> }
After trying to do this refactoring, I found it just shares a few lines
of code, but make thing complicated by adding more checks. So, I prefer
to keep it as it is. How do you think?
>
> int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
> {
> return tcp_update_congestion_control(ca, NULL);
> }
> EXPORT_SYMBOL_GPL(tcp_register_congestion_control);
Powered by blists - more mailing lists