lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f72b77c3-15ac-3de3-5bce-c263564c1487@iogearbox.net>
Date:   Fri, 17 Mar 2023 16:23:48 +0100
From:   Daniel Borkmann <daniel@...earbox.net>
To:     Kui-Feng Lee <kuifeng@...a.com>, bpf@...r.kernel.org,
        ast@...nel.org, martin.lau@...ux.dev, song@...nel.org,
        kernel-team@...a.com, andrii@...nel.org, sdf@...gle.com
Cc:     netdev@...r.kernel.org, Eric Dumazet <edumazet@...gle.com>
Subject: Re: [PATCH bpf-next v7 2/8] net: Update an existing TCP congestion
 control algorithm.

On 3/16/23 3:36 AM, Kui-Feng Lee wrote:
> This feature lets you immediately transition to another congestion
> control algorithm or implementation with the same name.  Once a name
> is updated, new connections will apply this new algorithm.
> 
> The purpose is to update a customized algorithm implemented in BPF
> struct_ops with a new version on the flight.  The following is an
> example of using the userspace API implemented in later BPF patches.
> 
>     link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
>     .......
>     err = bpf_link__update_map(link, skel->maps.ca_update_2);
> 
> We first load and register an algorithm implemented in BPF struct_ops,
> then swap it out with a new one using the same name. After that, newly
> created connections will apply the updated algorithm, while older ones
> retain the previous version already applied.
> 
> This patch also takes this chance to refactor the ca validation into
> the new tcp_validate_congestion_control() function.
> 
> Cc: netdev@...r.kernel.org, Eric Dumazet <edumazet@...gle.com>
> Signed-off-by: Kui-Feng Lee <kuifeng@...a.com>
> ---
>   include/net/tcp.h   |  3 +++
>   net/ipv4/tcp_cong.c | 60 +++++++++++++++++++++++++++++++++++++++------
>   2 files changed, 56 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index db9f828e9d1e..2abb755e6a3a 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1117,6 +1117,9 @@ struct tcp_congestion_ops {
>   
>   int tcp_register_congestion_control(struct tcp_congestion_ops *type);
>   void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
> +int tcp_update_congestion_control(struct tcp_congestion_ops *type,
> +				  struct tcp_congestion_ops *old_type);
> +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca);
>   
>   void tcp_assign_congestion_control(struct sock *sk);
>   void tcp_init_congestion_control(struct sock *sk);
> diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
> index db8b4b488c31..c90791ae8389 100644
> --- a/net/ipv4/tcp_cong.c
> +++ b/net/ipv4/tcp_cong.c
> @@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
>   	return NULL;
>   }
>   
> -/*
> - * Attach new congestion control algorithm to the list
> - * of available options.
> - */
> -int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
> +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
>   {
> -	int ret = 0;
> -
>   	/* all algorithms must implement these */
>   	if (!ca->ssthresh || !ca->undo_cwnd ||
>   	    !(ca->cong_avoid || ca->cong_control)) {
> @@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
>   		return -EINVAL;
>   	}
>   
> +	return 0;
> +}
> +
> +/* Attach new congestion control algorithm to the list
> + * of available options.
> + */
> +int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
> +{
> +	int ret;
> +
> +	ret = tcp_validate_congestion_control(ca);
> +	if (ret)
> +		return ret;
> +
>   	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
>   
>   	spin_lock(&tcp_cong_list_lock);
> @@ -130,6 +138,44 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
>   }
>   EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
>   
> +/* Replace a registered old ca with a new one.
> + *
> + * The new ca must have the same name as the old one, that has been
> + * registered.
> + */
> +int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
> +{
> +	struct tcp_congestion_ops *existing;
> +	int ret;
> +
> +	ret = tcp_validate_congestion_control(ca);
> +	if (ret)
> +		return ret;
> +
> +	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
> +
> +	spin_lock(&tcp_cong_list_lock);
> +	existing = tcp_ca_find_key(old_ca->key);
> +	if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) {
> +		pr_notice("%s not registered or non-unique key\n",
> +			  ca->name);
> +		ret = -EINVAL;
> +	} else if (existing != old_ca) {
> +		pr_notice("invalid old congestion control algorithm to replace\n");
> +		ret = -EINVAL;
> +	} else {
> +		/* Add the new one before removing the old one to keep
> +		 * one implementation available all the time.
> +		 */
> +		list_add_tail_rcu(&ca->list, &tcp_cong_list);
> +		list_del_rcu(&existing->list);
> +		pr_debug("%s updated\n", ca->name);
> +	}
> +	spin_unlock(&tcp_cong_list_lock);
> +
> +	return ret;
> +}

Was wondering if we could have tcp_register_congestion_control and tcp_update_congestion_control
could be refactored for reuse. Maybe like below. From the function itself what is not clear whether
callers that replace an existing one should do the synchronize_rcu() themselves or if this should
be part of tcp_update_congestion_control?

int tcp_check_congestion_control(struct tcp_congestion_ops *ca)
{
	/* All algorithms must implement these. */
	if (!ca->ssthresh || !ca->undo_cwnd ||
	    !(ca->cong_avoid || ca->cong_control)) {
		pr_err("%s does not implement required ops\n", ca->name);
		return -EINVAL;
	}
	if (ca->key == TCP_CA_UNSPEC)
		ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
	if (ca->key == TCP_CA_UNSPEC) {
		pr_notice("%s results in zero key\n", ca->name);
		return -EEXIST;
	}
	return 0;
}

/* Attach new congestion control algorithm to the list of available
  * options or replace an existing one if old is non-NULL.
  */
int tcp_update_congestion_control(struct tcp_congestion_ops *new,
				  struct tcp_congestion_ops *old)
{
	struct tcp_congestion_ops *found;
	int ret;

	ret = tcp_check_congestion_control(new);
	if (ret)
		return ret;
	if (old &&
	    (old->key != new->key ||
	     strcmp(old->name, new->name))) {
		pr_notice("%s & %s have non-matching congestion control names\n",
			  old->name, new->name);
		return -EINVAL;
	}
	spin_lock(&tcp_cong_list_lock);
	found = tcp_ca_find_key(new->key);
	if (old) {
		if (found == old) {
			list_add_tail_rcu(&new->list, &tcp_cong_list);
			list_del_rcu(&old->list);
		} else {
			pr_notice("%s not registered\n", old->name);
			ret = -EINVAL;
		}
	} else {
		if (found) {
			pr_notice("%s already registered\n", new->name);
			ret = -EEXIST;
		} else {
			list_add_tail_rcu(&new->list, &tcp_cong_list);
		}
	}
	spin_unlock(&tcp_cong_list_lock);
	return ret;
}

int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
{
	return tcp_update_congestion_control(ca, NULL);
}
EXPORT_SYMBOL_GPL(tcp_register_congestion_control);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ