Message-ID: <ad410abb19bdbcdac617878d14a4e37228f1157b.camel@redhat.com>
Date: Wed, 07 Dec 2022 13:45:58 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: Daniele Palmas <dnlplm@...il.com>,
David Miller <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Eric Dumazet <edumazet@...gle.com>,
Subash Abhinov Kasiviswanathan <quic_subashab@...cinc.com>,
Sean Tranchetti <quic_stranche@...cinc.com>,
Jonathan Corbet <corbet@....net>,
Alexander Lobakin <alexandr.lobakin@...el.com>,
Gal Pressman <gal@...dia.com>
Cc: Bjørn Mork <bjorn@...k.no>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Dave Taht <dave.taht@...il.com>, netdev@...r.kernel.org
Subject: Re: [PATCH net-next v3 2/3] net: qualcomm: rmnet: add tx packets
aggregation
On Mon, 2022-12-05 at 10:33 +0100, Daniele Palmas wrote:
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> index a313242a762e..914ef03b5438 100644
> --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> @@ -164,8 +164,18 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
>
> map_header->mux_id = mux_id;
>
> - skb->protocol = htons(ETH_P_MAP);
> + if (port->egress_agg_params.count > 1) {
This is racy: here you read 'count' outside the 'agg_lock' lock, and
later, in rmnet_map_tx_aggregate(), the code assumes the above condition
still holds, but ethtool could have changed the value in the meantime.

You need a READ_ONCE() above, a WRITE_ONCE() on the update side, and to
cope with a 0 value in rmnet_map_tx_aggregate().
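Something along these lines (untested sketch; 'new_count' is just a
stand-in for whatever value the ethtool path hands in):

	/* reader, in rmnet_map_egress_handler(): annotate the lockless read */
	if (READ_ONCE(port->egress_agg_params.count) > 1) {

	/* writer, wherever ethtool updates the parameters: pair it with WRITE_ONCE() */
	WRITE_ONCE(port->egress_agg_params.count, new_count);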
[...]
> +static void rmnet_map_flush_tx_packet_work(struct work_struct *work)
> +{
> + struct sk_buff *skb = NULL;
> + struct rmnet_port *port;
> +
> + port = container_of(work, struct rmnet_port, agg_wq);
> +
> + spin_lock_bh(&port->agg_lock);
> + if (likely(port->agg_state == -EINPROGRESS)) {
> + /* Buffer may have already been shipped out */
> + if (likely(port->skbagg_head)) {
> + skb = port->skbagg_head;
> + reset_aggr_params(port);
> + }
> + port->agg_state = 0;
> + }
> +
> + spin_unlock_bh(&port->agg_lock);
> + if (skb)
> + rmnet_send_skb(port, skb);
> +}
> +
> +static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
> +{
> + struct rmnet_port *port;
> +
> + port = container_of(t, struct rmnet_port, hrtimer);
> +
> + schedule_work(&port->agg_wq);
Why do you need to schedule a work item here instead of calling the
core of rmnet_map_flush_tx_packet_work() directly? It looks like the
latter does not need process context...
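I.e. something like this (untested; note that with a plain
HRTIMER_MODE_REL timer the callback runs in hardirq context, where
dev_queue_xmit() is not safe, so this assumes the timer is armed with
HRTIMER_MODE_REL_SOFT so that the callback runs in softirq context):

	static enum hrtimer_restart rmnet_map_flush_tx_packet_queue(struct hrtimer *t)
	{
		struct sk_buff *skb = NULL;
		struct rmnet_port *port;

		port = container_of(t, struct rmnet_port, hrtimer);

		spin_lock_bh(&port->agg_lock);
		if (likely(port->agg_state == -EINPROGRESS)) {
			/* Buffer may have already been shipped out */
			if (likely(port->skbagg_head)) {
				skb = port->skbagg_head;
				reset_aggr_params(port);
			}
			port->agg_state = 0;
		}
		spin_unlock_bh(&port->agg_lock);

		if (skb)
			rmnet_send_skb(port, skb);

		return HRTIMER_NORESTART;
	}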
> +
> + return HRTIMER_NORESTART;
> +}
> +
> +unsigned int rmnet_map_tx_aggregate(struct sk_buff *skb, struct rmnet_port *port,
> + struct net_device *orig_dev)
> +{
> + struct timespec64 diff, last;
> + unsigned int len = skb->len;
> + struct sk_buff *agg_skb;
> + int size;
> +
> + spin_lock_bh(&port->agg_lock);
> + memcpy(&last, &port->agg_last, sizeof(struct timespec64));
> + ktime_get_real_ts64(&port->agg_last);
> +
> + if (!port->skbagg_head) {
> + /* Check to see if we should agg first. If the traffic is very
> + * sparse, don't aggregate.
> + */
> +new_packet:
> + diff = timespec64_sub(port->agg_last, last);
> + size = port->egress_agg_params.bytes - skb->len;
> +
> + if (size < 0) {
> + /* dropped */
> + spin_unlock_bh(&port->agg_lock);
> + return 0;
> + }
> +
> + if (diff.tv_sec > 0 || diff.tv_nsec > RMNET_AGG_BYPASS_TIME_NSEC ||
> + size == 0) {
You can avoid some code duplication by moving the following lines under
an 'error' label and jumping to it both here and in the next error case.
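Something like (untested):

		if (diff.tv_sec > 0 || diff.tv_nsec > RMNET_AGG_BYPASS_TIME_NSEC ||
		    size == 0)
			goto error;

		port->skbagg_head = skb_copy_expand(skb, 0, size, GFP_ATOMIC);
		if (!port->skbagg_head)
			goto error;

	[...]

	error:
		spin_unlock_bh(&port->agg_lock);
		skb->protocol = htons(ETH_P_MAP);
		dev_queue_xmit(skb);
		return len;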
> + spin_unlock_bh(&port->agg_lock);
> + skb->protocol = htons(ETH_P_MAP);
> + dev_queue_xmit(skb);
> + return len;
> + }
> +
> + port->skbagg_head = skb_copy_expand(skb, 0, size, GFP_ATOMIC);
> + if (!port->skbagg_head) {
> + spin_unlock_bh(&port->agg_lock);
> + skb->protocol = htons(ETH_P_MAP);
> + dev_queue_xmit(skb);
> + return len;
> + }
> + dev_kfree_skb_any(skb);
> + port->skbagg_head->protocol = htons(ETH_P_MAP);
> + port->agg_count = 1;
> + ktime_get_real_ts64(&port->agg_time);
> + skb_frag_list_init(port->skbagg_head);
> + goto schedule;
> + }
> + diff = timespec64_sub(port->agg_last, port->agg_time);
> + size = port->egress_agg_params.bytes - port->skbagg_head->len;
> +
> + if (skb->len > size) {
> + agg_skb = port->skbagg_head;
> + reset_aggr_params(port);
> + spin_unlock_bh(&port->agg_lock);
> + hrtimer_cancel(&port->hrtimer);
> + rmnet_send_skb(port, agg_skb);
> + spin_lock_bh(&port->agg_lock);
> + goto new_packet;
> + }
> +
> + if (skb_has_frag_list(port->skbagg_head))
> + port->skbagg_tail->next = skb;
> + else
> + skb_shinfo(port->skbagg_head)->frag_list = skb;
> +
> + port->skbagg_head->len += skb->len;
> + port->skbagg_head->data_len += skb->len;
> + port->skbagg_head->truesize += skb->truesize;
> + port->skbagg_tail = skb;
> + port->agg_count++;
> +
> + if (diff.tv_sec > 0 || diff.tv_nsec > port->egress_agg_params.time_nsec ||
> + port->agg_count == port->egress_agg_params.count ||
At this point port->egress_agg_params.count can be 0, so you need to
check for:

	port->agg_count >= port->egress_agg_params.count
Cheers,
Paolo