Message-ID: <ccec1043-2223-b9d8-e73e-3e20987c634d@mellanox.com>
Date: Sat, 30 Jun 2018 19:06:41 +0300
From: Boris Pismenny <borisp@...lanox.com>
To: Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Saeed Mahameed <saeedm@...lanox.com>
Cc: David Miller <davem@...emloft.net>,
Network Development <netdev@...r.kernel.org>,
ogerlitz@...lanox.com, yossiku@...lanox.com,
Alexander Duyck <alexander.duyck@...il.com>
Subject: Re: [net-next 01/12] net/mlx5e: Add UDP GSO support
On 06/30/18 01:19, Willem de Bruijn wrote:
> On Fri, Jun 29, 2018 at 2:24 AM Saeed Mahameed <saeedm@...lanox.com> wrote:
>> From: Boris Pismenny <borisp@...lanox.com>
>>
>> This patch enables UDP GSO support. We enable this by using two WQEs:
>> the first is a UDP LSO WQE for all segments of equal length, and the
>> second is for the last segment, in case its length differs.
>> Due to a HW limitation, we must adjust the packet length fields before sending.
>>
>> We measure performance between two Intel(R) Xeon(R) CPU E5-2643 v2 @3.50GHz
>> machines connected back-to-back with ConnectX-4 Lx (40Gbps) NICs.
>> We compare single-stream UDP, UDP GSO, and UDP GSO with offload.
>> Performance:
>>                 |  MSS (bytes)  |  Throughput (Gbps)  |  CPU utilization (%)
>> UDP GSO offload |      1472     |         35.6        |           8%
>> UDP GSO         |      1472     |         25.5        |          17%
>> UDP             |      1472     |         10.2        |          17%
>> UDP GSO offload |      1024     |         35.6        |           8%
>> UDP GSO         |      1024     |         19.2        |          17%
>> UDP             |      1024     |          5.7        |          17%
>> UDP GSO offload |       512     |         33.8        |          16%
>> UDP GSO         |       512     |         10.4        |          17%
>> UDP             |       512     |          3.5        |          17%
> Very nice results :)
Thanks. We expect to have 100Gbps results by netdev0x12.
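
For context, the "UDP GSO" rows exercise the UDP_SEGMENT socket option
(software segmentation in the stack), while "UDP GSO offload" pushes the
split down to the NIC. With gso_size 1472, for example, a 4000-byte payload
becomes two full 1472-byte segments handled by the LSO WQE plus a 1056-byte
trailing segment sent as the second WQE. A minimal sender sketch follows;
the buffer size, gso_size and destination are illustrative only:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef SOL_UDP
#define SOL_UDP IPPROTO_UDP     /* both are 17 */
#endif
#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103         /* from linux/udp.h, v4.18+ */
#endif

/* Sketch only: one sendto() carrying many MTU-sized UDP segments. */
static ssize_t udp_gso_send(int fd, const struct sockaddr_in *dst)
{
        static char buf[32 * 1472];     /* 32 equal 1472-byte segments */
        int gso_size = 1472;            /* payload bytes per segment */

        /* Ask the stack to segment each send into gso_size chunks;
         * with NIC support the split can be offloaded to hardware.
         */
        if (setsockopt(fd, SOL_UDP, UDP_SEGMENT, &gso_size,
                       sizeof(gso_size)))
                return -1;

        return sendto(fd, buf, sizeof(buf), 0,
                      (const struct sockaddr *)dst, sizeof(*dst));
}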
>> +static void mlx5e_udp_gso_prepare_last_skb(struct sk_buff *skb,
>> +                                           struct sk_buff *nskb,
>> +                                           int remaining)
>> +{
>> +        int bytes_needed = remaining, remaining_headlen, remaining_page_offset;
>> +        int headlen = skb_transport_offset(skb) + sizeof(struct udphdr);
>> +        int payload_len = remaining + sizeof(struct udphdr);
>> +        int k = 0, i, j;
>> +
>> +        skb_copy_bits(skb, 0, nskb->data, headlen);
>> +        nskb->dev = skb->dev;
>> +        skb_reset_mac_header(nskb);
>> +        skb_set_network_header(nskb, skb_network_offset(skb));
>> +        skb_set_transport_header(nskb, skb_transport_offset(skb));
>> +        skb_set_tail_pointer(nskb, headlen);
>> +
>> +        /* How many frags do we need? */
>> +        for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
>> +                bytes_needed -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
>> +                k++;
>> +                if (bytes_needed <= 0)
>> +                        break;
>> +        }
>> +
>> +        /* Fill the first frag and split it if necessary */
>> +        j = skb_shinfo(skb)->nr_frags - k;
>> +        remaining_page_offset = -bytes_needed;
>> +        skb_fill_page_desc(nskb, 0,
>> +                           skb_shinfo(skb)->frags[j].page.p,
>> +                           skb_shinfo(skb)->frags[j].page_offset + remaining_page_offset,
>> +                           skb_shinfo(skb)->frags[j].size - remaining_page_offset);
>> +
>> +        skb_frag_ref(skb, j);
>> +
>> +        /* Fill the rest of the frags */
>> +        for (i = 1; i < k; i++) {
>> +                j = skb_shinfo(skb)->nr_frags - k + i;
>> +
>> +                skb_fill_page_desc(nskb, i,
>> +                                   skb_shinfo(skb)->frags[j].page.p,
>> +                                   skb_shinfo(skb)->frags[j].page_offset,
>> +                                   skb_shinfo(skb)->frags[j].size);
>> +                skb_frag_ref(skb, j);
>> +        }
>> +        skb_shinfo(nskb)->nr_frags = k;
>> +
>> +        remaining_headlen = remaining - skb->data_len;
>> +
>> +        /* headlen contains remaining data? */
>> +        if (remaining_headlen > 0)
>> +                skb_copy_bits(skb, skb->len - remaining, nskb->data + headlen,
>> +                              remaining_headlen);
>> +        nskb->len = remaining + headlen;
>> +        nskb->data_len = payload_len - sizeof(struct udphdr) +
>> +                         max_t(int, 0, remaining_headlen);
>> +        nskb->protocol = skb->protocol;
>> +        if (nskb->protocol == htons(ETH_P_IP)) {
>> +                ip_hdr(nskb)->id = htons(ntohs(ip_hdr(nskb)->id) +
>> +                                         skb_shinfo(skb)->gso_segs);
>> +                ip_hdr(nskb)->tot_len =
>> +                        htons(payload_len + sizeof(struct iphdr));
>> +        } else {
>> +                ipv6_hdr(nskb)->payload_len = htons(payload_len);
>> +        }
>> +        udp_hdr(nskb)->len = htons(payload_len);
>> +        skb_shinfo(nskb)->gso_size = 0;
>> +        nskb->ip_summed = skb->ip_summed;
>> +        nskb->csum_start = skb->csum_start;
>> +        nskb->csum_offset = skb->csum_offset;
>> +        nskb->queue_mapping = skb->queue_mapping;
>> +}
>> +
>> +/* might send skbs and update wqe and pi */
>> +struct sk_buff *mlx5e_udp_gso_handle_tx_skb(struct net_device *netdev,
>> +                                            struct mlx5e_txqsq *sq,
>> +                                            struct sk_buff *skb,
>> +                                            struct mlx5e_tx_wqe **wqe,
>> +                                            u16 *pi)
>> +{
>> +        int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
>> +        int headlen = skb_transport_offset(skb) + sizeof(struct udphdr);
>> +        int remaining = (skb->len - headlen) % skb_shinfo(skb)->gso_size;
>> +        struct sk_buff *nskb;
>> +
>> +        if (skb->protocol == htons(ETH_P_IP))
>> +                ip_hdr(skb)->tot_len = htons(payload_len + sizeof(struct iphdr));
>> +        else
>> +                ipv6_hdr(skb)->payload_len = htons(payload_len);
>> +        udp_hdr(skb)->len = htons(payload_len);
>> +        if (!remaining)
>> +                return skb;
>> +
>> +        nskb = alloc_skb(max_t(int, headlen, headlen + remaining - skb->data_len), GFP_ATOMIC);
>> +        if (unlikely(!nskb)) {
>> +                sq->stats->dropped++;
>> +                return NULL;
>> +        }
>> +
>> +        mlx5e_udp_gso_prepare_last_skb(skb, nskb, remaining);
>> +
>> +        skb_shinfo(skb)->gso_segs--;
>> +        pskb_trim(skb, skb->len - remaining);
>> +        mlx5e_sq_xmit(sq, skb, *wqe, *pi);
>> +        mlx5e_sq_fetch_wqe(sq, wqe, pi);
>> +        return nskb;
>> +}
> The device driver seems to be implementing the packet split here
> similar to NETIF_F_GSO_PARTIAL. When advertising the right flag, the
> stack should be able to do that for you and pass two packets to the
> driver.
We've totally missed that!
Thank you. I'll fix and resubmit.
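
For reference, the approach suggested above would mean dropping the manual
split in mlx5e_udp_gso_handle_tx_skb() and instead advertising UDP GSO as a
"partial" GSO type, so the core strips off the odd-sized tail and hands the
driver only equal-length segments plus the tail as a second skb. A rough
sketch of what the feature advertisement might look like; the helper name
and its place in the netdev setup path are assumptions, not taken from this
series:

#include <linux/netdevice.h>

/* Sketch only: list UDP L4 segmentation under gso_partial_features so
 * the stack performs the final split of the non-uniform trailing
 * segment before the skb reaches the driver.
 */
static void mlx5e_advertise_udp_gso(struct net_device *netdev)
{
        netdev->hw_features          |= NETIF_F_GSO_UDP_L4;
        netdev->features             |= NETIF_F_GSO_UDP_L4;
        netdev->hw_features          |= NETIF_F_GSO_PARTIAL;
        netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4;
}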