Message-ID: <CY4PR1001MB2311844FE8390F00A3363DEEE8E00@CY4PR1001MB2311.namprd10.prod.outlook.com>
Date: Thu, 19 Nov 2020 23:52:55 +0000
From: "Ramsay, Lincoln" <Lincoln.Ramsay@...i.com>
To: Florian Westphal <fw@...len.de>
CC: Igor Russkikh <irusskikh@...vell.com>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
Dmitry Bogdanov <dbogdanov@...vell.com>
Subject: [PATCH v4] aquantia: Remove the build_skb path
When performing IPv6 forwarding, there is an expectation that SKBs
will have some headroom. When forwarding a packet from the aquantia
driver, this does not always happen, triggering a kernel warning.
aq_ring.c has this code (edited slightly for brevity):
if (buff->is_eop && buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) {
        skb = build_skb(aq_buf_vaddr(&buff->rxdata), AQ_CFG_RX_FRAME_MAX);
} else {
        skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);
There is a significant difference between the SKBs produced by these
two code paths. When napi_alloc_skb() creates an SKB, a certain
amount of headroom is reserved. However, this is not done in the
build_skb() code path.
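A minimal sketch of the difference (not part of this patch; my reading
of net/core/skbuff.c):

    /* napi_alloc_skb() reserves headroom internally, so after: */
    skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);
    /* skb_headroom(skb) == NET_SKB_PAD + NET_IP_ALIGN */

    /* build_skb() wraps the DMA buffer as-is, so after: */
    skb = build_skb(aq_buf_vaddr(&buff->rxdata), AQ_CFG_RX_FRAME_MAX);
    /* skb_headroom(skb) == 0, which is what trips the forwarding warning */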
As the hardware buffer that build_skb() is built around does not
reserve any room for SKB headroom, this code path is being removed
and the napi_alloc_skb() path will always be used. That path does
have to copy the packet headers into the SKB, but it adds the packet
data as a frag.
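In outline, the path that remains does this (condensed from the diff
below):

    skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);

    /* copy at most the protocol headers into the linear area... */
    hdr_len = buff->len;
    if (hdr_len > AQ_CFG_RX_HDR_SIZE)
        hdr_len = eth_get_headlen(skb->dev, aq_buf_vaddr(&buff->rxdata),
                                  AQ_CFG_RX_HDR_SIZE);
    memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
           ALIGN(hdr_len, sizeof(long)));

    /* ...and attach the rest of the hardware buffer as a page frag */
    if (buff->len - hdr_len > 0) {
        skb_add_rx_frag(skb, 0, buff->rxdata.page,
                        buff->rxdata.pg_off + hdr_len,
                        buff->len - hdr_len, AQ_CFG_RX_FRAME_MAX);
        page_ref_inc(buff->rxdata.page);
    }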
Signed-off-by: Lincoln Ramsay <lincoln.ramsay@...ngear.com>
---
> For build_skb path to work the buffer scheme would need to be changed
> to reserve headroom, so yes, I think that the proposed patch is the
> most convenient solution.
I don't know about the benefits/feasibility, but I did wonder whether
(in the event that the "fast path" is possible) the DMA mapping could
use an offset, so that the page would include the SKB headroom but the
DMA mapping would not. If that were done though, only one RX frame
would fit into the page (at least on my system, where the RX frame
seems to be 2k and the page is 4k). There's also the possibility of
setting the "order" variable so that multiple pages are created at
once, and I'm not sure whether this would work in that case.
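Something like this is what I had in mind (purely hypothetical and
untested; dev and page stand in for the driver's device and RX page):

    /* keep NET_SKB_PAD of headroom at the start of the page and map
     * only the remainder for the device...
     */
    daddr = dma_map_page(dev, page, NET_SKB_PAD,
                         PAGE_SIZE - NET_SKB_PAD, DMA_FROM_DEVICE);

    /* ...then build_skb() can cover the whole page, with the headroom
     * carved out of the unmapped prefix:
     */
    skb = build_skb(page_address(page), PAGE_SIZE);
    skb_reserve(skb, NET_SKB_PAD);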
> This only copies the initial part and then the rest is added as a frag.
Oh yeah. That's not as bad as I had thought then :)
I wonder though... if the "fast path" is possible, could the whole packet (including header) be added as a frag, avoiding the header copy? Or is that not how SKBs work?
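(My guess, for what it's worth: the stack seems to want at least the
link-layer header in the linear area. A frag-only SKB would look like
the sketch below, and eth_type_trans() reads the Ethernet header
straight from skb->data, so an empty linear area wouldn't get past it.
ndev here stands in for the driver's net_device.)

    skb_add_rx_frag(skb, 0, buff->rxdata.page, buff->rxdata.pg_off,
                    buff->len, AQ_CFG_RX_FRAME_MAX); /* all data in a frag */
    skb->protocol = eth_type_trans(skb, ndev);
    /* eth_type_trans() casts skb->data to struct ethhdr *, but nothing
     * was ever put in the linear area here
     */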
.../net/ethernet/aquantia/atlantic/aq_ring.c | 127 ++++++++----------
1 file changed, 53 insertions(+), 74 deletions(-)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 4f913658eea4..425e8e5afec7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -413,85 +413,64 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
buff->rxdata.pg_off,
buff->len, DMA_FROM_DEVICE);
- /* for single fragment packets use build_skb() */
- if (buff->is_eop &&
- buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) {
- skb = build_skb(aq_buf_vaddr(&buff->rxdata),
+ skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);
+ if (unlikely(!skb)) {
+ u64_stats_update_begin(&self->stats.rx.syncp);
+ self->stats.rx.skb_alloc_fails++;
+ u64_stats_update_end(&self->stats.rx.syncp);
+ err = -ENOMEM;
+ goto err_exit;
+ }
+ if (is_ptp_ring)
+ buff->len -=
+ aq_ptp_extract_ts(self->aq_nic, skb,
+ aq_buf_vaddr(&buff->rxdata),
+ buff->len);
+
+ hdr_len = buff->len;
+ if (hdr_len > AQ_CFG_RX_HDR_SIZE)
+ hdr_len = eth_get_headlen(skb->dev,
+ aq_buf_vaddr(&buff->rxdata),
+ AQ_CFG_RX_HDR_SIZE);
+
+ memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
+ ALIGN(hdr_len, sizeof(long)));
+
+ if (buff->len - hdr_len > 0) {
+ skb_add_rx_frag(skb, 0, buff->rxdata.page,
+ buff->rxdata.pg_off + hdr_len,
+ buff->len - hdr_len,
AQ_CFG_RX_FRAME_MAX);
- if (unlikely(!skb)) {
- u64_stats_update_begin(&self->stats.rx.syncp);
- self->stats.rx.skb_alloc_fails++;
- u64_stats_update_end(&self->stats.rx.syncp);
- err = -ENOMEM;
- goto err_exit;
- }
- if (is_ptp_ring)
- buff->len -=
- aq_ptp_extract_ts(self->aq_nic, skb,
- aq_buf_vaddr(&buff->rxdata),
- buff->len);
- skb_put(skb, buff->len);
page_ref_inc(buff->rxdata.page);
- } else {
- skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);
- if (unlikely(!skb)) {
- u64_stats_update_begin(&self->stats.rx.syncp);
- self->stats.rx.skb_alloc_fails++;
- u64_stats_update_end(&self->stats.rx.syncp);
- err = -ENOMEM;
- goto err_exit;
- }
- if (is_ptp_ring)
- buff->len -=
- aq_ptp_extract_ts(self->aq_nic, skb,
- aq_buf_vaddr(&buff->rxdata),
- buff->len);
-
- hdr_len = buff->len;
- if (hdr_len > AQ_CFG_RX_HDR_SIZE)
- hdr_len = eth_get_headlen(skb->dev,
- aq_buf_vaddr(&buff->rxdata),
- AQ_CFG_RX_HDR_SIZE);
-
- memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
- ALIGN(hdr_len, sizeof(long)));
-
- if (buff->len - hdr_len > 0) {
- skb_add_rx_frag(skb, 0, buff->rxdata.page,
- buff->rxdata.pg_off + hdr_len,
- buff->len - hdr_len,
- AQ_CFG_RX_FRAME_MAX);
- page_ref_inc(buff->rxdata.page);
- }
+ }
- if (!buff->is_eop) {
- buff_ = buff;
- i = 1U;
- do {
- next_ = buff_->next,
- buff_ = &self->buff_ring[next_];
+ if (!buff->is_eop) {
+ buff_ = buff;
+ i = 1U;
+ do {
+ next_ = buff_->next,
+ buff_ = &self->buff_ring[next_];
- dma_sync_single_range_for_cpu(
- aq_nic_get_dev(self->aq_nic),
- buff_->rxdata.daddr,
- buff_->rxdata.pg_off,
- buff_->len,
- DMA_FROM_DEVICE);
- skb_add_rx_frag(skb, i++,
- buff_->rxdata.page,
- buff_->rxdata.pg_off,
- buff_->len,
- AQ_CFG_RX_FRAME_MAX);
- page_ref_inc(buff_->rxdata.page);
- buff_->is_cleaned = 1;
-
- buff->is_ip_cso &= buff_->is_ip_cso;
- buff->is_udp_cso &= buff_->is_udp_cso;
- buff->is_tcp_cso &= buff_->is_tcp_cso;
- buff->is_cso_err |= buff_->is_cso_err;
+ dma_sync_single_range_for_cpu(
+ aq_nic_get_dev(self->aq_nic),
+ buff_->rxdata.daddr,
+ buff_->rxdata.pg_off,
+ buff_->len,
+ DMA_FROM_DEVICE);
+ skb_add_rx_frag(skb, i++,
+ buff_->rxdata.page,
+ buff_->rxdata.pg_off,
+ buff_->len,
+ AQ_CFG_RX_FRAME_MAX);
+ page_ref_inc(buff_->rxdata.page);
+ buff_->is_cleaned = 1;
- } while (!buff_->is_eop);
- }
+ buff->is_ip_cso &= buff_->is_ip_cso;
+ buff->is_udp_cso &= buff_->is_udp_cso;
+ buff->is_tcp_cso &= buff_->is_tcp_cso;
+ buff->is_cso_err |= buff_->is_cso_err;
+
+ } while (!buff_->is_eop);
}
if (buff->is_vlan)
--
2.17.1