lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-ID: <5b719373-beb0-ce8b-7789-b24c01a28eff@engleder-embedded.com> Date: Tue, 10 Jan 2023 22:07:23 +0100 From: Gerhard Engleder <gerhard@...leder-embedded.com> To: Alexander H Duyck <alexander.duyck@...il.com>, netdev@...r.kernel.org Cc: davem@...emloft.net, kuba@...nel.org, edumazet@...gle.com, pabeni@...hat.com Subject: Re: [PATCH net-next v4 05/10] tsnep: Add XDP TX support On 10.01.23 17:56, Alexander H Duyck wrote: > nOn Mon, 2023-01-09 at 20:15 +0100, Gerhard Engleder wrote: >> Implement ndo_xdp_xmit() for XDP TX support. Support for fragmented XDP >> frames is included. >> >> Also some const, braces and logic clean ups are done in normal TX path >> to keep both TX paths in sync. >> >> Signed-off-by: Gerhard Engleder <gerhard@...leder-embedded.com> >> --- >> drivers/net/ethernet/engleder/tsnep.h | 12 +- >> drivers/net/ethernet/engleder/tsnep_main.c | 211 +++++++++++++++++++-- >> 2 files changed, 210 insertions(+), 13 deletions(-) >> >> diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h >> index d658413ceb14..9cb267938794 100644 >> --- a/drivers/net/ethernet/engleder/tsnep.h >> +++ b/drivers/net/ethernet/engleder/tsnep.h >> @@ -57,6 +57,12 @@ struct tsnep_rxnfc_rule { >> int location; >> }; >> >> +enum tsnep_tx_type { >> + TSNEP_TX_TYPE_SKB, >> + TSNEP_TX_TYPE_XDP_TX, >> + TSNEP_TX_TYPE_XDP_NDO, >> +}; >> + > > I'm more a fan of using a bitmap rather than an enum for these type of > things since otherwise you are having to check for 3 possible values > for everything. > > Also you may want to reorder this so that SKB is the last option. With > that 0/1 would be your two XDP values and 2 would be your SKB value. It > should make it easier for this to be treated more as a decision tree. I will try to use bitmap. >> struct tsnep_tx_entry { >> struct tsnep_tx_desc *desc; >> struct tsnep_tx_desc_wb *desc_wb; >> @@ -65,7 +71,11 @@ struct tsnep_tx_entry { >> >> u32 properties; >> >> - struct sk_buff *skb; >> + enum tsnep_tx_type type; >> + union { >> + struct sk_buff *skb; >> + struct xdp_frame *xdpf; >> + }; >> size_t len; >> DEFINE_DMA_UNMAP_ADDR(dma);xdp_return_frame_bulk >> }; >> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c >> index 943de5a09693..1ae73c706c9e 100644 >> --- a/drivers/net/ethernet/engleder/tsnep_main.c >> +++ b/drivers/net/ethernet/engleder/tsnep_main.c >> @@ -310,10 +310,12 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length, >> struct tsnep_tx_entry *entry = &tx->entry[index]; >> >> entry->properties = 0; >> + /* xdpf is union with skb */ >> if (entry->skb) { >> entry->properties = length & TSNEP_DESC_LENGTH_MASK; >> entry->properties |= TSNEP_DESC_INTERRUPT_FLAG; >> - if (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) >> + if (entry->type == Txdp_return_frame_bulkSNEP_TX_TYPE_SKB && >> + (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)) >> entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG; >> >> /* toggle user flag to prevent false acknowledge >> @@ -370,7 +372,8 @@ static int tsnep_tx_desc_available(struct tsnep_tx *tx) >> return tx->read - tx->write - 1; >> } >> >> -static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) >> +static int tsnep_tx_map(const struct sk_buff *skb, struct tsnep_tx *tx, >> + int count) >> { > > This change to const doesn't add anything since this is a static > function. You could probably just skip making this change since the > function will likely be inlined anyway. const was requested for tsnep_xdp_tx_map() during last review round so I added it also here to keep both function similar. >> struct device *dmadev = tx->adapter->dmadev; >> struct tsnep_tx_entry *entry; >> @@ -382,7 +385,7 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) >> for (i = 0; i < count; i++) { >> entry = &tx->entry[(tx->write + i) % TSNEP_RING_SIZE]; >> >> - if (i == 0) { >> + if (!i) { >> len = skb_headlen(skb); >> dma = dma_map_single(dmadev, skb->data, len, >> DMA_TO_DEVICE); >> @@ -400,6 +403,8 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) >> >> entry->desc->tx = __cpu_to_le64(dma); >> >> + entry->type = TSNEP_TX_TYPE_SKB; >> + >> map_len += len; >> } >> > > I wonder if it wouldn't be better to change this so that you have a 4th > type for just "PAGE" or "FRAGMENT" since you only really need to > identify this as SKB or XDP on the first buffer, and then all the rest > are just going to be unmapped as page regardless of if it is XDP or > SKB. I will it try in combination with your bitmap suggestion. >> @@ -417,12 +422,13 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count) >> entry = &tx->entry[(index + i) % TSNEP_RING_SIZE]; >> >> if (entry->len) { >> - if (i == 0) >> + if (!i && entry->type == TSNEP_TX_TYPE_SKB) >> dma_unmap_single(dmadev, >> dma_unmap_addr(entry, dma), >> dma_unmap_len(entry, len), >> DMA_TO_DEVICE); >> - else >> + else if (entry->type == TSNEP_TX_TYPE_SKB || >> + entry->type == TSNEP_TX_TYPE_XDP_NDO) >> dma_unmap_page(dmadev, >> dma_unmap_addr(entry, dma), >> dma_unmap_len(entry, len), > > Rather than perform 2 checks here you could just verify type != > TYPE_XDP_TX which would save you a check. Will be improved with your bitmap suggestion. >> @@ -482,7 +488,7 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, >> >> for (i = 0; i < count; i++) >> tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE, length, >> - i == (count - 1)); >> + i == count - 1); >> tx->write = (tx->write + count) % TSNEP_RING_SIZE; >> >> skb_tx_timestamp(skb); >> @@ -502,12 +508,133 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, >> return NETDEV_TX_OK; >> } >> >> +static int tsnep_xdp_tx_map(const struct xdp_frame *xdpf, struct tsnep_tx *tx, >> + const struct skb_shared_info *shinfo, int count, >> + enum tsnep_tx_type type) > > Again the const here isn't adding any value since this is a static > function and will likely be inlined into the function below which calls > it. const was requested here during last review round so I added it. It may add some value by detecting some problems at compile time. >> +{ >> + struct device *dmadev = tx->adapter->dmadev; >> + struct tsnep_tx_entry *entry; >> + const skb_frag_t *frag; >> + struct page *page; >> + unsigned int len; >> + int map_len = 0; >> + dma_addr_t dma; >> + void *data; >> + int i; >> + >> + frag = NULL; >> + len = xdpf->len; >> + for (i = 0; i < count; i++) { >> + entry = &tx->entry[(tx->write + i) % TSNEP_RING_SIZE]; >> + if (type == TSNEP_TX_TYPE_XDP_NDO) { >> + data = unlikely(frag) ? skb_frag_address(frag) : >> + xdpf->data; >> + dma = dma_map_single(dmadev, data, len, DMA_TO_DEVICE); >> + if (dma_mapping_error(dmadev, dma)) >> + return -ENOMEM; >> + >> + entry->type = TSNEP_TX_TYPE_XDP_NDO; >> + } else { >> + page = unlikely(frag) ? skb_frag_page(frag) : >> + virt_to_page(xdpf->data); >> + dma = page_pool_get_dma_addr(page); >> + if (unlikely(frag)) >> + dma += skb_frag_off(frag); >> + else >> + dma += sizeof(*xdpf) + xdpf->headroom; >> + dma_sync_single_for_device(dmadev, dma, len, >> + DMA_BIDIRECTIONAL); >> + >> + entry->type = TSNEP_TX_TYPE_XDP_TX; >> + } >> + >> + entry->len = len; >> + dma_unmap_addr_set(entry, dma, dma); >> + >> + entry->desc->tx = __cpu_to_le64(dma); >> + >> + map_len += len; >> + >> + if (i + 1 < count) { >> + frag = &shinfo->frags[i]; >> + len = skb_frag_size(frag); >> + } >> + } >> + >> + return map_len; >> +} >> + >> +/* This function requires __netif_tx_lock is held by the caller. */ >> +static bool tsnep_xdp_xmit_frame_ring(struct xdp_frame *xdpf, >> + struct tsnep_tx *tx, >> + enum tsnep_tx_type type) >> +{ >> + const struct skb_shared_info *shinfo = >> + xdp_get_shared_info_from_frame(xdpf); >> + struct tsnep_tx_entry *entry; >> + int count, length, retval, i; >> + >> + count = 1; >> + if (unlikely(xdp_frame_has_frags(xdpf))) >> + count += shinfo->nr_frags; >> + >> + spin_lock_bh(&tx->lock); >> + >> + /* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS >> + * will be available for normal TX path and queue is stopped there if >> + * necessary >> + */ >> + if (tsnep_tx_desc_available(tx) < (MAX_SKB_FRAGS + 1 + count)) { >> + spin_unlock_bh(&tx->lock); >> + >> + return false; >> + } >> + >> + entry = &tx->entry[tx->write]; >> + entry->xdpf = xdpf; >> + >> + retval = tsnep_xdp_tx_map(xdpf, tx, shinfo, count, type); >> + if (retval < 0) { >> + tsnep_tx_unmap(tx, tx->write, count); >> + entry->xdpf = NULL; >> + >> + tx->dropped++; >> + >> + spin_unlock_bh(&tx->lock); >> + >> + return false; >> + } >> + length = retval; >> + >> + for (i = 0; i < count; i++) >> + tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE, length, >> + i == count - 1); >> + tx->write = (tx->write + count) % TSNEP_RING_SIZE; >> + >> + /* descriptor properties shall be valid before hardware is notified */ >> + dma_wmb(); >> + >> + spin_unlock_bh(&tx->lock); >> + >> + return true; >> +} >> + >> +static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx) >> +{ >> + iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL); >> +} >> + >> static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) >> { >> - int budget = 128; >> struct tsnep_tx_entry *entry; >> - int count; >> + struct xdp_frame_bulk bq; >> + int budget = 128; >> int length; >> + int count; >> + >> + xdp_frame_bulk_init(&bq); >> + >> + rcu_read_lock(); /* need for xdp_return_frame_bulk */ >> > > You should be able to get rid of both of these. See comments below. > >> spin_lock_bh(&tx->lock); >> >> @@ -527,12 +654,17 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) >> dma_rmb(); >> >> count = 1; >> - if (skb_shinfo(entry->skb)->nr_frags > 0) >> + if (entry->type == TSNEP_TX_TYPE_SKB && >> + skb_shinfo(entry->skb)->nr_frags > 0) >> count += skb_shinfo(entry->skb)->nr_frags; >> + else if (entry->type != TSNEP_TX_TYPE_SKB && >> + xdp_frame_has_frags(entry->xdpf)) >> + count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags; >> >> length = tsnep_tx_unmap(tx, tx->read, count); >> >> - if ((skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) && >> + if (entry->type == TSNEP_TX_TYPE_SKB && >> + (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) && >> (__le32_to_cpu(entry->desc_wb->properties) & >> TSNEP_DESC_EXTENDED_WRITEBACK_FLAG)) { >> struct skb_shared_hwtstamps hwtstamps; >> @@ -552,7 +684,18 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) >> skb_tstamp_tx(entry->skb, &hwtstamps); >> } >> >> - napi_consume_skb(entry->skb, napi_budget); >> + switch (entry->type) { >> + case TSNEP_TX_TYPE_SKB: >> + napi_consume_skb(entry->skb, napi_budget); >> + break; >> + case TSNEP_TX_TYPE_XDP_TX: >> + xdp_return_frame_rx_napi(entry->xdpf); >> + break; >> + case TSNEP_TX_TYPE_XDP_NDO: >> + xdp_return_frame_bulk(entry->xdpf, &bq); >> + break; >> + } >> + /* xdpf is union with skb */ >> entry->skb = NULL; >> >> tx->read = (tx->read + count) % TSNEP_RING_SIZE; > > If I am not mistaken I think you can use xdp_return_frame_rx_napi for > both of these without having to resort to the bulk approach since the > tsnep_tx_poll is operating in the NAPI polling context. > > When you free the buffers outside of polling context you would then > want to use the xdp_return_frame_bulk. So for example if you have any > buffers that weren't transmitted when you get to the point of > tsnep_tx_ring_cleanup then you would want to unmap and free them there > using something like the xdp_return_frame or xdp_return_frame_bulk. This is done similar in other drivers, so I thought it is good practice. I will compare the performance of xdp_return_frame_rx_napi() and xdp_return_frame_bulk(). >> @@ -570,7 +713,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) >> >> spin_unlock_bh(&tx->lock); >> >> - return (budget != 0); >> + xdp_flush_frame_bulk(&bq); >> + >> + rcu_read_unlock(); >> + >> + return budget != 0; >> } >> >> static bool tsnep_tx_pending(struct tsnep_tx *tx) >> @@ -1330,6 +1477,45 @@ static ktime_t tsnep_netdev_get_tstamp(struct net_device *netdev, >> return ns_to_ktime(timestamp); >> } >> >> +static int tsnep_netdev_xdp_xmit(struct net_device *dev, int n, >> + struct xdp_frame **xdp, u32 flags) >> +{ >> + struct tsnep_adapter *adapter = netdev_priv(dev); >> + u32 cpu = smp_processor_id(); >> + struct netdev_queue *nq; >> + int nxmit, queue; >> + bool xmit; >> + >> + if (unlikely(test_bit(__TSNEP_DOWN, &adapter->state))) >> + return -ENETDOWN; >> + > > Again, here you are better off probably checking for netif_carrier_ok > rather than adding a new flag. > >> + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) >> + return -EINVAL; >> + >> + queue = cpu % adapter->num_tx_queues; > > Modulo math here unconditionally will be expensive. Take a look at > using reciprocal_scale like we do in skb_tx_hash. I will try to find a better solution. >> + nq = netdev_get_tx_queue(adapter->netdev, queue); >> + >> + __netif_tx_lock(nq, cpu); >> + >> + /* Avoid transmit queue timeout since we share it with the slow path */ >> + txq_trans_cond_update(nq); >> + > > Doing this here may not be the right spot. It might make more sense to > do this after the call to xmit_frame_ring after the break case in order > to guarantee that you actually put something on the ring before > updating the state otherwise you can stall the Tx ring without > triggering the watchdog by just hammering this. > >> + for (nxmit = 0; nxmit < n; nxmit++) { >> + xmit = tsnep_xdp_xmit_frame_ring(xdp[nxmit], >> + &adapter->tx[queue], >> + TSNEP_TX_TYPE_XDP_NDO); >> + if (!xmit) >> + break; > > So move txq_trans_cond_update(nq) here. Will be fixed. Gerhard
Powered by blists - more mailing lists