lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Sun, 25 Oct 2009 14:19:42 +0200 From: "Vladislav Zolotarov" <vladz@...adcom.com> To: "David S. Miller" <davem@...emloft.net> cc: "Eilon Greenstein" <eilong@...adcom.com>, Netdev <netdev@...r.kernel.org> Subject: [PATCH net-next] bnx2x: Do Tx handling in a separate tasklet. This patch moves the 'Tx interrupt work' of each Tx queue from the hardIRQ context to the separate low-latency tasklet. Otherwise there is a possibility of a software lockup situation in a Tx softIRQ as it handles freeing all skb's 'freed' in (hard)IRQ context. Signed-off-by: Vladislav Zolotarov <vladz@...adcom.com> Signed-off-by: Eilon Greenstein <eilong@...adcom.com> --- drivers/net/bnx2x.h | 5 ++ drivers/net/bnx2x_main.c | 160 +++++++++++++++++++++++++++++----------------- 2 files changed, 105 insertions(+), 60 deletions(-) diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h index c3b32f7..b29680e 100644 --- a/drivers/net/bnx2x.h +++ b/drivers/net/bnx2x.h @@ -259,6 +259,11 @@ struct bnx2x_eth_q_stats { struct bnx2x_fastpath { struct napi_struct napi; +/* + * Tx tasklet should not run for more than 1 tick. Then it + * should reschedule itself. + */ + struct tasklet_struct tx_int_task; u8 is_rx_queue; diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 59b58d8..0e2c1cb 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -56,8 +56,8 @@ #include "bnx2x_init_ops.h" #include "bnx2x_dump.h" -#define DRV_MODULE_VERSION "1.52.1-1" -#define DRV_MODULE_RELDATE "2009/10/13" +#define DRV_MODULE_VERSION "1.52.1-2" +#define DRV_MODULE_RELDATE "2009/10/25" #define BNX2X_BC_VER 0x040200 #include <linux/firmware.h> @@ -784,21 +784,13 @@ static inline void bnx2x_ack_sb(struct bnx2x *bp, u8 sb_id, barrier(); } -static inline u16 bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp) +static inline void bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp) { struct host_status_block *fpsb = fp->status_blk; - u16 rc = 0; barrier(); /* status block is written to by the chip */ - if (fp->fp_c_idx != fpsb->c_status_block.status_block_index) { - fp->fp_c_idx = fpsb->c_status_block.status_block_index; - rc |= 1; - } - if (fp->fp_u_idx != fpsb->u_status_block.status_block_index) { - fp->fp_u_idx = fpsb->u_status_block.status_block_index; - rc |= 2; - } - return rc; + fp->fp_c_idx = fpsb->c_status_block.status_block_index; + fp->fp_u_idx = fpsb->u_status_block.status_block_index; } static u16 bnx2x_ack_int(struct bnx2x *bp) @@ -838,6 +830,9 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fastpath *fp, u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons; int nbd; + /* prefetch skb end pointer to speedup dev_kfree_skb() */ + prefetch(&skb->end); + DP(BNX2X_MSG_OFF, "pkt_idx %d buff @(%p)->skb %p\n", idx, tx_buf, skb); @@ -882,7 +877,7 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fastpath *fp, /* release skb */ WARN_ON(!skb); - dev_kfree_skb_any(skb); + dev_kfree_skb(skb); tx_buf->first_bd = 0; tx_buf->skb = NULL; @@ -912,12 +907,23 @@ static inline u16 bnx2x_tx_avail(struct bnx2x_fastpath *fp) return (s16)(fp->bp->tx_ring_size) - used; } -static void bnx2x_tx_int(struct bnx2x_fastpath *fp) +static inline int bnx2x_has_tx_work(struct bnx2x_fastpath *fp, u16 sw_cons) +{ + u16 hw_cons; + + /* Tell compiler that status block fields can change */ + barrier(); + hw_cons = le16_to_cpu(*fp->tx_cons_sb); + return hw_cons != sw_cons; +} + +static void bnx2x_tx_int(unsigned long data) { + struct bnx2x_fastpath *fp = (struct bnx2x_fastpath *)data; struct bnx2x *bp = fp->bp; struct netdev_queue *txq; u16 hw_cons, sw_cons, bd_cons = fp->tx_bd_cons; - int done = 0; + unsigned long start_time = jiffies; #ifdef BNX2X_STOP_ON_ERROR if (unlikely(bp->panic)) @@ -928,7 +934,8 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp) hw_cons = le16_to_cpu(*fp->tx_cons_sb); sw_cons = fp->tx_pkt_cons; - while (sw_cons != hw_cons) { + while ((sw_cons != hw_cons) && + (start_time == jiffies)) { u16 pkt_cons; pkt_cons = TX_BD(sw_cons); @@ -945,7 +952,6 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp) */ bd_cons = bnx2x_free_tx_pkt(bp, fp, pkt_cons); sw_cons++; - done++; } fp->tx_pkt_cons = sw_cons; @@ -954,7 +960,8 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp) /* TBD need a thresh? */ if (unlikely(netif_tx_queue_stopped(txq))) { - /* Need to make the tx_bd_cons update visible to start_xmit() + /* + * Need to make the tx_bd_cons update visible to start_xmit() * before checking for netif_tx_queue_stopped(). Without the * memory barrier, there is a small possibility that * start_xmit() will miss it and cause the queue to be stopped @@ -967,6 +974,32 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp) (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3)) netif_tx_wake_queue(txq); } + + /* + * If the loop was broken by the timeout or/and there is more + * Tx work to do, reschedule the task, otherwise reenable interrupts. + * Do not restart the task if interrupts are disabled. + */ + if (likely(atomic_read(&bp->intr_sem) == 0)) { + if (bnx2x_has_tx_work(fp, sw_cons)) + tasklet_hi_schedule(&fp->tx_int_task); + else { + fp->fp_c_idx = + fp->status_blk->c_status_block.status_block_index; + /* + * Ensure that IGU status block is actually read and + * the read value is written to the memory before we + * check the Tx work. + */ + rmb(); + if (bnx2x_has_tx_work(fp, sw_cons)) + tasklet_hi_schedule(&fp->tx_int_task); + else + bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID, + le16_to_cpu(fp->fp_c_idx), + IGU_INT_ENABLE, 1); + } + } } #ifdef BCM_CNIC @@ -1734,6 +1767,7 @@ static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie) if (unlikely(bp->panic)) return IRQ_HANDLED; #endif + /* Handle Rx or Tx according to MSI-X vector */ if (fp->is_rx_queue) { prefetch(fp->rx_cons_sb); @@ -1745,15 +1779,7 @@ static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie) prefetch(fp->tx_cons_sb); prefetch(&fp->status_blk->c_status_block.status_block_index); - bnx2x_update_fpsb_idx(fp); - rmb(); - bnx2x_tx_int(fp); - - /* Re-enable interrupts */ - bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID, - le16_to_cpu(fp->fp_u_idx), IGU_INT_NOP, 1); - bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID, - le16_to_cpu(fp->fp_c_idx), IGU_INT_ENABLE, 1); + tasklet_hi_schedule(&fp->tx_int_task); } return IRQ_HANDLED; @@ -1802,17 +1828,7 @@ static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance) prefetch(&fp->status_blk->c_status_block. status_block_index); - bnx2x_update_fpsb_idx(fp); - rmb(); - bnx2x_tx_int(fp); - - /* Re-enable interrupts */ - bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID, - le16_to_cpu(fp->fp_u_idx), - IGU_INT_NOP, 1); - bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID, - le16_to_cpu(fp->fp_c_idx), - IGU_INT_ENABLE, 1); + tasklet_hi_schedule(&fp->tx_int_task); } status &= ~mask; } @@ -4673,7 +4689,7 @@ static void bnx2x_timer(unsigned long data) struct bnx2x_fastpath *fp = &bp->fp[0]; int rc; - bnx2x_tx_int(fp); + tasklet_hi_schedule(&fp->tx_int_task); rc = bnx2x_rx_int(fp, 1000); } @@ -5117,6 +5133,8 @@ static void bnx2x_init_tx_ring(struct bnx2x *bp) for_each_tx_queue(bp, j) { struct bnx2x_fastpath *fp = &bp->fp[j]; + tasklet_init(&fp->tx_int_task, bnx2x_tx_int, (unsigned long)fp); + for (i = 1; i <= NUM_TX_RINGS; i++) { struct eth_tx_next_bd *tx_next_bd = &fp->tx_desc_ring[TX_DESC_CNT * i - 1].next_bd; @@ -7119,20 +7137,38 @@ static void bnx2x_netif_start(struct bnx2x *bp) if (intr_sem) { if (netif_running(bp->dev)) { + int i; + bnx2x_napi_enable(bp); bnx2x_int_enable(bp); if (bp->state == BNX2X_STATE_OPEN) netif_tx_wake_all_queues(bp->dev); + + /* Enable Tx tasklets */ + for_each_tx_queue(bp, i) + tasklet_enable(&bp->fp[i].tx_int_task); } } } static void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw) { + int i; + bnx2x_int_disable_sync(bp, disable_hw); bnx2x_napi_disable(bp); netif_tx_disable(bp->dev); bp->dev->trans_start = jiffies; /* prevent tx timeout */ + + /* Stop Tx tasklet */ + for_each_tx_queue(bp, i) { + struct bnx2x_fastpath *fp = &bp->fp[i]; + + /* Stop */ + tasklet_disable(&fp->tx_int_task); + /* Kill */ + tasklet_kill(&fp->tx_int_task); + } } /* @@ -7931,7 +7967,7 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode) cnt = 1000; while (bnx2x_has_tx_work_unload(fp)) { - bnx2x_tx_int(fp); + bnx2x_tx_int((unsigned long)fp); if (!cnt) { BNX2X_ERR("timeout waiting for queue[%d]\n", i); @@ -11004,8 +11040,6 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) prefetch(fp->rx_buf_ring[RX_BD(fp->rx_bd_cons)].skb); prefetch((char *)(fp->rx_buf_ring[RX_BD(fp->rx_bd_cons)].skb) + 256); - bnx2x_update_fpsb_idx(fp); - if (bnx2x_has_rx_work(fp)) { work_done = bnx2x_rx_int(fp, budget); @@ -11014,28 +11048,34 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) goto poll_again; } - /* bnx2x_has_rx_work() reads the status block, thus we need to - * ensure that status block indices have been actually read - * (bnx2x_update_fpsb_idx) prior to this check (bnx2x_has_rx_work) - * so that we won't write the "newer" value of the status block to IGU - * (if there was a DMA right after bnx2x_has_rx_work and - * if there is no rmb, the memory reading (bnx2x_update_fpsb_idx) - * may be postponed to right before bnx2x_ack_sb). In this case - * there will never be another interrupt until there is another update - * of the status block, while there is still unhandled work. - */ - rmb(); - + /* Fall out from the NAPI loop if needed */ if (!bnx2x_has_rx_work(fp)) { #ifdef BNX2X_STOP_ON_ERROR poll_panic: #endif - napi_complete(napi); + fp->fp_u_idx = + fp->status_blk->u_status_block.status_block_index; + /* + * bnx2x_has_rx_work() reads the status block, thus we need + * to ensure that status block indices have been actually read + * (bnx2x_update_fpsb_idx) prior to this check + * (bnx2x_has_rx_work) so that we won't write the "newer" + * value of the status block to IGU (if there was a DMA right + * after bnx2x_has_rx_work and if there is no rmb, the memory + * reading (bnx2x_update_fpsb_idx) may be postponed to right + * before bnx2x_ack_sb). In this case there will never be + * another interrupt until there is another update of the + * status block, while there is still unhandled work. + */ + rmb(); - bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID, - le16_to_cpu(fp->fp_u_idx), IGU_INT_NOP, 1); - bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID, - le16_to_cpu(fp->fp_c_idx), IGU_INT_ENABLE, 1); + if (!bnx2x_has_rx_work(fp)) { + napi_complete(napi); + /* Re-enable interrupts */ + bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID, + le16_to_cpu(fp->fp_u_idx), + IGU_INT_ENABLE, 1); + } } poll_again: -- 1.6.0.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists