[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJ+vNU1zQuibUP0ScxvFJidYfqzMC42rN_pymR4jUjPcPxrieg@mail.gmail.com>
Date: Tue, 2 Jan 2018 11:18:40 -0800
From: Tim Harvey <tharvey@...eworks.com>
To: David Daney <ddaney@...iumnetworks.com>
Cc: Andrew Lunn <andrew@...n.ch>, Sunil Goutham <sgoutham@...ium.com>,
netdev <netdev@...r.kernel.org>
Subject: Re: thunderx sgmii interface hang
On Fri, Dec 22, 2017 at 4:30 PM, David Daney <ddaney@...iumnetworks.com> wrote:
> On 12/22/2017 04:22 PM, Tim Harvey wrote:
<snip>
>>
>> BGXX_GMP_GMI_TXX_INT[UNDFLW] is getting set when the issue is
>> triggered. From CN80XX-HM-1.2P this is caused by:
>>
>> "In the unlikely event that P2X data cannot keep the GMP TX FIFO full,
>> the SGMII/1000BASE-X/ QSGMII packet transfer will underflow. This
>> should be detected by the receiving device as an FCS error.
>> Internally, the packet is drained and lost"
>>
>
> Yikes!
>
>> Perhaps this needs to be caught and handled in some way. There's some
>> interrupt handlers in nicvf_main.c yet I'm not clear where to hook up
>> this one.
>
>
> This would be an interrupt generated by the BGX device, not the NIC device.
> It will have an MSI-X index of (6 + LMAC * 7). See BGX_INT_VEC_E in the
> HRM.
>
> Note that I am telling you which interrupt it is, but not recommending that
> catching it and doing something is necessarily the best thing to do.
>
David,
The following patch registers the BGX_GMP_GMI_TXX interrupt, enables
the UNDFLW interrupt, and toggles the MAC/PCS enable when it fires. It
does indeed catch and resolve the issue, which then never occurs again.
I would agree this isn't a 'fix' but works around whatever the issue
is. Any ideas what could cause an UNDFLW like this?
I suspect it might have something to do with the fact that we have a 100MHz
external ref clock for the DLM, which is configured with
GSERx_LANE_MODE[LMODE] of 0x6 (R_125G_REFCLK15625_SGMII: 1.25Gbd,
156.25MHz ref clock, SGMII). That seems just wrong (a 100MHz clock, yet we
tell the CN80XX 156.25MHz), but according to the reference manual and BDK
code this is an allowed configuration.
Do you have access to a CN80XX reference board that has an SGMII PHY
on it to test for this issue? I've been trying to get hold of one but
have not been successful.
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 2bba9d1..be9148f9 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -179,6 +179,15 @@
#define BGX_GMP_GMI_TXX_BURST 0x38228
#define BGX_GMP_GMI_TXX_MIN_PKT 0x38240
#define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300
+#define BGX_GMP_GMI_TXX_INT 0x38500
+#define BGX_GMP_GMI_TXX_INT_W1S 0x38508
+#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510
+#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518
+#define GMI_TXX_INT_PTP_LOST BIT_ULL(4)
+#define GMI_TXX_INT_LATE_COL BIT_ULL(3)
+#define GMI_TXX_INT_XSDEF BIT_ULL(2)
+#define GMI_TXX_INT_XSCOL BIT_ULL(1)
+#define GMI_TXX_INT_UNDFLW BIT_ULL(0)
#define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */
#define BGX_MSIX_VEC_0_29_CTL 0x400008
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 805c02a..0690966 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1344,6 +1344,54 @@ static int bgx_init_phy(struct bgx *bgx)
return bgx_init_of_phy(bgx);
}
+static irqreturn_t bgx_intr_handler(int irq, void *data)
+{
+ struct bgx *bgx = (struct bgx *)data;
+ struct device *dev = &bgx->pdev->dev;
+ u64 status, val;
+ int lmac;
+
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
+ status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT);
+ if (status & GMI_TXX_INT_UNDFLW) {
+ dev_err(dev, "BGX%d lmac%d UNDFLW\n", bgx->bgx_id,
+ lmac);
+ val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG);
+ val &= ~CMR_EN;
+ bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+ val |= CMR_EN;
+ bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+ }
+ /* clear interrupts */
+ bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int bgx_register_intr(struct pci_dev *pdev)
+{
+ struct bgx *bgx = pci_get_drvdata(pdev);
+ struct device *dev = &pdev->dev;
+ int num_vec, ret;
+ char irq_name[32];
+
+ /* Enable MSI-X */
+ num_vec = pci_msix_vec_count(pdev);
+ ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dev_err(dev, "Req for #%d msix vectors failed\n", num_vec);
+ return 1;
+ }
+ sprintf(irq_name, "BGX%d", bgx->bgx_id);
+ ret = request_irq(pci_irq_vector(pdev, GMPX_GMI_TX_INT),
+ bgx_intr_handler, 0, irq_name, bgx);
+ if (ret)
+ return 1;
+
+ return 0;
+}
+
static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int err;
@@ -1414,6 +1462,8 @@ static int bgx_probe(struct pci_dev *pdev, const
struct pci_device_id *ent)
xcv_init_hw(bgx->phy_mode);
bgx_init_hw(bgx);
+ bgx_register_intr(pdev);
+
/* Enable all LMACs */
for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
err = bgx_lmac_enable(bgx, lmac);
@@ -1424,6 +1474,10 @@ static int bgx_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
bgx_lmac_disable(bgx, --lmac);
goto err_enable;
}
+
+ /* enable TX FIFO Underflow interrupt */
+ bgx_reg_modify(bgx, lmac, BGX_GMP_GMI_TXX_INT_ENA_W1S,
+ GMI_TXX_INT_UNDFLW);
}
return 0;
Regards,
Tim
Powered by blists - more mailing lists