lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aP0ae1rxKnaJUO-_@shell.armlinux.org.uk>
Date: Sat, 25 Oct 2025 19:44:11 +0100
From: "Russell King (Oracle)" <linux@...linux.org.uk>
To: Daniel Golle <daniel@...rotopia.org>
Cc: Hauke Mehrtens <hauke@...ke-m.de>, Andrew Lunn <andrew@...n.ch>,
	Vladimir Oltean <olteanv@...il.com>,
	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
	Rob Herring <robh@...nel.org>,
	Krzysztof Kozlowski <krzk+dt@...nel.org>,
	Conor Dooley <conor+dt@...nel.org>, Simon Horman <horms@...nel.org>,
	netdev@...r.kernel.org, devicetree@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	Andreas Schirm <andreas.schirm@...mens.com>,
	Lukas Stockmann <lukas.stockmann@...mens.com>,
	Alexander Sverdlin <alexander.sverdlin@...mens.com>,
	Peter Christen <peter.christen@...mens.com>,
	Avinash Jayaraman <ajayaraman@...linear.com>,
	Bing tao Xu <bxu@...linear.com>, Liang Xu <lxu@...linear.com>,
	Juraj Povazanec <jpovazanec@...linear.com>,
	"Fanni (Fang-Yi) Chan" <fchan@...linear.com>,
	"Benny (Ying-Tsan) Weng" <yweng@...linear.com>,
	"Livia M. Rosu" <lrosu@...linear.com>,
	John Crispin <john@...ozen.org>
Subject: Re: [PATCH net-next v2 13/13] net: dsa: add driver for MaxLinear
 GSW1xx switch family

On Sat, Oct 25, 2025 at 03:51:23PM +0100, Daniel Golle wrote:
> +static int gsw1xx_sgmii_pcs_config(struct phylink_pcs *pcs,
> +				   unsigned int neg_mode,
> +				   phy_interface_t interface,
> +				   const unsigned long *advertising,
> +				   bool permit_pause_to_mac)
> +{
> +	struct gsw1xx_priv *priv = sgmii_pcs_to_gsw1xx(pcs);
> +	bool sgmii_mac_mode = dsa_is_user_port(priv->gswip.ds, GSW1XX_SGMII_PORT);
> +	u16 txaneg, anegctl, val, nco_ctrl;
> +	int ret;
> +
> +	/* Assert and deassert SGMII shell reset */
> +	ret = regmap_set_bits(priv->shell, GSW1XX_SHELL_RST_REQ,
> +			      GSW1XX_RST_REQ_SGMII_SHELL);
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = regmap_clear_bits(priv->shell, GSW1XX_SHELL_RST_REQ,
> +				GSW1XX_RST_REQ_SGMII_SHELL);
> +	if (ret < 0)
> +		return ret;

So this is disruptive. Overall, at this point, having added every other
comment below, this code has me wondering whether you are aware of the
documentation I have written in phylink.h for pcs_config(). This code
goes against this paragraph in that documentation:

"
 * pcs_config() will be called when configuration of the PCS is required
 * or when the advertisement is possibly updated. It must not unnecessarily
 * disrupt an established link.
"

Low quality implementations lead to poor user experiences.

> +
> +	/* Hardware Bringup FSM Enable  */
> +	ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_HWBU_CTRL,
> +			   GSW1XX_SGMII_PHY_HWBU_CTRL_EN_HWBU_FSM |
> +			   GSW1XX_SGMII_PHY_HWBU_CTRL_HW_FSM_EN);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Configure SGMII PHY Receiver */
> +	val = FIELD_PREP(GSW1XX_SGMII_PHY_RX0_CFG2_EQ,
> +			 GSW1XX_SGMII_PHY_RX0_CFG2_EQ_DEF) |
> +	      GSW1XX_SGMII_PHY_RX0_CFG2_LOS_EN |
> +	      GSW1XX_SGMII_PHY_RX0_CFG2_TERM_EN |
> +	      FIELD_PREP(GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT,
> +			 GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT_DEF);
> +
> +	// if (!priv->dts.sgmii_rx_invert)
> +		val |= GSW1XX_SGMII_PHY_RX0_CFG2_INVERT;
> +
> +	ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_RX0_CFG2, val);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Reset and Release TBI */
> +	val = GSW1XX_SGMII_TBI_TBICTL_INITTBI | GSW1XX_SGMII_TBI_TBICTL_ENTBI |
> +	      GSW1XX_SGMII_TBI_TBICTL_CRSTRR | GSW1XX_SGMII_TBI_TBICTL_CRSOFF;
> +	ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_TBICTL, val);
> +	if (ret < 0)
> +		return ret;
> +	val &= ~GSW1XX_SGMII_TBI_TBICTL_INITTBI;
> +	ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_TBICTL, val);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Release Tx Data Buffers */
> +	ret = regmap_set_bits(priv->sgmii, GSW1XX_SGMII_PCS_TXB_CTL,
> +			      GSW1XX_SGMII_PCS_TXB_CTL_INIT_TX_TXB);
> +	if (ret < 0)
> +		return ret;
> +	ret = regmap_clear_bits(priv->sgmii, GSW1XX_SGMII_PCS_TXB_CTL,
> +				GSW1XX_SGMII_PCS_TXB_CTL_INIT_TX_TXB);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Release Rx Data Buffers */
> +	ret = regmap_set_bits(priv->sgmii, GSW1XX_SGMII_PCS_RXB_CTL,
> +			      GSW1XX_SGMII_PCS_RXB_CTL_INIT_RX_RXB);
> +	if (ret < 0)
> +		return ret;
> +	ret = regmap_clear_bits(priv->sgmii, GSW1XX_SGMII_PCS_RXB_CTL,
> +				GSW1XX_SGMII_PCS_RXB_CTL_INIT_RX_RXB);
> +	if (ret < 0)
> +		return ret;
> +
> +	anegctl = GSW1XX_SGMII_TBI_ANEGCTL_OVRANEG;
> +	if (neg_mode != PHYLINK_PCS_NEG_INBAND_ENABLED)
> +		anegctl |= GSW1XX_SGMII_TBI_ANEGCTL_OVRABL;

override aneg and override able? What's this doing?

> +
> +	switch (phylink_get_link_timer_ns(interface)) {
> +	case 10000:
> +		anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_LT,
> +				      GSW1XX_SGMII_TBI_ANEGCTL_LT_10US);
> +		break;
> +	case 1600000:
> +		anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_LT,
> +				      GSW1XX_SGMII_TBI_ANEGCTL_LT_1_6MS);
> +		break;
> +	case 5000000:
> +		anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_LT,
> +				      GSW1XX_SGMII_TBI_ANEGCTL_LT_5MS);
> +		break;
> +	case 10000000:
> +		anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_LT,
> +				      GSW1XX_SGMII_TBI_ANEGCTL_LT_10MS);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (interface == PHY_INTERFACE_MODE_SGMII) {
> +		txaneg = ADVERTISE_SGMII;
> +		if (sgmii_mac_mode) {
> +			txaneg |= BIT(14); /* MAC should always send BIT 14 */

Bit 14 is ADVERTISE_LPACK.

I think I'd prefer:

			txaneg = ADVERTISE_SGMII | ADVERTISE_LPACK;

and...

> +			anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_ANMODE,
> +					      GSW1XX_SGMII_TBI_ANEGCTL_ANMODE_SGMII_MAC);
> +		} else {
> +			txaneg |= LPA_SGMII_1000FULL;

			txaneg = LPA_SGMII | LPA_SGMII_1000FULL;

here.

> +			anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_ANMODE,
> +					      GSW1XX_SGMII_TBI_ANEGCTL_ANMODE_SGMII_PHY);

So this seems to be yet another case of reverse SGMII. Andrew, please
can we get to a conclusion on PHY_INTERFACE_MODE_REVSGMII before we
end up with a crapshow of drivers doing their own stuff *exactly*
like we see here?

> +		if (neg_mode & PHYLINK_PCS_NEG_INBAND)
> +			anegctl |= GSW1XX_SGMII_TBI_ANEGCTL_ANEGEN;

Please add a comment describing what is going on here. What does this
register bit do...

> +	} else if (interface == PHY_INTERFACE_MODE_1000BASEX ||
> +		   interface == PHY_INTERFACE_MODE_2500BASEX) {
> +		txaneg = BIT(5) | BIT(7);

ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE ?

> +		anegctl |= FIELD_PREP(GSW1XX_SGMII_TBI_ANEGCTL_ANMODE,
> +				      GSW1XX_SGMII_TBI_ANEGCTL_ANMODE_1000BASEX);
> +	} else {
> +		dev_err(priv->gswip.dev, "%s: SGMII wrong interface mode %s\n",
> +			__func__, phy_modes(interface));
> +		return -EINVAL;
> +	}
> +
> +	ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_TXANEGH,
> +			   FIELD_GET(GENMASK(15, 8), txaneg));
> +	if (ret < 0)
> +		return ret;
> +	ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_TXANEGL,
> +			   FIELD_GET(GENMASK(7, 0), txaneg));
> +	if (ret < 0)
> +		return ret;
> +	ret = regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL, anegctl);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* setup SerDes clock speed */
> +	if (interface == PHY_INTERFACE_MODE_2500BASEX)
> +		nco_ctrl = GSW1XX_SGMII_2G5 | GSW1XX_SGMII_2G5_NCO2;
> +	else
> +		nco_ctrl = GSW1XX_SGMII_1G | GSW1XX_SGMII_1G_NCO1;
> +
> +	ret = regmap_update_bits(priv->clk, GSW1XX_CLK_NCO_CTRL,
> +				 GSW1XX_SGMII_HSP_MASK | GSW1XX_SGMII_SEL,
> +				 nco_ctrl);
> +	if (ret)
> +		return ret;
> +
> +	ret = gsw1xx_sgmii_phy_xaui_write(priv, 0x30, 0x80);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static void gsw1xx_sgmii_pcs_link_up(struct phylink_pcs *pcs,
> +				     unsigned int neg_mode,
> +				     phy_interface_t interface, int speed,
> +				     int duplex)
> +{
> +	struct gsw1xx_priv *priv = sgmii_pcs_to_gsw1xx(pcs);
> +	u16 lpstat;
> +
> +	/* When in-band AN is enabled hardware will set lpstat */
> +	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
> +		return;
> +
> +	/* Force speed and duplex settings */
> +	if (interface == PHY_INTERFACE_MODE_SGMII) {
> +		if (speed == SPEED_10)
> +			lpstat = FIELD_PREP(GSW1XX_SGMII_TBI_LPSTAT_SPEED,
> +					    GSW1XX_SGMII_TBI_LPSTAT_SPEED_10);
> +		else if (speed == SPEED_100)
> +			lpstat = FIELD_PREP(GSW1XX_SGMII_TBI_LPSTAT_SPEED,
> +					    GSW1XX_SGMII_TBI_LPSTAT_SPEED_100);
> +		else
> +			lpstat = FIELD_PREP(GSW1XX_SGMII_TBI_LPSTAT_SPEED,
> +					    GSW1XX_SGMII_TBI_LPSTAT_SPEED_1000);
> +	} else {
> +		lpstat = FIELD_PREP(GSW1XX_SGMII_TBI_LPSTAT_SPEED,
> +				    GSW1XX_SGMII_TBI_LPSTAT_SPEED_NOSGMII);
> +	}
> +
> +	if (duplex == DUPLEX_FULL)
> +		lpstat |= GSW1XX_SGMII_TBI_LPSTAT_DUPLEX;
> +
> +	regmap_write(priv->sgmii, GSW1XX_SGMII_TBI_LPSTAT, lpstat);
> +}
> +
> +static void gsw1xx_sgmii_pcs_get_state(struct phylink_pcs *pcs,
> +				       unsigned int neg_mode,
> +				       struct phylink_link_state *state)
> +{
> +	struct gsw1xx_priv *priv = sgmii_pcs_to_gsw1xx(pcs);
> +	int ret;
> +	u32 val;
> +
> +	ret = regmap_read(priv->sgmii, GSW1XX_SGMII_TBI_TBISTAT, &val);
> +	if (ret < 0)
> +		return;
> +
> +	state->link = !!(val & GSW1XX_SGMII_TBI_TBISTAT_LINK);
> +	state->an_complete = !!(val & GSW1XX_SGMII_TBI_TBISTAT_AN_COMPLETE);
> +
> +	ret = regmap_read(priv->sgmii, GSW1XX_SGMII_TBI_LPSTAT, &val);
> +	if (ret < 0)
> +		return;
> +
> +	state->duplex = (val & GSW1XX_SGMII_TBI_LPSTAT_DUPLEX) ?
> +			 DUPLEX_FULL : DUPLEX_HALF;
> +	if (val & GSW1XX_SGMII_TBI_LPSTAT_PAUSE_RX)
> +		state->pause |= MLO_PAUSE_RX;
> +
> +	if (val & GSW1XX_SGMII_TBI_LPSTAT_PAUSE_TX)
> +		state->pause |= MLO_PAUSE_TX;
> +
> +	switch (FIELD_GET(GSW1XX_SGMII_TBI_LPSTAT_SPEED, val)) {
> +	case GSW1XX_SGMII_TBI_LPSTAT_SPEED_10:
> +		state->speed = SPEED_10;
> +		break;
> +	case GSW1XX_SGMII_TBI_LPSTAT_SPEED_100:
> +		state->speed = SPEED_100;
> +		break;
> +	case GSW1XX_SGMII_TBI_LPSTAT_SPEED_1000:
> +		state->speed = SPEED_1000;
> +		break;
> +	case GSW1XX_SGMII_TBI_LPSTAT_SPEED_NOSGMII:
> +		if (state->interface == PHY_INTERFACE_MODE_1000BASEX)
> +			state->speed = SPEED_1000;
> +		else if (state->interface == PHY_INTERFACE_MODE_2500BASEX)
> +			state->speed = SPEED_2500;
> +		else
> +			state->speed = SPEED_UNKNOWN;
> +		break;
> +	}
> +}
> +
> +static void gsw1xx_sgmii_pcs_an_restart(struct phylink_pcs *pcs)
> +{
> +	struct gsw1xx_priv *priv = sgmii_pcs_to_gsw1xx(pcs);
> +
> +	regmap_set_bits(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL,
> +			GSW1XX_SGMII_TBI_ANEGCTL_RANEG);
> +}
> +
> +static int gsw1xx_sgmii_pcs_enable(struct phylink_pcs *pcs)
> +{
> +	struct gsw1xx_priv *priv = sgmii_pcs_to_gsw1xx(pcs);
> +
> +	/* Deassert SGMII shell reset */
> +	return regmap_clear_bits(priv->shell, GSW1XX_SHELL_RST_REQ,
> +				 GSW1XX_RST_REQ_SGMII_SHELL);
> +}
> +
> +static void gsw1xx_sgmii_pcs_disable(struct phylink_pcs *pcs)
> +{
> +	struct gsw1xx_priv *priv = sgmii_pcs_to_gsw1xx(pcs);
> +
> +	/* Assert SGMII shell reset */
> +	regmap_set_bits(priv->shell, GSW1XX_SHELL_RST_REQ,
> +			GSW1XX_RST_REQ_SGMII_SHELL);
> +}
> +
> +static const struct phylink_pcs_ops gsw1xx_sgmii_pcs_ops = {
> +	.pcs_an_restart = gsw1xx_sgmii_pcs_an_restart,
> +	.pcs_config = gsw1xx_sgmii_pcs_config,
> +	.pcs_disable = gsw1xx_sgmii_pcs_disable,
> +	.pcs_enable = gsw1xx_sgmii_pcs_enable,
> +	.pcs_get_state = gsw1xx_sgmii_pcs_get_state,
> +	.pcs_link_up = gsw1xx_sgmii_pcs_link_up,

Please order these in the same order as they appear in the struct, and
please order your functions above in the same order. This makes it
easier in future if new methods need to be added.

Also, please add the .pcs_inband_caps method to describe the
capabilities of the PCS.

It seems to me that this is not just a Cisco SGMII PCS, but also
supports IEEE 802.3 1000BASE-X. "SGMII" is an ambiguous term. Please
avoid propagating this ambigutiy to the kernel. I think in this case
merely "gsw1xx_pcs_xyz" will do.

> +};
> +
> +static void gsw1xx_phylink_get_caps(struct dsa_switch *ds, int port,
> +				    struct phylink_config *config)
> +{
> +	struct gswip_priv *priv = ds->priv;
> +
> +	config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
> +				   MAC_10 | MAC_100 | MAC_1000;
> +
> +	switch (port) {
> +	case 0:
> +	case 1:
> +	case 2:
> +	case 3:
> +		__set_bit(PHY_INTERFACE_MODE_INTERNAL,
> +			  config->supported_interfaces);
> +		break;
> +	case 4: /* port 4: SGMII */
> +		__set_bit(PHY_INTERFACE_MODE_SGMII,
> +			  config->supported_interfaces);
> +		__set_bit(PHY_INTERFACE_MODE_1000BASEX,
> +			  config->supported_interfaces);
> +		if (priv->hw_info->supports_2500m) {
> +			__set_bit(PHY_INTERFACE_MODE_2500BASEX,
> +				  config->supported_interfaces);
> +			config->mac_capabilities |= MAC_2500FD;
> +		}
> +		return; /* no support for EEE on SGMII port */
> +	case 5: /* port 5: RGMII or RMII */
> +		__set_bit(PHY_INTERFACE_MODE_RMII,
> +			  config->supported_interfaces);
> +		phy_interface_set_rgmii(config->supported_interfaces);
> +		break;
> +	}
> +
> +	config->lpi_capabilities = MAC_100FD | MAC_1000FD;
> +	config->lpi_timer_default = 20;
> +	memcpy(config->lpi_interfaces, config->supported_interfaces,
> +	       sizeof(config->lpi_interfaces));
> +}
> +
> +static struct phylink_pcs *gsw1xx_phylink_mac_select_pcs(struct phylink_config *config,
> +							 phy_interface_t interface)
> +{
> +	struct dsa_port *dp = dsa_phylink_to_port(config);
> +	struct gswip_priv *gswip_priv = dp->ds->priv;
> +	struct gsw1xx_priv *gsw1xx_priv = container_of(gswip_priv,
> +						       struct gsw1xx_priv,
> +						       gswip);

Reverse christmas tree?

> +static int gsw1xx_probe(struct mdio_device *mdiodev)
> +{
> +	struct device *dev = &mdiodev->dev;
> +	struct gsw1xx_priv *priv;
> +	u32 version;
> +	int ret;
> +
> +	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	priv->mdio_dev = mdiodev;
> +	priv->smdio_badr = GSW1XX_SMDIO_BADR_UNKNOWN;
> +
> +	priv->gswip.dev = dev;
> +	priv->gswip.hw_info = of_device_get_match_data(dev);
> +	if (!priv->gswip.hw_info)
> +		return -EINVAL;
> +
> +	priv->gswip.gswip = gsw1xx_regmap_init(priv, "switch",
> +					       GSW1XX_SWITCH_BASE, 0xfff);
> +	if (IS_ERR(priv->gswip.gswip))
> +		return PTR_ERR(priv->gswip.gswip);
> +
> +	priv->gswip.mdio = gsw1xx_regmap_init(priv, "mdio", GSW1XX_MMDIO_BASE,
> +					      0xff);
> +	if (IS_ERR(priv->gswip.mdio))
> +		return PTR_ERR(priv->gswip.mdio);
> +
> +	priv->gswip.mii = gsw1xx_regmap_init(priv, "mii", GSW1XX_RGMII_BASE,
> +					     0xff);
> +	if (IS_ERR(priv->gswip.mii))
> +		return PTR_ERR(priv->gswip.mii);
> +
> +	priv->sgmii = gsw1xx_regmap_init(priv, "sgmii", GSW1XX_SGMII_BASE,
> +					 0xfff);
> +	if (IS_ERR(priv->sgmii))
> +		return PTR_ERR(priv->sgmii);
> +
> +	priv->gpio = gsw1xx_regmap_init(priv, "gpio", GSW1XX_GPIO_BASE, 0xff);
> +	if (IS_ERR(priv->gpio))
> +		return PTR_ERR(priv->gpio);
> +
> +	priv->clk = gsw1xx_regmap_init(priv, "clk", GSW1XX_CLK_BASE, 0xff);
> +	if (IS_ERR(priv->clk))
> +		return PTR_ERR(priv->clk);
> +
> +	priv->shell = gsw1xx_regmap_init(priv, "shell", GSW1XX_SHELL_BASE,
> +					 0xff);
> +	if (IS_ERR(priv->shell))
> +		return PTR_ERR(priv->shell);
> +
> +	priv->sgmii_pcs.ops = &gsw1xx_sgmii_pcs_ops;
> +	priv->sgmii_pcs.poll = 1;

This is declared as a C99 'bool'. It takes true/false values in
preference to 0/1.


-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ