lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 17 Jun 2014 10:49:51 +0800
From:	Wei Yang <weiyang@...ux.vnet.ibm.com>
To:	Wei Yang <weiyang@...ux.vnet.ibm.com>
Cc:	davem@...emloft.net, netdev@...r.kernel.org,
	Bjorn Helgaas <bhelgaas@...gle.com>,
	Amir Vadai <amirv@...lanox.com>,
	Jack Morgenstein <jackm@....mellanox.co.il>,
	Or Gerlitz <ogerlitz@...lanox.com>
Subject: Re: [PATCH 3.14-stable] net/mlx4_core: Preserve pci_dev_data after
 __mlx4_remove_one()

David,

I saw that the fix for the crash during reboot has been merged in mainline, but I
am not sure how to check whether these backports have been merged in the stable
tree (I am not familiar with how to check the stable tree).

Do you suggest that I include that fix and send these backports again, or something else?

On Sun, Jun 01, 2014 at 03:24:35PM +0800, Wei Yang wrote:
>pci_match_id() only matches against the static pci_device_id table, so it may
>return NULL if someone binds the driver to a device manually using
>/sys/bus/pci/drivers/.../new_id.
>
>This patch adds a helper function, __mlx4_remove_one(), which does the
>tear-down work but preserves the drv_data. Functions like
>mlx4_pci_err_detected() and mlx4_restart_one() will call this helper without
>releasing drvdata.
>
>Fixes: 97a5221 "net/mlx4_core: pass pci_device_id.driver_data to __mlx4_init_one during reset".
>
>CC: Bjorn Helgaas <bhelgaas@...gle.com>
>CC: Amir Vadai <amirv@...lanox.com>
>CC: Jack Morgenstein <jackm@....mellanox.co.il>
>CC: Or Gerlitz <ogerlitz@...lanox.com>
>Signed-off-by: Wei Yang <weiyang@...ux.vnet.ibm.com>
>Acked-by: Jack Morgenstein <jackm@....mellanox.co.il>
>Signed-off-by: David S. Miller <davem@...emloft.net>
>---
> drivers/net/ethernet/mellanox/mlx4/main.c |  170 ++++++++++++++++-------------
> drivers/net/ethernet/mellanox/mlx4/mlx4.h |    1 +
> 2 files changed, 95 insertions(+), 76 deletions(-)
>
>diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
>index d413e60..b29bbe1 100644
>--- a/drivers/net/ethernet/mellanox/mlx4/main.c
>+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
>@@ -2275,13 +2275,8 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
> 	/* Allow large DMA segments, up to the firmware limit of 1 GB */
> 	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
>
>-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
>-	if (!priv) {
>-		err = -ENOMEM;
>-		goto err_release_regions;
>-	}
>-
>-	dev       = &priv->dev;
>+	dev       = pci_get_drvdata(pdev);
>+	priv      = mlx4_priv(dev);
> 	dev->pdev = pdev;
> 	INIT_LIST_HEAD(&priv->ctx_list);
> 	spin_lock_init(&priv->ctx_lock);
>@@ -2464,8 +2459,7 @@ slave_start:
> 	mlx4_sense_init(dev);
> 	mlx4_start_sense(dev);
>
>-	priv->pci_dev_data = pci_dev_data;
>-	pci_set_drvdata(pdev, dev);
>+	priv->removed = 0;
>
> 	return 0;
>
>@@ -2531,84 +2525,108 @@ err_disable_pdev:
>
> static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
> {
>+	struct mlx4_priv *priv;
>+	struct mlx4_dev *dev;
>+
> 	printk_once(KERN_INFO "%s", mlx4_version);
>
>+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
>+	if (!priv)
>+		return -ENOMEM;
>+
>+	dev       = &priv->dev;
>+	pci_set_drvdata(pdev, dev);
>+	priv->pci_dev_data = id->driver_data;
>+
> 	return __mlx4_init_one(pdev, id->driver_data);
> }
>
>-static void mlx4_remove_one(struct pci_dev *pdev)
>+static void __mlx4_remove_one(struct pci_dev *pdev)
> {
> 	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
> 	struct mlx4_priv *priv = mlx4_priv(dev);
>+	int               pci_dev_data;
> 	int p;
>
>-	if (dev) {
>-		/* in SRIOV it is not allowed to unload the pf's
>-		 * driver while there are alive vf's */
>-		if (mlx4_is_master(dev)) {
>-			if (mlx4_how_many_lives_vf(dev))
>-				printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
>-		}
>-		mlx4_stop_sense(dev);
>-		mlx4_unregister_device(dev);
>+	if (priv->removed)
>+		return;
>
>-		for (p = 1; p <= dev->caps.num_ports; p++) {
>-			mlx4_cleanup_port_info(&priv->port[p]);
>-			mlx4_CLOSE_PORT(dev, p);
>-		}
>+	pci_dev_data = priv->pci_dev_data;
>
>-		if (mlx4_is_master(dev))
>-			mlx4_free_resource_tracker(dev,
>-						   RES_TR_FREE_SLAVES_ONLY);
>-
>-		mlx4_cleanup_counters_table(dev);
>-		mlx4_cleanup_qp_table(dev);
>-		mlx4_cleanup_srq_table(dev);
>-		mlx4_cleanup_cq_table(dev);
>-		mlx4_cmd_use_polling(dev);
>-		mlx4_cleanup_eq_table(dev);
>-		mlx4_cleanup_mcg_table(dev);
>-		mlx4_cleanup_mr_table(dev);
>-		mlx4_cleanup_xrcd_table(dev);
>-		mlx4_cleanup_pd_table(dev);
>+	/* in SRIOV it is not allowed to unload the pf's
>+	 * driver while there are alive vf's */
>+	if (mlx4_is_master(dev) && mlx4_how_many_lives_vf(dev))
>+		printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
>+	mlx4_stop_sense(dev);
>+	mlx4_unregister_device(dev);
>
>-		if (mlx4_is_master(dev))
>-			mlx4_free_resource_tracker(dev,
>-						   RES_TR_FREE_STRUCTS_ONLY);
>-
>-		iounmap(priv->kar);
>-		mlx4_uar_free(dev, &priv->driver_uar);
>-		mlx4_cleanup_uar_table(dev);
>-		if (!mlx4_is_slave(dev))
>-			mlx4_clear_steering(dev);
>-		mlx4_free_eq_table(dev);
>-		if (mlx4_is_master(dev))
>-			mlx4_multi_func_cleanup(dev);
>-		mlx4_close_hca(dev);
>-		if (mlx4_is_slave(dev))
>-			mlx4_multi_func_cleanup(dev);
>-		mlx4_cmd_cleanup(dev);
>-
>-		if (dev->flags & MLX4_FLAG_MSI_X)
>-			pci_disable_msix(pdev);
>-		if (dev->flags & MLX4_FLAG_SRIOV) {
>-			mlx4_warn(dev, "Disabling SR-IOV\n");
>-			pci_disable_sriov(pdev);
>-		}
>+	for (p = 1; p <= dev->caps.num_ports; p++) {
>+		mlx4_cleanup_port_info(&priv->port[p]);
>+		mlx4_CLOSE_PORT(dev, p);
>+	}
>
>-		if (!mlx4_is_slave(dev))
>-			mlx4_free_ownership(dev);
>+	if (mlx4_is_master(dev))
>+		mlx4_free_resource_tracker(dev,
>+					   RES_TR_FREE_SLAVES_ONLY);
>+
>+	mlx4_cleanup_counters_table(dev);
>+	mlx4_cleanup_qp_table(dev);
>+	mlx4_cleanup_srq_table(dev);
>+	mlx4_cleanup_cq_table(dev);
>+	mlx4_cmd_use_polling(dev);
>+	mlx4_cleanup_eq_table(dev);
>+	mlx4_cleanup_mcg_table(dev);
>+	mlx4_cleanup_mr_table(dev);
>+	mlx4_cleanup_xrcd_table(dev);
>+	mlx4_cleanup_pd_table(dev);
>
>-		kfree(dev->caps.qp0_tunnel);
>-		kfree(dev->caps.qp0_proxy);
>-		kfree(dev->caps.qp1_tunnel);
>-		kfree(dev->caps.qp1_proxy);
>+	if (mlx4_is_master(dev))
>+		mlx4_free_resource_tracker(dev,
>+					   RES_TR_FREE_STRUCTS_ONLY);
>
>-		kfree(priv);
>-		pci_release_regions(pdev);
>-		pci_disable_device(pdev);
>-		pci_set_drvdata(pdev, NULL);
>+	iounmap(priv->kar);
>+	mlx4_uar_free(dev, &priv->driver_uar);
>+	mlx4_cleanup_uar_table(dev);
>+	if (!mlx4_is_slave(dev))
>+		mlx4_clear_steering(dev);
>+	mlx4_free_eq_table(dev);
>+	if (mlx4_is_master(dev))
>+		mlx4_multi_func_cleanup(dev);
>+	mlx4_close_hca(dev);
>+	if (mlx4_is_slave(dev))
>+		mlx4_multi_func_cleanup(dev);
>+	mlx4_cmd_cleanup(dev);
>+
>+	if (dev->flags & MLX4_FLAG_MSI_X)
>+		pci_disable_msix(pdev);
>+	if (dev->flags & MLX4_FLAG_SRIOV) {
>+		mlx4_warn(dev, "Disabling SR-IOV\n");
>+		pci_disable_sriov(pdev);
> 	}
>+
>+	if (!mlx4_is_slave(dev))
>+		mlx4_free_ownership(dev);
>+
>+	kfree(dev->caps.qp0_tunnel);
>+	kfree(dev->caps.qp0_proxy);
>+	kfree(dev->caps.qp1_tunnel);
>+	kfree(dev->caps.qp1_proxy);
>+
>+	pci_release_regions(pdev);
>+	pci_disable_device(pdev);
>+	memset(priv, 0, sizeof(*priv));
>+	priv->pci_dev_data = pci_dev_data;
>+	priv->removed = 1;
>+}
>+
>+static void mlx4_remove_one(struct pci_dev *pdev)
>+{
>+	struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
>+	struct mlx4_priv *priv = mlx4_priv(dev);
>+
>+	__mlx4_remove_one(pdev);
>+	kfree(priv);
>+	pci_set_drvdata(pdev, NULL);
> }
>
> int mlx4_restart_one(struct pci_dev *pdev)
>@@ -2618,7 +2636,7 @@ int mlx4_restart_one(struct pci_dev *pdev)
> 	int		  pci_dev_data;
>
> 	pci_dev_data = priv->pci_dev_data;
>-	mlx4_remove_one(pdev);
>+	__mlx4_remove_one(pdev);
> 	return __mlx4_init_one(pdev, pci_dev_data);
> }
>
>@@ -2673,7 +2691,7 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
> static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
> 					      pci_channel_state_t state)
> {
>-	mlx4_remove_one(pdev);
>+	__mlx4_remove_one(pdev);
>
> 	return state == pci_channel_io_perm_failure ?
> 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
>@@ -2681,11 +2699,11 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
>
> static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
> {
>-	const struct pci_device_id *id;
>-	int ret;
>+	struct mlx4_dev	 *dev  = pci_get_drvdata(pdev);
>+	struct mlx4_priv *priv = mlx4_priv(dev);
>+	int               ret;
>
>-	id = pci_match_id(mlx4_pci_table, pdev);
>-	ret = __mlx4_init_one(pdev, id->driver_data);
>+	ret = __mlx4_init_one(pdev, priv->pci_dev_data);
>
> 	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
> }
>diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>index 7aec6c8..99d7a28 100644
>--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>@@ -796,6 +796,7 @@ struct mlx4_priv {
> 	spinlock_t		ctx_lock;
>
> 	int			pci_dev_data;
>+	int                     removed;
>
> 	struct list_head        pgdir_list;
> 	struct mutex            pgdir_mutex;
>-- 
>1.7.9.5

-- 
Richard Yang
Help you, Help me

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ