lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1394376844-12812-4-git-send-email-ogerlitz@mellanox.com>
Date:	Sun,  9 Mar 2014 16:54:00 +0200
From:	Or Gerlitz <ogerlitz@...lanox.com>
To:	davem@...emloft.net
Cc:	netdev@...r.kernel.org, roland@...nel.org,
	jackm@....mellanox.co.il, amirv@...lanox.com,
	Or Gerlitz <ogerlitz@...lanox.com>
Subject: [PATCH net-next 3/7] mlx4: In RoCE allow guests to have multiple GIDS

From: Jack Morgenstein <jackm@....mellanox.co.il>

The GIDs are statically distributed, as follows:
PF: gets 16 GIDs
VFs:  Remaining GIDS are divided evenly between VFs activated by the driver.
      If the division is not even, lower-numbered VFs get an extra GID.

For an IB interface, the number of gids per guest remains as before: one gid per guest.

Signed-off-by: Jack Morgenstein <jackm@....mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@...lanox.com>
---
 drivers/infiniband/hw/mlx4/mad.c                   |   35 ++++++-
 drivers/net/ethernet/mellanox/mlx4/fw.c            |    5 +-
 drivers/net/ethernet/mellanox/mlx4/main.c          |    6 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |    3 +
 drivers/net/ethernet/mellanox/mlx4/port.c          |  117 ++++++++++++++++---
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |   64 +++++++++--
 include/linux/mlx4/device.h                        |    1 +
 7 files changed, 195 insertions(+), 36 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index e79a183..37a3fc7 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -511,7 +511,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	memset(&attr, 0, sizeof attr);
 	attr.port_num = port;
 	if (is_eth) {
-		mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw);
+		memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
 		attr.ah_flags = IB_AH_GRH;
 	}
 	ah = ib_create_ah(tun_ctx->pd, &attr);
@@ -1214,6 +1214,36 @@ out:
 	return ret;
 }
 
+static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
+{
+	int gids;
+	int vfs;
+
+	if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+		return slave;
+
+	gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+	vfs = dev->dev->num_vfs;
+
+	if (slave == 0)
+		return 0;
+	if (slave <= gids % vfs)
+		return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
+
+	return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
+}
+
+static int get_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
+			       struct ib_ah_attr *ah_attr)
+{
+	if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) {
+		ah_attr->grh.sgid_index = slave;
+		return 0;
+	}
+	ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+	return 0;
+}
+
 static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
 {
 	struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
@@ -1301,7 +1331,8 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 	ah.ibah.device = ctx->ib_dev;
 	mlx4_ib_query_ah(&ah.ibah, &ah_attr);
 	if (ah_attr.ah_flags & IB_AH_GRH)
-		ah_attr.grh.sgid_index = slave;
+		if (get_real_sgid_index(dev, slave, ctx->port, &ah_attr))
+			return;
 
 	mlx4_ib_send_to_wire(dev, slave, ctx->port,
 			     is_proxy_qp0(dev, wc->src_qp, slave) ?
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 9cdf452..6e1ee21 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -934,7 +934,10 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
 		MLX4_PUT(outbox->buf, port_type,
 			 QUERY_PORT_SUPPORTED_TYPE_OFFSET);
 
-		short_field = 1; /* slave max gids */
+		if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH)
+			short_field = mlx4_get_slave_num_gids(dev, slave);
+		else
+			short_field = 1; /* slave max gids */
 		MLX4_PUT(outbox->buf, short_field,
 			 QUERY_PORT_CUR_MAX_GID_OFFSET);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 979ea43..4c441aa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1462,7 +1462,11 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
 	int i;
 
 	for (i = 1; i <= dev->caps.num_ports; i++) {
-		dev->caps.gid_table_len[i] = 1;
+		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
+			dev->caps.gid_table_len[i] =
+				mlx4_get_slave_num_gids(dev, 0);
+		else
+			dev->caps.gid_table_len[i] = 1;
 		dev->caps.pkey_table_len[i] =
 			dev->phys_caps.pkey_phys_table_len[i] - 1;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index da829f4..6ba38c9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1287,4 +1287,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
 
 void mlx4_init_quotas(struct mlx4_dev *dev);
 
+int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave);
+int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave);
+
 #endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 095f74d..e50b201 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -507,6 +507,31 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
 }
 static struct mlx4_roce_gid_entry zgid_entry;
 
+int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave)
+{
+	if (slave == 0)
+		return MLX4_ROCE_PF_GIDS;
+	if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs))
+		return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1;
+	return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs;
+}
+
+int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave)
+{
+	int gids;
+	int vfs;
+
+	gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+	vfs = dev->num_vfs;
+
+	if (slave == 0)
+		return 0;
+	if (slave <= gids % vfs)
+		return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
+
+	return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
+}
+
 static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
 				u8 op_mod, struct mlx4_cmd_mailbox *inbox)
 {
@@ -516,15 +541,18 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
 	struct mlx4_slave_state *slave_st = &master->slave_state[slave];
 	struct mlx4_set_port_rqp_calc_context *qpn_context;
 	struct mlx4_set_port_general_context *gen_context;
-	struct mlx4_roce_gid_entry *gid_entry;
+	struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1;
 	int reset_qkey_viols;
 	int port;
 	int is_eth;
+	int num_gids;
+	int base;
 	u32 in_modifier;
 	u32 promisc;
 	u16 mtu, prev_mtu;
 	int err;
-	int i;
+	int i, j;
+	int offset;
 	__be32 agg_cap_mask;
 	__be32 slave_cap_mask;
 	__be32 new_cap_mask;
@@ -585,26 +613,65 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
 			gen_context->mtu = cpu_to_be16(master->max_mtu[port]);
 			break;
 		case MLX4_SET_PORT_GID_TABLE:
-			gid_entry = (struct mlx4_roce_gid_entry *)(inbox->buf);
-			/* check that do not have duplicates */
-			if (memcmp(gid_entry->raw, zgid_entry.raw, 16)) {
-				for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
-					if (slave != i &&
-					    !memcmp(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16)) {
-						mlx4_warn(dev, "requested gid entry for slave:%d "
-							  "is a duplicate of slave %d\n",
-							  slave, i);
-						return -EEXIST;
+			/* change to MULTIPLE entries: number of guest's gids
+			 * need a FOR-loop here over number of gids the guest has.
+			 * 1. Check no duplicates in gids passed by slave
+			 */
+			num_gids = mlx4_get_slave_num_gids(dev, slave);
+			base = mlx4_get_base_gid_ix(dev, slave);
+			gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
+			for (i = 0; i < num_gids; gid_entry_mbox++, i++) {
+				if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
+					    sizeof(zgid_entry)))
+					continue;
+				gid_entry_mb1 = gid_entry_mbox + 1;
+				for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) {
+					if (!memcmp(gid_entry_mb1->raw,
+						    zgid_entry.raw, sizeof(zgid_entry)))
+						continue;
+					if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw,
+						    sizeof(gid_entry_mbox->raw))) {
+						/* found duplicate */
+						return -EINVAL;
 					}
 				}
 			}
-			/* insert slave GID at proper index */
-			memcpy(priv->roce_gids[port - 1][slave].raw, gid_entry->raw, 16);
-			/* rewrite roce port gids table to FW */
+
+			/* 2. Check that do not have duplicates in OTHER
+			 *    entries in the port GID table
+			 */
 			for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
-				memcpy(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16);
-				gid_entry++;
+				if (i >= base && i < base + num_gids)
+					continue; /* don't compare to slave's current gids */
+				gid_entry_tbl = &priv->roce_gids[port - 1][i];
+				if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry)))
+					continue;
+				gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
+				for (j = 0; j < num_gids; gid_entry_mbox++, j++) {
+					if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
+						    sizeof(zgid_entry)))
+						continue;
+					if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw,
+						    sizeof(gid_entry_tbl->raw))) {
+						/* found duplicate */
+						mlx4_warn(dev, "requested gid entry for slave:%d "
+							  "is a duplicate of gid at index %d\n",
+							  slave, i);
+						return -EINVAL;
+					}
+				}
 			}
+
+			/* insert slave GIDs with memcpy, starting at slave's base index */
+			gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
+			for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++)
+				memcpy(priv->roce_gids[port - 1][offset].raw, gid_entry_mbox->raw, 16);
+
+			/* Now, copy roce port gids table to current mailbox for passing to FW */
+			gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
+			for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++)
+				memcpy(gid_entry_mbox->raw, priv->roce_gids[port - 1][i].raw, 16);
+
 			break;
 		}
 		return mlx4_cmd(dev, inbox->dma, in_mod, op_mod,
@@ -957,6 +1024,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *s
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int i, found_ix = -1;
+	int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
 
 	if (!mlx4_is_mfunc(dev))
 		return -EINVAL;
@@ -968,8 +1036,19 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *s
 		}
 	}
 
-	if (found_ix >= 0)
-		*slave_id = found_ix;
+	if (found_ix >= 0) {
+		if (found_ix < MLX4_ROCE_PF_GIDS)
+			*slave_id = 0;
+		else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) *
+			 (vf_gids / dev->num_vfs + 1))
+			*slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) /
+				     (vf_gids / dev->num_vfs + 1)) + 1;
+		else
+			*slave_id =
+			((found_ix - MLX4_ROCE_PF_GIDS -
+			  ((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) /
+			 (vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1;
+	}
 
 	return (found_ix >= 0) ? 0 : -EINVAL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 57428a0..1c3634e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -219,6 +219,11 @@ struct res_fs_rule {
 	int			qpn;
 };
 
+static int mlx4_is_eth(struct mlx4_dev *dev, int port)
+{
+	return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1;
+}
+
 static void *res_tracker_lookup(struct rb_root *root, u64 res_id)
 {
 	struct rb_node *node = root->rb_node;
@@ -600,15 +605,34 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
 	struct mlx4_qp_context	*qp_ctx = inbox->buf + 8;
 	enum mlx4_qp_optpar	optpar = be32_to_cpu(*(__be32 *) inbox->buf);
 	u32			ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
+	int port;
 
-	if (MLX4_QP_ST_UD == ts)
-		qp_ctx->pri_path.mgid_index = 0x80 | slave;
-
-	if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) {
-		if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH)
-			qp_ctx->pri_path.mgid_index = slave & 0x7F;
-		if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
-			qp_ctx->alt_path.mgid_index = slave & 0x7F;
+	if (MLX4_QP_ST_UD == ts) {
+		port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+		if (mlx4_is_eth(dev, port))
+			qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80;
+		else
+			qp_ctx->pri_path.mgid_index = slave | 0x80;
+
+	} else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_XRC == ts || MLX4_QP_ST_UC == ts) {
+		if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
+			port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+			if (mlx4_is_eth(dev, port)) {
+				qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave);
+				qp_ctx->pri_path.mgid_index &= 0x7f;
+			} else {
+				qp_ctx->pri_path.mgid_index = slave & 0x7F;
+			}
+		}
+		if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+			port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
+			if (mlx4_is_eth(dev, port)) {
+				qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave);
+				qp_ctx->alt_path.mgid_index &= 0x7f;
+			} else {
+				qp_ctx->alt_path.mgid_index = slave & 0x7F;
+			}
+		}
 	}
 }
 
@@ -2734,6 +2758,8 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 	u32			qp_type;
 	struct mlx4_qp_context	*qp_ctx;
 	enum mlx4_qp_optpar	optpar;
+	int port;
+	int num_gids;
 
 	qp_ctx  = inbox->buf + 8;
 	qp_type	= (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
@@ -2741,6 +2767,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 
 	switch (qp_type) {
 	case MLX4_QP_ST_RC:
+	case MLX4_QP_ST_XRC:
 	case MLX4_QP_ST_UC:
 		switch (transition) {
 		case QP_TRANS_INIT2RTR:
@@ -2749,13 +2776,24 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 		case QP_TRANS_SQD2SQD:
 		case QP_TRANS_SQD2RTS:
 			if (slave != mlx4_master_func_num(dev))
-				/* slaves have only gid index 0 */
-				if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH)
-					if (qp_ctx->pri_path.mgid_index)
+				if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
+					port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+					if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
+						num_gids = mlx4_get_slave_num_gids(dev, slave);
+					else
+						num_gids = 1;
+					if (qp_ctx->pri_path.mgid_index >= num_gids)
 						return -EINVAL;
-				if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
-					if (qp_ctx->alt_path.mgid_index)
+				}
+				if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+					port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
+					if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
+						num_gids = mlx4_get_slave_num_gids(dev, slave);
+					else
+						num_gids = 1;
+					if (qp_ctx->alt_path.mgid_index >= num_gids)
 						return -EINVAL;
+				}
 			break;
 		default:
 			break;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index fbe6cda..5cb7cee 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -49,6 +49,7 @@
 #define MIN_MSIX_P_PORT		5
 
 #define MLX4_ROCE_MAX_GIDS	128
+#define MLX4_ROCE_PF_GIDS	16
 
 enum {
 	MLX4_FLAG_MSI_X		= 1 << 0,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ