[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200422125026.GS11945@mellanox.com>
Date: Wed, 22 Apr 2020 09:50:26 -0300
From: Jason Gunthorpe <jgg@...lanox.com>
To: Maor Gottlieb <maorg@...lanox.com>
Cc: davem@...emloft.net, dledford@...hat.com, j.vosburgh@...il.com,
vfalico@...il.com, andy@...yhouse.net, kuba@...nel.org,
jiri@...lanox.com, dsahern@...nel.org, leonro@...lanox.com,
saeedm@...lanox.com, linux-rdma@...r.kernel.org,
netdev@...r.kernel.org, alexr@...lanox.com
Subject: Re: [PATCH V4 mlx5-next 10/15] RDMA/core: Add LAG functionality
On Wed, Apr 22, 2020 at 11:39:46AM +0300, Maor Gottlieb wrote:
> Add support to get the RoCE LAG xmit slave by building skb
> of the RoCE packet and call to master_get_xmit_slave.
> If a driver wants to get the xmit slave assuming all slaves are
> available, then it needs to set RDMA_LAG_FLAGS_HASH_ALL_SLAVES in flags.
>
> Signed-off-by: Maor Gottlieb <maorg@...lanox.com>
> Reviewed-by: Leon Romanovsky <leonro@...lanox.com>
> drivers/infiniband/core/Makefile | 2 +-
> drivers/infiniband/core/lag.c | 138 +++++++++++++++++++++++++++++++
> include/rdma/ib_verbs.h | 2 +
> include/rdma/lag.h | 22 +++++
> 4 files changed, 163 insertions(+), 1 deletion(-)
> create mode 100644 drivers/infiniband/core/lag.c
> create mode 100644 include/rdma/lag.h
>
> diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
> index d1b14887960e..870f0fcd54d5 100644
> +++ b/drivers/infiniband/core/Makefile
> @@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
> roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
> multicast.o mad.o smi.o agent.o mad_rmpp.o \
> nldev.o restrack.o counters.o ib_core_uverbs.o \
> - trace.o
> + trace.o lag.o
>
> ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
> ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
> diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
> new file mode 100644
> index 000000000000..3036fb3dc43a
> +++ b/drivers/infiniband/core/lag.c
> @@ -0,0 +1,138 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +/*
> + * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
> + */
> +
> +#include <rdma/ib_verbs.h>
> +#include <rdma/ib_cache.h>
> +#include <rdma/lag.h>
> +
> +static struct sk_buff *rdma_build_skb(struct ib_device *device,
> + struct net_device *netdev,
> + struct rdma_ah_attr *ah_attr)
> +{
> + struct ipv6hdr *ip6h;
> + struct sk_buff *skb;
> + struct ethhdr *eth;
> + struct iphdr *iph;
> + struct udphdr *uh;
> + u8 smac[ETH_ALEN];
> + bool is_ipv4;
> + int hdr_len;
> +
> + is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw);
> + hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev);
> + hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr);
> +
> + skb = alloc_skb(hdr_len, GFP_ATOMIC);
> + if (!skb)
> + return NULL;
> +
> + skb->dev = netdev;
> + skb_reserve(skb, hdr_len);
> + skb_push(skb, sizeof(struct udphdr));
> + skb_reset_transport_header(skb);
> + uh = udp_hdr(skb);
> + uh->source = htons(0xC000);
> + uh->dest = htons(ROCE_V2_UDP_DPORT);
> + uh->len = htons(sizeof(struct udphdr));
> +
> + if (is_ipv4) {
> + skb_push(skb, sizeof(struct iphdr));
> + skb_reset_network_header(skb);
> + iph = ip_hdr(skb);
> + iph->frag_off = 0;
> + iph->version = 4;
> + iph->protocol = IPPROTO_UDP;
> + iph->ihl = 0x5;
> + iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct
> + iphdr));
> + memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12,
> + sizeof(struct in_addr));
> + memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12,
> + sizeof(struct in_addr));
> + } else {
> + skb_push(skb, sizeof(struct ipv6hdr));
> + skb_reset_network_header(skb);
> + ip6h = ipv6_hdr(skb);
> + ip6h->version = 6;
> + ip6h->nexthdr = IPPROTO_UDP;
> + memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label,
> + sizeof(*ip6h->flow_lbl));
> + memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw,
> + sizeof(struct in6_addr));
> + memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw,
> + sizeof(struct in6_addr));
> + }
What about setting up the UDP header? It looks like this needs to come
before the sport patch, and the sport patch then needs to modify this code too.
> +/**
> + * rdma_lag_put_ah_roce_slave - Release the reference on the xmit slave netdev
> + * @ah_attr: AH attributes whose roce.xmit_slave may have been set by
> + *           rdma_lag_get_ah_roce_slave()
> + *
> + * dev_put() is only called when a slave netdev was actually stored; the
> + * guard matters because xmit_slave is left NULL in the non-LAG return
> + * paths of rdma_lag_get_ah_roce_slave(). NOTE(review): this assumes the
> + * struct is zero-initialized on every allocation path - see question below.
> + */
> +void rdma_lag_put_ah_roce_slave(struct rdma_ah_attr *ah_attr)
> +{
> + if (ah_attr->roce.xmit_slave)
> + dev_put(ah_attr->roce.xmit_slave);
> +}
> +
> +int rdma_lag_get_ah_roce_slave(struct ib_device *device,
> + struct rdma_ah_attr *ah_attr)
> +{
> + struct net_device *master;
> + struct net_device *slave;
> +
> + if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
> + ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP))
> + return 0;
> +
> + rcu_read_lock();
> + master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr);
> + if (IS_ERR(master)) {
> + rcu_read_unlock();
> + return PTR_ERR(master);
> + }
> + dev_hold(master);
> + rcu_read_unlock();
> +
> + if (!netif_is_bond_master(master)) {
> + dev_put(master);
> + return 0;
> + }
> +
> + slave = rdma_get_xmit_slave_udp(device, master, ah_attr);
> +
> + dev_put(master);
> + if (!slave) {
> + ibdev_warn(device, "Failed to get lag xmit slave\n");
> + return -EINVAL;
> + }
> +
> + ah_attr->roce.xmit_slave = slave;
Is xmit_slave reliably NULL in the other return 0 cases?
Jason
Powered by blists - more mailing lists