[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1341158452.4852.107.camel@deadeye.wl.decadent.org.uk>
Date: Sun, 1 Jul 2012 17:00:52 +0100
From: Ben Hutchings <bhutchings@...arflare.com>
To: Or Gerlitz <ogerlitz@...lanox.com>
CC: <davem@...emloft.net>, <roland@...nel.org>,
<yevgenyp@...lanox.com>, <oren@...lanox.com>,
<netdev@...r.kernel.org>, Hadar Hen Zion <hadarh@...lanox.co.il>
Subject: Re: [PATCH net-next 09/10] net/mlx4_en: Manage flow steering rules
with ethtool
On Sun, 2012-07-01 at 12:43 +0300, Or Gerlitz wrote:
> From: Hadar Hen Zion <hadarh@...lanox.co.il>
>
> Implement the ethtool APIs for attaching L2/L3/L4 based flow steering
> rules to the netdevice RX rings. Added set_rxnfc callback and enhanced
> the existing get_rxnfc callback.
>
> Signed-off-by: Hadar Hen Zion <hadarh@...lanox.co.il>
> Signed-off-by: Or Gerlitz <ogerlitz@...lanox.com>
> ---
> drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 373 +++++++++++++++++++++++
> drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 7 +
> 2 files changed, 380 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
> index 72901ce..30de264 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
> @@ -38,6 +38,10 @@
> #include "mlx4_en.h"
> #include "en_port.h"
>
> +#define EN_ETHTOOL_QP_ATTACH (1ull << 63)
> +#define EN_ETHTOOL_MAC_MASK 0xffffffffffffULL
> +#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff)
> +#define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff)
>
> static void
> mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
> @@ -599,16 +603,360 @@ static int mlx4_en_set_rxfh_indir(struct net_device *dev,
> return err;
> }
>
> +#define not_all_zeros_or_all_ones(field, type) \
> + (field && (type)~field)
> +
> +static int mlx4_en_validate_flow(struct net_device *dev,
> + struct ethtool_rxnfc *cmd)
> +{
> + struct ethtool_usrip4_spec *l3_mask;
> + struct ethtool_tcpip4_spec *l4_mask;
> + struct ethhdr *eth_mask;
> + u64 full_mac = ~0ull;
> + u64 zero_mac = 0;
> +
> + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
> + return -EINVAL;
> +
> + switch (cmd->fs.flow_type & ~FLOW_EXT) {
> + case TCP_V4_FLOW:
> + case UDP_V4_FLOW:
> + if (cmd->fs.h_u.tcp_ip4_spec.tos)
> + return -EOPNOTSUPP;
I suspect that your filter ignores TOS, rather than only matching TOS ==
0, so you should actually be checking the corresponding field in the
mask (fs.m_u). The error code should be EINVAL.
> + l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
> + /* don't allow mask which isn't all 0 or 1 */
> + if (not_all_zeros_or_all_ones(l4_mask->ip4src, __be32) ||
> + not_all_zeros_or_all_ones(l4_mask->ip4dst, __be32) ||
> + not_all_zeros_or_all_ones(l4_mask->psrc, __be16) ||
> + not_all_zeros_or_all_ones(l4_mask->pdst, __be16))
> + return -EOPNOTSUPP;
Again, here and in many further instances, the error code should be
EINVAL.
> + break;
> + case IP_USER_FLOW:
> + l3_mask = &cmd->fs.m_u.usr_ip4_spec;
> + if (cmd->fs.h_u.usr_ip4_spec.l4_4_bytes ||
> + cmd->fs.h_u.usr_ip4_spec.tos ||
I think this should be checking l4_4_bytes and tos in the mask.
> + cmd->fs.h_u.usr_ip4_spec.proto ||
> + cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 ||
> + (!cmd->fs.h_u.usr_ip4_spec.ip4src &&
> + !cmd->fs.h_u.usr_ip4_spec.ip4dst) ||
> + not_all_zeros_or_all_ones(l3_mask->ip4src, __be32) ||
> + not_all_zeros_or_all_ones(l3_mask->ip4dst, __be32))
> + return -EOPNOTSUPP;
> + break;
> + case ETHER_FLOW:
> + eth_mask = &cmd->fs.m_u.ether_spec;
> + if (memcmp(eth_mask->h_source, &zero_mac, ETH_ALEN))
> + return -EOPNOTSUPP;
> + if (!memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN))
> + return -EOPNOTSUPP;
But in the next statement you test whether eth_mask->h_dest is either
all-zeroes or all-ones. Is all-zeroes valid or not? I suspect you
actually intend to reject the case where both h_dest and h_proto are
masked out.
> + if (not_all_zeros_or_all_ones(eth_mask->h_proto, __be16) ||
> + (memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN) &&
> + memcmp(eth_mask->h_dest, &full_mac, ETH_ALEN)))
> + return -EOPNOTSUPP;
> + break;
> + default:
> + return -EOPNOTSUPP;
> + }
> +
> + if ((cmd->fs.flow_type & FLOW_EXT)) {
> + if (cmd->fs.m_ext.vlan_etype ||
> + not_all_zeros_or_all_ones(cmd->fs.m_ext.vlan_tci,
> + __be16)) {
> + return -EOPNOTSUPP;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static void add_ip_rule(struct mlx4_en_priv *priv,
> + struct ethtool_rxnfc *cmd,
> + struct list_head *list_h)
> +{
> + struct mlx4_spec_list *spec_l3;
> +
> + spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
> + if (!spec_l3) {
> + en_err(priv, "Fail to alloc ethtool rule.\n");
> + return;
> + }
This should return an error code as well - logging is not a substitue.
> + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
> + spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src;
> + if (spec_l3->ipv4.src_ip)
> + spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
> + spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst;
> + if (spec_l3->ipv4.dst_ip)
> + spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
The conditions should be using the mask (cmd->fs.m_u) not the value.
> + list_add_tail(&spec_l3->list, list_h);
> +}
> +
> +static void add_tcp_udp_rule(struct mlx4_en_priv *priv,
> + struct ethtool_rxnfc *cmd,
> + struct list_head *list_h, int proto)
> +{
> + struct mlx4_spec_list *spec_l3;
> + struct mlx4_spec_list *spec_l4;
> +
> + spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
> + spec_l4 = kzalloc(sizeof *spec_l4, GFP_KERNEL);
> + if (!spec_l4 || !spec_l3) {
> + en_err(priv, "Fail to alloc ethtool rule.\n");
If one of them was successfully allocated, it will now be leaked.
> + return;
> + }
> +
> + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
> +
> + if (proto == TCP_V4_FLOW) {
> + spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
> + spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src;
> + spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst;
> + spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc;
> + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst;
> + } else {
> + spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
> + spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src;
> + spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst;
> + spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc;
> + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst;
> + }
> +
> + if (spec_l3->ipv4.src_ip)
> + spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
> + if (spec_l3->ipv4.dst_ip)
> + spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
> +
> + if (spec_l4->tcp_udp.src_port)
> + spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK;
> + if (spec_l4->tcp_udp.dst_port)
> + spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK;
All these conditions should be using the mask, not the value.
> + list_add_tail(&spec_l3->list, list_h);
> + list_add_tail(&spec_l4->list, list_h);
> +}
> +
> +static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev,
> + struct ethtool_rxnfc *cmd,
> + struct list_head *rule_list_h)
> +{
> + int err;
> + u64 mac;
> + __be64 be_mac;
> + struct ethhdr *eth_spec;
> + struct mlx4_en_priv *priv = netdev_priv(dev);
> + struct mlx4_spec_list *spec_l2;
> + __be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16);
> +
> + err = mlx4_en_validate_flow(dev, cmd);
> + if (err)
> + return err;
> +
> + spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
> + if (!spec_l2)
> + return -ENOMEM;
> +
> + mac = priv->mac & EN_ETHTOOL_MAC_MASK;
> + be_mac = cpu_to_be64(mac << 16);
> +
> + spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
> + memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
> + if ((cmd->fs.flow_type & ~FLOW_EXT) != ETHER_FLOW)
> + memcpy(spec_l2->eth.dst_mac, &be_mac, ETH_ALEN);
Does the hardware require filtering by MAC address and not only by layer
3/4 addresses?
> + if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) {
> + spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci;
> + spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff);
This doesn't match mlx4_en_validate_flow(); you are replacing a mask of
0xffff with 0xfff.
> + }
> +
> + list_add_tail(&spec_l2->list, rule_list_h);
> +
> + switch (cmd->fs.flow_type & ~FLOW_EXT) {
> + case ETHER_FLOW:
> + eth_spec = &cmd->fs.h_u.ether_spec;
> + memcpy(&spec_l2->eth.dst_mac, eth_spec->h_dest, ETH_ALEN);
> + spec_l2->eth.ether_type = eth_spec->h_proto;
> + if (eth_spec->h_proto)
> + spec_l2->eth.ether_type_enable = 1;
> + break;
> + case IP_USER_FLOW:
> + add_ip_rule(priv, cmd, rule_list_h);
> + break;
> + case TCP_V4_FLOW:
> + add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW);
> + break;
> + case UDP_V4_FLOW:
> + add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW);
> + break;
All those functions need to be able to return errors.
> + }
> + return 0;
> +}
> +
> +static int mlx4_en_flow_replace(struct net_device *dev,
> + struct ethtool_rxnfc *cmd)
> +{
> + int err;
> + struct mlx4_en_priv *priv = netdev_priv(dev);
> + struct ethtool_flow_id *loc_rule;
> + struct mlx4_spec_list *spec, *tmp_spec;
> + u32 qpn;
> + u64 reg_id;
> +
> + struct mlx4_net_trans_rule rule = {
> + .queue_mode = MLX4_NET_TRANS_Q_FIFO,
> + .exclusive = 0,
> + .allow_loopback = 1,
> + .promisc_mode = MLX4_FS_PROMISC_NONE,
> + };
> +
> + rule.port = priv->port;
> + rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location;
> + INIT_LIST_HEAD(&rule.list);
> +
> + /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */
> + if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC)
> + return -EOPNOTSUPP;
EINVAL.
[...]
> static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
> u32 *rule_locs)
> {
> struct mlx4_en_priv *priv = netdev_priv(dev);
> + struct mlx4_en_dev *mdev = priv->mdev;
> int err = 0;
> + int i = 0, priority = 0;
> +
> + if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
> + return -EOPNOTSUPP;
>
> switch (cmd->cmd) {
> case ETHTOOL_GRXRINGS:
> cmd->data = priv->rx_ring_num;
> break;
> + case ETHTOOL_GRXCLSRLCNT:
> + cmd->rule_cnt = mlx4_en_get_num_flows(priv);
> + break;
> + case ETHTOOL_GRXCLSRULE:
> + err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
> + break;
> + case ETHTOOL_GRXCLSRLALL:
> + while (!err || err == -ENOENT) {
> + err = mlx4_en_get_flow(dev, cmd, i);
> + if (!err)
> + ((u32 *)(rule_locs))[priority++] = i;
I don't see any range check against cmd->rule_cnt.
Why are you casting rule_locs?
Also, are the rules really prioritised by location, so that if rule 0
and 1 match a packet then only rule 0 is applied? If they are actually
prioritised by the match type then you need to assign locations on the
driver side that reflect the actual priorities.
> + i++;
> + }
> + if (priority)
> + err = 0;
[...]
But if there are no rules defined, this is an error? That's not right.
I think you should unconditionally set err = 0 here.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists