Message-ID: <vbfzhjqu0zf.fsf@mellanox.com>
Date: Fri, 30 Aug 2019 15:28:13 +0000
From: Vlad Buslov <vladbu@...lanox.com>
To: Pablo Neira Ayuso <pablo@...filter.org>
CC: "netfilter-devel@...r.kernel.org" <netfilter-devel@...r.kernel.org>,
"davem@...emloft.net" <davem@...emloft.net>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"vishal@...lsio.com" <vishal@...lsio.com>,
"jakub.kicinski@...ronome.com" <jakub.kicinski@...ronome.com>,
Saeed Mahameed <saeedm@...lanox.com>,
"jiri@...nulli.us" <jiri@...nulli.us>
Subject: Re: [PATCH net-next 3/4] net: flow_offload: mangle action at byte
level
On Fri 30 Aug 2019 at 03:53, Pablo Neira Ayuso <pablo@...filter.org> wrote:
> The flow mangle action is originally modeled after the tc pedit action,
> this has a number of shortcomings:
>
> 1) The tc pedit offset must be set on 32-bit boundaries. Many
> protocol header field offsets are not aligned to 32 bits, e.g. the
> destination port, source port and ethernet destination address. This
> patch adjusts the offset accordingly and trims the length in these
> cases, so drivers get an exact offset and length to the header fields
> (see the first sketch after these two points).
>
> 2) The maximum mangle length is one 32-bit word, hence you need up to
> four actions to mangle an IPv6 address. This patch coalesces
> consecutive tc pedit actions into one single action so drivers can
> configure the IPv6 mangling in one go (see the coalescing sketch
> further below). Ethernet address fields now require a single action
> instead of two as well.
>
> The following drivers have been updated accordingly to use this new
> mangle action layout:
>
> 1) The cxgb4 driver does not need to split protocol field matching
> larger than one 32-bit word into multiple definitions. Instead, one
> single definition per protocol field is enough. Checking for
> transport protocol ports is also simplified.
>
> 2) The mlx5 driver logic to disallow the IPv4 ttl and IPv6 hoplimit
> fields becomes simpler too.
>
> 3) The nfp driver uses the nfp_fl_set_helper() function to configure
> the payload mangling. The memchr_inv() function is used to check that
> the value and mask are properly initialized (see the fragment below).
> The driver has been updated to refer to the exact protocol header
> offsets too.
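As a side note on 3), the memchr_inv() pattern referenced there looks
roughly like this on the driver side (an illustrative fragment, not
the actual nfp code; 'mask' and 'len' are hypothetical locals):

    /* memchr_inv() returns the first byte that differs from 'c', or
     * NULL when the whole buffer matches -- so NULL here means an
     * all-zero mask, i.e. this header field is not being rewritten.
     */
    if (!memchr_inv(mask, 0, len))
            return 0;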
>
> As a result, this patch reduces code complexity on the driver side at
> the cost of adding ~100 LOC to the core to perform the offset and
> length adjustment and to coalesce consecutive actions.
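Similarly, a sketch of the coalescing from point 2): four contiguous
32-bit pedit words of a (made-up) IPv6 address are appended into one
value buffer, so the driver sees a single 16-byte mangle. Userspace
illustration only, not the kernel structures:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* One 32-bit pedit key per word of the IPv6 address. */
            unsigned int words[4] = { 0x20010db8, 0x00000000,
                                      0x00000000, 0x00000001 };
            unsigned char val[16];
            unsigned int idx = 0, k;

            /* Offsets are contiguous and the command matches, so each
             * key is appended to the same entry instead of opening a
             * new one.
             */
            for (k = 0; k < 4; k++) {
                    memcpy(&val[idx], &words[k], sizeof(words[k]));
                    idx += sizeof(words[k]);
            }
            printf("one mangle entry, len = %u\n", idx);  /* 16 */
            return 0;
    }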
>
> Signed-off-by: Pablo Neira Ayuso <pablo@...filter.org>
> ---
[...]
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index e30a151d8527..e8827fa8263a 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -3289,11 +3289,128 @@ void tc_cleanup_flow_action(struct flow_action *flow_action)
> }
> EXPORT_SYMBOL(tc_cleanup_flow_action);
>
> +static unsigned int flow_action_mangle_type(enum pedit_cmd cmd)
> +{
> + switch (cmd) {
> + case TCA_PEDIT_KEY_EX_CMD_SET:
> + return FLOW_ACTION_MANGLE;
> + case TCA_PEDIT_KEY_EX_CMD_ADD:
> + return FLOW_ACTION_ADD;
> + default:
> + WARN_ON_ONCE(1);
> + }
> + return 0;
> +}
> +
> +struct flow_action_mangle_ctx {
> + u8 cmd;
> + u8 offset;
> + u8 htype;
> + u8 idx;
> + u8 val[FLOW_ACTION_MANGLE_MAXLEN];
> + u8 mask[FLOW_ACTION_MANGLE_MAXLEN];
> + u32 first_word;
> + u32 last_word;
> +};
> +
> +static void flow_action_mangle_entry(struct flow_action_entry *entry,
> + const struct flow_action_mangle_ctx *ctx)
> +{
> + u32 delta;
> +
> + entry->id = ctx->cmd;
> + entry->mangle.htype = ctx->htype;
> + entry->mangle.offset = ctx->offset;
> + entry->mangle.len = ctx->idx;
> +
> + /* Adjust offset. */
> + delta = sizeof(u32) - (fls(ctx->first_word) / BITS_PER_BYTE);
> + entry->mangle.offset += delta;
> +
> + /* Adjust length. */
> + entry->mangle.len -= ((ffs(ctx->last_word) / BITS_PER_BYTE) + delta);
> +
> + /* Copy value and mask from offset using the adjusted length. */
> + memcpy(entry->mangle.val, &ctx->val[delta], entry->mangle.len);
> + memcpy(entry->mangle.mask, &ctx->mask[delta], entry->mangle.len);
> +}
> +
> +static void flow_action_mangle_ctx_update(struct flow_action_mangle_ctx *ctx,
> + const struct tc_action *act, int k)
> +{
> + u32 val, mask;
> +
> + val = tcf_pedit_val(act, k);
> + mask = ~tcf_pedit_mask(act, k);
> +
> + memcpy(&ctx->val[ctx->idx], &val, sizeof(u32));
> + memcpy(&ctx->mask[ctx->idx], &mask, sizeof(u32));
> + ctx->idx += sizeof(u32);
> +}
> +
> +static void flow_action_mangle_ctx_init(struct flow_action_mangle_ctx *ctx,
> + const struct tc_action *act, int k)
> +{
> + ctx->cmd = flow_action_mangle_type(tcf_pedit_cmd(act, k));
> + ctx->offset = tcf_pedit_offset(act, k);
> + ctx->htype = tcf_pedit_htype(act, k);
> + ctx->idx = 0;
> +
> + ctx->first_word = ntohl(~tcf_pedit_mask(act, k));
> + ctx->last_word = ctx->first_word;
> +
> + flow_action_mangle_ctx_update(ctx, act, k);
> +}
> +
> +static int flow_action_mangle(struct flow_action *flow_action,
> + struct flow_action_entry *entry,
> + const struct tc_action *act, int j)
> +{
> + struct flow_action_mangle_ctx ctx;
> + int k;
> +
> + if (tcf_pedit_cmd(act, 0) > TCA_PEDIT_KEY_EX_CMD_ADD)
> + return -1;
> +
> + flow_action_mangle_ctx_init(&ctx, act, 0);
> +
> + /* Special case: one single 32-bits word. */
> + if (tcf_pedit_nkeys(act) == 1) {
> + flow_action_mangle_entry(entry, &ctx);
> + return j;
> + }
> +
> + for (k = 1; k < tcf_pedit_nkeys(act); k++) {
> + if (tcf_pedit_cmd(act, k) > TCA_PEDIT_KEY_EX_CMD_ADD)
> + return -1;
> +
> + /* Offset is contiguous and type is the same, coalesce. */
> + if (ctx.idx < FLOW_ACTION_MANGLE_MAXLEN &&
> + ctx.offset + ctx.idx == tcf_pedit_offset(act, k) &&
> + ctx.cmd == flow_action_mangle_type(tcf_pedit_cmd(act, k))) {
> + flow_action_mangle_ctx_update(&ctx, act, k);
> + continue;
Hi Pablo,

With this change you coalesce multiple pedits into a single
flow_action_entry, which means the resulting rule->action.num_entries
is incorrect because the number of filled flow action entries will be
less than num_actions. With this, I get mlx5 rejecting such a
flow_rule with "mlx5_core: The offload action is not supported." due
to the trailing unfilled flow action(s) with id == 0.
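One possible direction -- only a sketch, not tested against this
series -- would be to shrink the count to the number of entries
actually filled once the setup loop in tc_setup_flow_action()
finishes:

    /* Sketch only: reconcile the entry count after coalescing so
     * drivers do not iterate over trailing zeroed entries.
     */
    flow_action->num_entries = j;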
> + }
> + ctx.last_word = ntohl(~tcf_pedit_mask(act, k - 1));
> +
> + /* Cannot coalesce, set up this entry. */
> + flow_action_mangle_entry(entry, &ctx);
> +
> + flow_action_mangle_ctx_init(&ctx, act, k);
> + entry = &flow_action->entries[++j];
> + }
> +
> + ctx.last_word = ntohl(~tcf_pedit_mask(act, k - 1));
> + flow_action_mangle_entry(entry, &ctx);
> +
> + return j;
> +}
> +
> int tc_setup_flow_action(struct flow_action *flow_action,
> const struct tcf_exts *exts, bool rtnl_held)
> {
> const struct tc_action *act;
> - int i, j, k, err = 0;
> + int i, j, err = 0;
>
> if (!exts)
> return 0;
> @@ -3366,25 +3483,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
> } else if (is_tcf_tunnel_release(act)) {
> entry->id = FLOW_ACTION_TUNNEL_DECAP;
> } else if (is_tcf_pedit(act)) {
> - for (k = 0; k < tcf_pedit_nkeys(act); k++) {
> - switch (tcf_pedit_cmd(act, k)) {
> - case TCA_PEDIT_KEY_EX_CMD_SET:
> - entry->id = FLOW_ACTION_MANGLE;
> - break;
> - case TCA_PEDIT_KEY_EX_CMD_ADD:
> - entry->id = FLOW_ACTION_ADD;
> - break;
> - default:
> - err = -EOPNOTSUPP;
> - goto err_out;
> - }
> - entry->mangle.htype = tcf_pedit_htype(act, k);
> - entry->mangle.mask = ~tcf_pedit_mask(act, k);
> - entry->mangle.val = tcf_pedit_val(act, k) &
> - entry->mangle.mask;
> - entry->mangle.offset = tcf_pedit_offset(act, k);
> - entry = &flow_action->entries[++j];
> - }
> + j = flow_action_mangle(flow_action, entry, act, j);
> + if (j < 0)
> + goto err_out;
> } else if (is_tcf_csum(act)) {
> entry->id = FLOW_ACTION_CSUM;
> entry->csum_flags = tcf_csum_update_flags(act);
> @@ -3439,8 +3540,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
> goto err_out;
> }
>
> - if (!is_tcf_pedit(act))
> - j++;
> + j++;
> }
>
> err_out: