lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e8d3e33d-a89b-bdcf-431d-6759e7b45393@oss.nxp.com>
Date:   Fri, 10 Dec 2021 17:30:02 +0100
From:   Yannick Vignon <yannick.vignon@....nxp.com>
To:     Ong Boon Leong <boon.leong.ong@...el.com>,
        Giuseppe Cavallaro <peppe.cavallaro@...com>,
        Alexandre Torgue <alexandre.torgue@...com>,
        Jose Abreu <joabreu@...opsys.com>,
        "David S . Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Maxime Coquelin <mcoquelin.stm32@...il.com>,
        alexandre.torgue@...s.st.com,
        Kurt Kanzenbach <kurt.kanzenbach@...utronix.de>
Cc:     netdev@...r.kernel.org, linux-stm32@...md-mailman.stormreply.com,
        linux-arm-kernel@...ts.infradead.org
Subject: Re: [PATCH net-next 1/2] net: stmmac: fix tc flower deletion for VLAN
 priority Rx steering

Hi,

On 12/9/2021 4:16 PM, Ong Boon Leong wrote:
> To replicate the issue:-
> 
> 1) Add 2 flower filters for VLAN Priority based frame steering:-
> $ IFDEVNAME=eth0
> $ tc qdisc add dev $IFDEVNAME ingress
> $ tc qdisc add dev $IFDEVNAME root mqprio num_tc 8 \
>     map 0 1 2 3 4 5 6 7 0 0 0 0 0 0 0 0 \
>     queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0
> $ tc filter add dev $IFDEVNAME parent ffff: protocol 802.1Q \
>     flower vlan_prio 0 hw_tc 0
> $ tc filter add dev $IFDEVNAME parent ffff: protocol 802.1Q \
>     flower vlan_prio 1 hw_tc 1
> 
> 2) Get the 'pref' id
> $ tc filter show dev $IFDEVNAME ingress
> 
> 3) Delete a specific tc flower record
> $ tc filter del dev $IFDEVNAME parent ffff: pref 49151
> 
>  From dmesg, we will observe a kernel NULL pointer dereference oops:
> 
> [  197.170464] BUG: kernel NULL pointer dereference, address: 0000000000000000
> [  197.171367] #PF: supervisor read access in kernel mode
> [  197.171367] #PF: error_code(0x0000) - not-present page
> [  197.171367] PGD 0 P4D 0
> [  197.171367] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [  197.171367] CPU: 0 PID: 3216 Comm: tc Tainted: G     U      E     5.16.0-rc2+ #12
> [  197.171367] Hardware name: Intel Corporation Elkhart Lake Embedded Platform/ElkhartLake LPDDR4x T3 CRB, BIOS EHLSFWI1.R00.3273.A04.2107240322 07/24/2021
> [  197.171367] RIP: 0010:tc_setup_cls+0x20b/0x4a0 [stmmac]
> [  197.171367] Code: fe ff ff c7 43 14 00 00 00 00 48 c7 03 00 00 00 00 c7 43 1c 00 00 00 00 49 8b 44 24 28 48 8b bd b0 00 00 00 41 0f b7 54 24 58 <48> 8b 00 0f bf 8f 38 08 00 00 81 ea e0 ff 00 00 8b 00 25 00 04 00
> [  197.171367] RSP: 0018:ffff940940a037c0 EFLAGS: 00010246
> [  197.171367] RAX: 0000000000000000 RBX: ffff92e826cae2c8 RCX: ffff92e825f39000
> [  197.171367] RDX: 0000000000000000 RSI: ffff92e826cae2a8 RDI: ffff92e82f0c0000
> [  197.171367] RBP: ffff92e82f0c0940 R08: 0000000000000000 R09: ffff92e825f39434
> [  197.171367] R10: ffff92e826c5af00 R11: ffff940940a038a8 R12: ffff940940a038a8
> [  197.171367] R13: 0000000000000000 R14: 0000000000000000 R15: ffff92e830a5b600
> [  197.171367] FS:  00007fa7b0c47740(0000) GS:ffff92e964200000(0000) knlGS:0000000000000000
> [  197.171367] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  197.171367] CR2: 0000000000000000 CR3: 0000000124c50000 CR4: 0000000000350ef0
> [  197.171367] Call Trace:
> [  197.171367]  <TASK>
> [  197.171367]  ? __stmmac_disable_all_queues+0xa8/0xe0 [stmmac]
> [  197.171367]  stmmac_setup_tc_block_cb+0x70/0x110 [stmmac]
> [  197.171367]  tc_setup_cb_destroy+0xb3/0x180
> [  197.171367]  fl_hw_destroy_filter+0x94/0xc0 [cls_flower]
> [  197.171367]  __fl_delete+0x16a/0x180 [cls_flower]
> [  197.171367]  fl_destroy+0xb9/0x140 [cls_flower]
> [  197.171367]  tcf_proto_destroy+0x1d/0xa0
> [  197.171367]  tc_del_tfilter+0x3c9/0x7b0
> [  197.171367]  ? tc_dump_tfilter+0x310/0x310
> [  197.171367]  rtnetlink_rcv_msg+0x2bf/0x370
> [  197.171367]  ? preempt_count_add+0x68/0xa0
> [  197.171367]  ? _raw_spin_lock_irqsave+0x19/0x40
> [  197.171367]  ? _raw_spin_unlock_irqrestore+0x1f/0x31
> [  197.171367]  ? rtnl_calcit.isra.0+0x130/0x130
> [  197.171367]  netlink_rcv_skb+0x4e/0x100
> [  197.171367]  netlink_unicast+0x18e/0x230
> [  197.171367]  netlink_sendmsg+0x245/0x480
> [  197.171367]  sock_sendmsg+0x5b/0x60
> [  197.171367]  ____sys_sendmsg+0x20b/0x280
> [  197.171367]  ? copy_msghdr_from_user+0x5c/0x90
> [  197.171367]  ___sys_sendmsg+0x7c/0xc0
> [  197.171367]  ? folio_add_lru+0x52/0x80
> [  197.171367]  ? __sys_sendto+0xee/0x160
> [  197.171367]  __sys_sendmsg+0x59/0xa0
> [  197.171367]  do_syscall_64+0x40/0x90
> [  197.171367]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> [  197.171367] RIP: 0033:0x7fa7b0d64397
> [  197.171367] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
> [  197.171367] RSP: 002b:00007ffdd88b58e8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
> [  197.171367] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa7b0d64397
> [  197.171367] RDX: 0000000000000000 RSI: 00007ffdd88b5960 RDI: 0000000000000003
> [  197.171367] RBP: 0000000061b05c21 R08: 0000000000000001 R09: 0000564584e47890
> [  197.171367] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
> [  197.171367] R13: 00007ffdd88b9a80 R14: 00000000bfff0000 R15: 0000564584e3e420
> [  197.171367]  </TASK>
> [  197.171367] Modules linked in: cls_flower sch_mqprio sch_ingress dwmac_intel(E) stmmac(E) pcs_xpcs phylink marvell marvell10g libphy 8021q bnep bluetooth ecryptfs nfsd sch_fq_codel uio uhid snd_soc_dmic snd_sof_pci_intel_tgl x86_pkg_temp_thermal snd_sof_intel_hda_common kvm_intel iTCO_wdt iTCO_vendor_support soundwire_intel mei_hdcp kvm soundwire_generic_allocation soundwire_cadence soundwire_bus irqbypass snd_sof_xtensa_dsp ax88179_178a snd_soc_acpi_intel_match intel_rapl_msr pcspkr usbnet snd_soc_acpi mii snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec i2c_i801 snd_hda_core intel_ish_ipc tpm_crb 8250_lpss intel_ishtp tpm_tis i915 mei_me i2c_smbus mei tpm_tis_core dw_dmac_core tpm spi_dw_pci parport_pc intel_pmc_core spi_dw thermal parport ttm fuse configfs snd_sof_pci snd_sof snd_soc_core snd_compress ac97_bus ledtrig_audio snd_pcm snd_timer snd soundcore [last unloaded: libphy]
> [  197.171367] CR2: 0000000000000000
> [  197.171367] ---[ end trace 8b8d1c617c39093d ]---
> 
> This patch reimplements the tc flower rx frame steering for VLAN priority
> by keeping a record of flow_cls_offload added. The implementation also
> makes way to support EtherType based RX frame steering later.
> 
> Fixes: 0e039f5cf86c ("net: stmmac: add RX frame steering based on VLAN priority in tc flower")
> Tested-by: Kurt Kanzenbach <kurt@...utronix.de>
> Signed-off-by: Ong Boon Leong <boon.leong.ong@...el.com>
> ---
>   drivers/net/ethernet/stmicro/stmmac/stmmac.h  | 17 ++++
>   .../net/ethernet/stmicro/stmmac/stmmac_tc.c   | 86 ++++++++++++++++---
>   2 files changed, 90 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> index 4f5292cadf5..18a262ef17f 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> @@ -171,6 +171,19 @@ struct stmmac_flow_entry {
>   	int is_l4;
>   };
>   
> +/* Rx Frame Steering */
> +enum stmmac_rfs_type {
> +	STMMAC_RFS_T_VLAN,
> +	STMMAC_RFS_T_MAX,
> +};
> +
> +struct stmmac_rfs_entry {
> +	unsigned long cookie;
> +	int in_use;
> +	int type;
> +	int tc;
> +};
> +
>   struct stmmac_priv {
>   	/* Frequently used values are kept adjacent for cache effect */
>   	u32 tx_coal_frames[MTL_MAX_TX_QUEUES];
> @@ -288,6 +301,10 @@ struct stmmac_priv {
>   	struct stmmac_tc_entry *tc_entries;
>   	unsigned int flow_entries_max;
>   	struct stmmac_flow_entry *flow_entries;
> +	unsigned int rfs_entries_max[STMMAC_RFS_T_MAX];
> +	unsigned int rfs_entries_cnt[STMMAC_RFS_T_MAX];
> +	unsigned int rfs_entries_total;
> +	struct stmmac_rfs_entry *rfs_entries;
>   
>   	/* Pulse Per Second output */
>   	struct stmmac_pps_cfg pps[STMMAC_PPS_MAX];
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> index 1c4ea0b1b84..d0a2b289f46 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> @@ -232,11 +232,33 @@ static int tc_setup_cls_u32(struct stmmac_priv *priv,
>   	}
>   }
>   
> +static int tc_rfs_init(struct stmmac_priv *priv)
> +{
> +	int i;
> +
> +	priv->rfs_entries_max[STMMAC_RFS_T_VLAN] = 8;
> +
> +	for (i = 0; i < STMMAC_RFS_T_MAX; i++)
> +		priv->rfs_entries_total += priv->rfs_entries_max[i];
> +
> +	priv->rfs_entries = devm_kcalloc(priv->device,
> +					 priv->rfs_entries_total,
> +					 sizeof(*priv->rfs_entries),
> +					 GFP_KERNEL);
> +	if (!priv->rfs_entries)
> +		return -ENOMEM;
> +
> +	dev_info(priv->device, "Enabled RFS Flow TC (entries=%d)\n",
> +		 priv->rfs_entries_total);
> +
> +	return 0;
> +}
> +
>   static int tc_init(struct stmmac_priv *priv)
>   {
>   	struct dma_features *dma_cap = &priv->dma_cap;
>   	unsigned int count;
> -	int i;
> +	int ret, i;
>   
>   	if (dma_cap->l3l4fnum) {
>   		priv->flow_entries_max = dma_cap->l3l4fnum;
> @@ -250,10 +272,14 @@ static int tc_init(struct stmmac_priv *priv)
>   		for (i = 0; i < priv->flow_entries_max; i++)
>   			priv->flow_entries[i].idx = i;
>   
> -		dev_info(priv->device, "Enabled Flow TC (entries=%d)\n",
> +		dev_info(priv->device, "Enabled L3L4 Flow TC (entries=%d)\n",
>   			 priv->flow_entries_max);
>   	}
>   
> +	ret = tc_rfs_init(priv);
> +	if (ret)
> +		return -ENOMEM;
> +
>   	if (!priv->plat->fpe_cfg) {
>   		priv->plat->fpe_cfg = devm_kzalloc(priv->device,
>   						   sizeof(*priv->plat->fpe_cfg),
> @@ -607,16 +633,45 @@ static int tc_del_flow(struct stmmac_priv *priv,
>   	return ret;
>   }
>   
> +static struct stmmac_rfs_entry *tc_find_rfs(struct stmmac_priv *priv,
> +					    struct flow_cls_offload *cls,
> +					    bool get_free)
> +{
> +	int i;
> +
> +	for (i = 0; i < priv->rfs_entries_total; i++) {
> +		struct stmmac_rfs_entry *entry = &priv->rfs_entries[i];
> +
> +		if (entry->cookie == cls->cookie)
> +			return entry;
> +		if (get_free && entry->in_use == false)
> +			return entry;
> +	}
> +
> +	return NULL;
> +}
> +
>   #define VLAN_PRIO_FULL_MASK (0x07)
>   
>   static int tc_add_vlan_flow(struct stmmac_priv *priv,
>   			    struct flow_cls_offload *cls)
>   {
> +	struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false);
>   	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
>   	struct flow_dissector *dissector = rule->match.dissector;
>   	int tc = tc_classid_to_hwtc(priv->dev, cls->classid);
>   	struct flow_match_vlan match;
> 

While we're at it, shouldn't we also check that no actions are being 
requested and fail if there are, instead of silently ignoring them?

> +	if (!entry) {
> +		entry = tc_find_rfs(priv, cls, true);
> +		if (!entry)
> +			return -ENOENT;
> +	}
> +
> +	if (priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN] >=
> +	    priv->rfs_entries_max[STMMAC_RFS_T_VLAN])
> +		return -ENOENT;
> +
>   	/* Nothing to do here */
>   	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN))
>   		return -EINVAL;
> @@ -638,6 +693,12 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv,
>   
>   		prio = BIT(match.key->vlan_priority);
>   		stmmac_rx_queue_prio(priv, priv->hw, prio, tc);
> +
> +		entry->in_use = true;
> +		entry->cookie = cls->cookie;
> +		entry->tc = tc;
> +		entry->type = STMMAC_RFS_T_VLAN;
> +		priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]++;
>   	}
>   
>   	return 0;
> @@ -646,20 +707,19 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv,
>   static int tc_del_vlan_flow(struct stmmac_priv *priv,
>   			    struct flow_cls_offload *cls)
>   {
> -	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
> -	struct flow_dissector *dissector = rule->match.dissector;
> -	int tc = tc_classid_to_hwtc(priv->dev, cls->classid);
> +	struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false);
>   
> -	/* Nothing to do here */
> -	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN))
> -		return -EINVAL;
> +	if (!entry || !entry->in_use || entry->type != STMMAC_RFS_T_VLAN)
> +		return -ENOENT;
>   
> -	if (tc < 0) {
> -		netdev_err(priv->dev, "Invalid traffic class\n");
> -		return -EINVAL;
> -	}
> +	stmmac_rx_queue_prio(priv, priv->hw, 0, entry->tc);
> +
> +	entry->in_use = false;
> +	entry->cookie = 0;
> +	entry->tc = 0;
> +	entry->type = 0;
>   
> -	stmmac_rx_queue_prio(priv, priv->hw, 0, tc);
> +	priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]--;
>   
>   	return 0;
>   }
> 

I was about to post a very similar fix for that same problem (except I 
was adding support for other packet steering types)...
I can confirm your patch works. Note that a simpler way to reproduce the 
issue is to add a filter and then remove all the filters, e.g.:
$ IFDEVNAME=eth0
$ tc qdisc add dev $IFDEVNAME ingress
$ tc qdisc add dev $IFDEVNAME root mqprio num_tc 8 \
    map 0 1 2 3 4 5 6 7 0 0 0 0 0 0 0 0 \
    queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0
$ tc filter add dev $IFDEVNAME parent ffff: protocol 802.1Q \
    flower vlan_prio 0 hw_tc 0
$ tc filter del dev $IFDEVNAME ingress


Yannick

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ