Message-ID: <920853c06192a4f5cadf59c90b1510411b197a5e.camel@kernel.org>
Date: Fri, 06 Aug 2021 15:17:56 -0700
From: Saeed Mahameed <saeed@...nel.org>
To: Kees Cook <keescook@...omium.org>
Cc: Leon Romanovsky <leon@...nel.org>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>, netdev@...r.kernel.org,
linux-rdma@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-hardening@...r.kernel.org
Subject: Re: [PATCH] net/mlx5e: Avoid field-overflowing memcpy()
On Fri, 2021-08-06 at 14:50 -0700, Kees Cook wrote:
> In preparation for FORTIFY_SOURCE performing compile-time and run-time
> field bounds checking for memcpy(), memmove(), and memset(), avoid
> intentionally writing across neighboring fields.
>
> Use flexible arrays instead of zero-element arrays (which look like they
> are always overflowing) and split the cross-field memcpy() into two
> halves that can be appropriately bounds-checked by the compiler.
>
> We were doing:
>
>     #define ETH_HLEN  14
>     #define VLAN_HLEN  4
>     ...
>     #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
>     ...
>     struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
>     ...
>     struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
>     struct mlx5_wqe_data_seg *dseg = wqe->data;
>     ...
>     memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
>
> target is wqe->eth.inline_hdr.start (which the compiler sees as being
> 2 bytes in size), but copying 18, intending to write across start
> (really vlan_tci, 2 bytes). The remaining 16 bytes get written into
> wqe->data[0], covering byte_count (4 bytes), lkey (4 bytes), and addr
> (8 bytes).
>
> struct mlx5e_tx_wqe {
>         struct mlx5_wqe_ctrl_seg   ctrl;                 /*     0    16 */
>         struct mlx5_wqe_eth_seg    eth;                  /*    16    16 */
>         struct mlx5_wqe_data_seg   data[];               /*    32     0 */
>
>         /* size: 32, cachelines: 1, members: 3 */
>         /* last cacheline: 32 bytes */
> };
>
> struct mlx5_wqe_eth_seg {
>         u8                         swp_outer_l4_offset;  /*     0     1 */
>         u8                         swp_outer_l3_offset;  /*     1     1 */
>         u8                         swp_inner_l4_offset;  /*     2     1 */
>         u8                         swp_inner_l3_offset;  /*     3     1 */
>         u8                         cs_flags;             /*     4     1 */
>         u8                         swp_flags;            /*     5     1 */
>         __be16                     mss;                  /*     6     2 */
>         __be32                     flow_table_metadata;  /*     8     4 */
>         union {
>                 struct {
>                         __be16     sz;                   /*    12     2 */
>                         u8         start[2];             /*    14     2 */
>                 } inline_hdr;                            /*    12     4 */
>                 struct {
>                         __be16     type;                 /*    12     2 */
>                         __be16     vlan_tci;             /*    14     2 */
>                 } insert;                                /*    12     4 */
>                 __be32             trailer;              /*    12     4 */
>         };                                               /*    12     4 */
>
>         /* size: 16, cachelines: 1, members: 9 */
>         /* last cacheline: 16 bytes */
> };
>
> struct mlx5_wqe_data_seg {
>         __be32                     byte_count;           /*     0     4 */
>         __be32                     lkey;                 /*     4     4 */
>         __be64                     addr;                 /*     8     8 */
>
>         /* size: 16, cachelines: 1, members: 3 */
>         /* last cacheline: 16 bytes */
> };
>
> So, split the memcpy() so the compiler can reason about the buffer
> sizes.
>
> "pahole" shows no size nor member offset changes to struct mlx5e_tx_wqe
> nor struct mlx5e_umr_wqe. "objdump -d" shows no meaningful object code
> changes (i.e. only source line number induced differences and
> optimizations).
>
>
Doesn't splitting the memcpy() induce any performance degradation? An
extra instruction to copy the first 2 bytes?
[...]
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
Why only here? mlx5 has at least 3 other places where we use this
unbounded memcpy.
> @@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
>
>  	/* copy the inline part if required */
>  	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
> -		memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
> +		memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
>  		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
> +		memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
> +		       MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
>  		dma_len -= MLX5E_XDP_MIN_INLINE;
>  		dma_addr += MLX5E_XDP_MIN_INLINE;
>  		dseg++;