lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Mon, 29 Oct 2018 12:19:11 +0800
From:   Jianfeng Tan <jianfeng.tan@...ux.alibaba.com>
To:     Jason Wang <jasowang@...hat.com>, netdev@...r.kernel.org
Cc:     davem@...emloft.net, mst@...hat.com
Subject: Re: [PATCH] net/packet: support vhost mrg_rxbuf


On 10/29/2018 10:54 AM, Jason Wang wrote:
>
> On 2018/10/27 下午8:04, Jianfeng Tan wrote:
>> Previously, the virtio net header size was hardcoded to 10, which made
>> the mrg_rxbuf feature unavailable.
>>
>> We redefine the PACKET_VNET_HDR ioctl, which used to treat user input as
>> a boolean, to treat it as an int: 0, 10, or 12; any other value is
>> treated as 10.
>>
>> One case is now treated differently: if the user input is 12, the
>> header size was previously 10, but it is now 12.
>>
>> Signed-off-by: Jianfeng Tan <jianfeng.tan@...ux.alibaba.com>
>
>
> This should go to net-next, which is currently closed. You may consider
> re-submitting when it is open again.

Thank you for the reminder. We'll re-evaluate the necessity of this patch.

>
>
>> ---
>>   net/packet/af_packet.c | 97 ++++++++++++++++++++++++++----------------
>>   net/packet/diag.c      |  2 +-
>>   net/packet/internal.h  |  2 +-
>>   3 files changed, 63 insertions(+), 38 deletions(-)
>>
>> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
>> index ec3095f13aae..1bd7f4cdcc80 100644
>> --- a/net/packet/af_packet.c
>> +++ b/net/packet/af_packet.c
>> @@ -1999,18 +1999,24 @@ static unsigned int run_filter(struct sk_buff 
>> *skb,
>>   }
>>     static int packet_rcv_vnet(struct msghdr *msg, const struct 
>> sk_buff *skb,
>> -               size_t *len)
>> +               size_t *len, int vnet_hdr_len)
>>   {
>> +    int res;
>>       struct virtio_net_hdr vnet_hdr;
>>   -    if (*len < sizeof(vnet_hdr))
>> +    if (*len < vnet_hdr_len)
>>           return -EINVAL;
>> -    *len -= sizeof(vnet_hdr);
>> +    *len -= vnet_hdr_len;
>>         if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
>>           return -EINVAL;
>>   -    return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
>> +    res = memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
>> +    if (res == 0)
>> +        iov_iter_advance(&msg->msg_iter,
>> +                 vnet_hdr_len - sizeof(vnet_hdr));
>> +
>> +    return res;
>>   }
>>     /*
>> @@ -2206,11 +2212,13 @@ static int tpacket_rcv(struct sk_buff *skb, 
>> struct net_device *dev,
>>                     po->tp_reserve;
>>       } else {
>>           unsigned int maclen = skb_network_offset(skb);
>> +        int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
>> +
>>           netoff = TPACKET_ALIGN(po->tp_hdrlen +
>>                          (maclen < 16 ? 16 : maclen)) +
>>                          po->tp_reserve;
>> -        if (po->has_vnet_hdr) {
>> -            netoff += sizeof(struct virtio_net_hdr);
>> +        if (vnet_hdr_sz) {
>> +            netoff += vnet_hdr_sz;
>>               do_vnet = true;
>>           }
>>           macoff = netoff - maclen;
>> @@ -2429,19 +2437,6 @@ static int __packet_snd_vnet_parse(struct 
>> virtio_net_hdr *vnet_hdr, size_t len)
>>       return 0;
>>   }
>>   -static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
>> -                 struct virtio_net_hdr *vnet_hdr)
>> -{
>> -    if (*len < sizeof(*vnet_hdr))
>> -        return -EINVAL;
>> -    *len -= sizeof(*vnet_hdr);
>> -
>> -    if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), 
>> &msg->msg_iter))
>> -        return -EFAULT;
>> -
>> -    return __packet_snd_vnet_parse(vnet_hdr, *len);
>> -}
>> -
>>   static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff 
>> *skb,
>>           void *frame, struct net_device *dev, void *data, int tp_len,
>>           __be16 proto, unsigned char *addr, int hlen, int copylen,
>> @@ -2609,6 +2604,7 @@ static int tpacket_snd(struct packet_sock *po, 
>> struct msghdr *msg)
>>       int len_sum = 0;
>>       int status = TP_STATUS_AVAILABLE;
>>       int hlen, tlen, copylen = 0;
>> +    int vnet_hdr_sz;
>>         mutex_lock(&po->pg_vec_lock);
>>   @@ -2648,7 +2644,8 @@ static int tpacket_snd(struct packet_sock 
>> *po, struct msghdr *msg)
>>       size_max = po->tx_ring.frame_size
>>           - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
>>   -    if ((size_max > dev->mtu + reserve + VLAN_HLEN) && 
>> !po->has_vnet_hdr)
>> +    vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
>> +    if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
>>           size_max = dev->mtu + reserve + VLAN_HLEN;
>>         do {
>> @@ -2668,10 +2665,10 @@ static int tpacket_snd(struct packet_sock 
>> *po, struct msghdr *msg)
>>           status = TP_STATUS_SEND_REQUEST;
>>           hlen = LL_RESERVED_SPACE(dev);
>>           tlen = dev->needed_tailroom;
>> -        if (po->has_vnet_hdr) {
>> +        if (vnet_hdr_sz) {
>>               vnet_hdr = data;
>> -            data += sizeof(*vnet_hdr);
>> -            tp_len -= sizeof(*vnet_hdr);
>> +            data += vnet_hdr_sz;
>> +            tp_len -= vnet_hdr_sz;
>>               if (tp_len < 0 ||
>>                   __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
>>                   tp_len = -EINVAL;
>> @@ -2696,7 +2693,7 @@ static int tpacket_snd(struct packet_sock *po, 
>> struct msghdr *msg)
>>                         addr, hlen, copylen, &sockc);
>>           if (likely(tp_len >= 0) &&
>>               tp_len > dev->mtu + reserve &&
>> -            !po->has_vnet_hdr &&
>> +            !vnet_hdr_sz &&
>>               !packet_extra_vlan_len_allowed(dev, skb))
>>               tp_len = -EMSGSIZE;
>>   @@ -2715,7 +2712,7 @@ static int tpacket_snd(struct packet_sock 
>> *po, struct msghdr *msg)
>>               }
>>           }
>>   -        if (po->has_vnet_hdr) {
>> +        if (vnet_hdr_sz) {
>>               if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
>>                   tp_len = -EINVAL;
>>                   goto tpacket_error;
>> @@ -2802,9 +2799,9 @@ static int packet_snd(struct socket *sock, 
>> struct msghdr *msg, size_t len)
>>       int err, reserve = 0;
>>       struct sockcm_cookie sockc;
>>       struct virtio_net_hdr vnet_hdr = { 0 };
>> +    int vnet_hdr_sz;
>>       int offset = 0;
>>       struct packet_sock *po = pkt_sk(sk);
>> -    bool has_vnet_hdr = false;
>>       int hlen, tlen, linear;
>>       int extra_len = 0;
>>   @@ -2844,11 +2841,29 @@ static int packet_snd(struct socket *sock, 
>> struct msghdr *msg, size_t len)
>>         if (sock->type == SOCK_RAW)
>>           reserve = dev->hard_header_len;
>> -    if (po->has_vnet_hdr) {
>> -        err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
>> -        if (err)
>> +
>> +    vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
>> +    if (vnet_hdr_sz) {
>> +        if (len < vnet_hdr_sz) {
>> +            err = -EINVAL;
>>               goto out_unlock;
>> -        has_vnet_hdr = true;
>> +        }
>> +        len -= vnet_hdr_sz;
>> +
>> +        if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr),
>> +                     &msg->msg_iter)) {
>> +            err = -EFAULT;
>> +            goto out_unlock;
>> +        }
>> +
>> +        if (__packet_snd_vnet_parse(&vnet_hdr, len)) {
>> +            err = -EINVAL;
>> +            goto out_unlock;
>> +        }
>
>
> Any reason to open code packet_snd_vnet_parse() here?

No particular reason. I will try to add a parameter and keep the vnet-related
code inside that function if the patch is resubmitted.

>
>
>> +
>> +        /* TODO: check hdr_len with len? */
>> +
>> +        iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - 
>> sizeof(vnet_hdr));
>>       }
>>         if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
>> @@ -2912,7 +2927,7 @@ static int packet_snd(struct socket *sock, 
>> struct msghdr *msg, size_t len)
>>       skb->mark = sockc.mark;
>>       skb->tstamp = sockc.transmit_time;
>>   -    if (has_vnet_hdr) {
>> +    if (vnet_hdr_sz) {
>>           err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
>>           if (err)
>>               goto out_free;
>> @@ -3307,11 +3322,11 @@ static int packet_recvmsg(struct socket 
>> *sock, struct msghdr *msg, size_t len,
>>       if (pkt_sk(sk)->pressure)
>>           packet_rcv_has_room(pkt_sk(sk), NULL);
>>   -    if (pkt_sk(sk)->has_vnet_hdr) {
>> -        err = packet_rcv_vnet(msg, skb, &len);
>> +    vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz);
>> +    if (vnet_hdr_len) {
>> +        err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
>>           if (err)
>>               goto out_free;
>> -        vnet_hdr_len = sizeof(struct virtio_net_hdr);
>>       }
>>         /* You lose any data beyond the buffer you gave. If it worries
>> @@ -3772,7 +3787,17 @@ packet_setsockopt(struct socket *sock, int 
>> level, int optname, char __user *optv
>>           if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
>>               ret = -EBUSY;
>>           } else {
>> -            po->has_vnet_hdr = !!val;
>> +            /* Previously we treated user input as boolean (!!val);
>> +             * now we treat it as int. After the correction below,
>> +             * the only case whose behavior changes is 12, which
>> +             * results in a vnet header size of 12 instead of 10.
>> +             */
>> +            if (val &&
>> +                val != sizeof(struct virtio_net_hdr) &&
>> +                val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
>> +                val = sizeof(struct virtio_net_hdr);
>> +
>> +            po->vnet_hdr_sz = val;
>>               ret = 0;
>>           }
>>           release_sock(sk);
>> @@ -3903,7 +3928,7 @@ static int packet_getsockopt(struct socket 
>> *sock, int level, int optname,
>>           val = po->origdev;
>>           break;
>>       case PACKET_VNET_HDR:
>> -        val = po->has_vnet_hdr;
>> +        val = po->vnet_hdr_sz;
>
>
> So the change here is noticeable by userspace. Maybe we need a new opt 
> for this?

Nice catch, users may assume that only 0 or 1 is returned.

Thanks,
Jianfeng

>
> Thanks
>
>
>>           break;
>>       case PACKET_VERSION:
>>           val = po->tp_version;
>> diff --git a/net/packet/diag.c b/net/packet/diag.c
>> index 7ef1c881ae74..950015b6704f 100644
>> --- a/net/packet/diag.c
>> +++ b/net/packet/diag.c
>> @@ -26,7 +26,7 @@ static int pdiag_put_info(const struct packet_sock 
>> *po, struct sk_buff *nlskb)
>>           pinfo.pdi_flags |= PDI_AUXDATA;
>>       if (po->origdev)
>>           pinfo.pdi_flags |= PDI_ORIGDEV;
>> -    if (po->has_vnet_hdr)
>> +    if (po->vnet_hdr_sz)
>>           pinfo.pdi_flags |= PDI_VNETHDR;
>>       if (po->tp_loss)
>>           pinfo.pdi_flags |= PDI_LOSS;
>> diff --git a/net/packet/internal.h b/net/packet/internal.h
>> index 3bb7c5fb3bff..11bc75950f28 100644
>> --- a/net/packet/internal.h
>> +++ b/net/packet/internal.h
>> @@ -115,9 +115,9 @@ struct packet_sock {
>>       unsigned int        running;    /* bind_lock must be held */
>>       unsigned int        auxdata:1,    /* writer must hold sock lock */
>>                   origdev:1,
>> -                has_vnet_hdr:1,
>>                   tp_loss:1,
>>                   tp_tx_has_off:1;
>> +    int            vnet_hdr_sz;
>>       int            pressure;
>>       int            ifindex;    /* bound device        */
>>       __be16            num;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ