[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f8542bbf-106f-2221-1e2d-34775bdea6f8@redhat.com>
Date: Fri, 7 Sep 2018 11:41:29 +0800
From: Jason Wang <jasowang@...hat.com>
To: "Michael S. Tsirkin" <mst@...hat.com>
Cc: netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
kvm@...r.kernel.org, virtualization@...ts.linux-foundation.org
Subject: Re: [PATCH net-next 10/11] tap: accept an array of XDP buffs through
sendmsg()
On 2018年09月07日 02:00, Michael S. Tsirkin wrote:
> On Thu, Sep 06, 2018 at 12:05:25PM +0800, Jason Wang wrote:
>> This patch implements the TUN_MSG_PTR msg_control type. This type allows
>> the caller to pass an array of XDP buffs to tuntap through the ptr field
>> of struct tun_msg_ctl. Tap will build skbs from those XDP buffers.
>>
>> This avoids lots of indirect calls, which improves icache
>> utilization, and allows batched XDP flushing when doing XDP
>> redirection.
>>
>> Signed-off-by: Jason Wang <jasowang@...hat.com>
>> ---
>> drivers/net/tap.c | 73 +++++++++++++++++++++++++++++++++++++++++++++--
>> 1 file changed, 71 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
>> index 7996ed7cbf18..50eb7bf22225 100644
>> --- a/drivers/net/tap.c
>> +++ b/drivers/net/tap.c
>> @@ -1146,14 +1146,83 @@ static const struct file_operations tap_fops = {
>> #endif
>> };
>>
>> +static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp)
>> +{
>> + struct virtio_net_hdr *gso = xdp->data_hard_start + sizeof(int);
>> + int buflen = *(int *)xdp->data_hard_start;
>> + int vnet_hdr_len = 0;
>> + struct tap_dev *tap;
>> + struct sk_buff *skb;
>> + int err, depth;
>> +
>> + if (q->flags & IFF_VNET_HDR)
>> + vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
>> +
>> + skb = build_skb(xdp->data_hard_start, buflen);
>> + if (!skb) {
>> + err = -ENOMEM;
>> + goto err;
>> + }
> So fundamentally why is it called XDP?
> We just build an skb, don't we?
The reason is that the function accepts a pointer to an XDP buff. Also,
with future development in mind, I think the name is OK:
- we will probably do XDP offloading in this function.
- we may have a chance to call lower device's ndo_xdp_xmit() in the future.
Thanks
>
>> +
>> + skb_reserve(skb, xdp->data - xdp->data_hard_start);
>> + skb_put(skb, xdp->data_end - xdp->data);
>> +
>> + skb_set_network_header(skb, ETH_HLEN);
>> + skb_reset_mac_header(skb);
>> + skb->protocol = eth_hdr(skb)->h_proto;
>> +
>> + if (vnet_hdr_len) {
>> + err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q));
>> + if (err)
>> + goto err_kfree;
>> + }
>> +
>> + skb_probe_transport_header(skb, ETH_HLEN);
>> +
>> + /* Move network header to the right position for VLAN tagged packets */
>> + if ((skb->protocol == htons(ETH_P_8021Q) ||
>> + skb->protocol == htons(ETH_P_8021AD)) &&
>> + __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
>> + skb_set_network_header(skb, depth);
>> +
>> + rcu_read_lock();
>> + tap = rcu_dereference(q->tap);
>> + if (tap) {
>> + skb->dev = tap->dev;
>> + dev_queue_xmit(skb);
>> + } else {
>> + kfree_skb(skb);
>> + }
>> + rcu_read_unlock();
>> +
>> + return 0;
>> +
>> +err_kfree:
>> + kfree_skb(skb);
>> +err:
>> + rcu_read_lock();
>> + tap = rcu_dereference(q->tap);
>> + if (tap && tap->count_tx_dropped)
>> + tap->count_tx_dropped(tap);
>> + rcu_read_unlock();
>> + return err;
>> +}
>> +
>> static int tap_sendmsg(struct socket *sock, struct msghdr *m,
>> size_t total_len)
>> {
>> struct tap_queue *q = container_of(sock, struct tap_queue, sock);
>> struct tun_msg_ctl *ctl = m->msg_control;
>> + struct xdp_buff *xdp;
>> + int i;
>>
>> - if (ctl && ctl->type != TUN_MSG_UBUF)
>> - return -EINVAL;
>> + if (ctl && ((ctl->type & 0xF) == TUN_MSG_PTR)) {
>> + for (i = 0; i < ctl->type >> 16; i++) {
>> + xdp = &((struct xdp_buff *)ctl->ptr)[i];
>> + tap_get_user_xdp(q, xdp);
>> + }
>> + return 0;
>> + }
>>
>> return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter,
>> m->msg_flags & MSG_DONTWAIT);
>> --
>> 2.17.1
Powered by blists - more mailing lists