[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f687ffe4-3310-9c8e-2ee8-a6977af1ae8f@gmail.com>
Date: Thu, 28 Oct 2021 01:12:07 +0900
From: Taehee Yoo <ap420073@...il.com>
To: David Ahern <dsahern@...il.com>, davem@...emloft.net,
kuba@...nel.org, dsahern@...nel.org, netdev@...r.kernel.org
Cc: dkirjanov@...e.de
Subject: Re: [PATCH net-next 2/4 v4] amt: add data plane of amt interface
Hi, David,
Thank you for your review!
On 10/28/21 12:00 AM, David Ahern wrote:
> On 10/26/21 9:10 AM, Taehee Yoo wrote:
>> diff --git a/drivers/net/amt.c b/drivers/net/amt.c
>> index 8d4782c66cde..c3ac94b6d3e8 100644
>> --- a/drivers/net/amt.c
>> +++ b/drivers/net/amt.c
>> @@ -31,6 +31,1220 @@
>>
>> static struct workqueue_struct *amt_wq;
>>
>> +static char *status_str[] = {
>> + "AMT_STATUS_INIT",
>> + "AMT_STATUS_SENT_DISCOVERY",
>> + "AMT_STATUS_RECEIVED_DISCOVERY",
>> + "AMT_STATUS_SENT_ADVERTISEMENT",
>> + "AMT_STATUS_RECEIVED_ADVERTISEMENT",
>> + "AMT_STATUS_SENT_REQUEST",
>> + "AMT_STATUS_RECEIVED_REQUEST",
>> + "AMT_STATUS_SENT_QUERY",
>> + "AMT_STATUS_RECEIVED_QUERY",
>> + "AMT_STATUS_SENT_UPDATE",
>> + "AMT_STATUS_RECEIVED_UPDATE",
>> +};
>> +
>> +static char *type_str[] = {
>> + "AMT_MSG_DISCOVERY",
>> + "AMT_MSG_ADVERTISEMENT",
>> + "AMT_MSG_REQUEST",
>> + "AMT_MSG_MEMBERSHIP_QUERY",
>> + "AMT_MSG_MEMBERSHIP_UPDATE",
>> + "AMT_MSG_MULTICAST_DATA",
>> + "AMT_MSG_TEATDOWM",
>
> TEARDOWN is misspelled.
>
Okay, I will fix it.
>> +};
>> +
>> +static struct amt_skb_cb *amt_skb_cb(struct sk_buff *skb)
>> +{
>> + return (struct amt_skb_cb *)((void *)skb->cb +
>> + sizeof(struct qdisc_skb_cb));
>
> how will you know of sizeof(struct qdisc_skb_cb) + sizeof(struct
> amt_skb_cb) exceeds the cb space? You should add a BUILD_BUG_ON.
>
Thanks, I will add BUILD_BUG_ON.
>
>> +}
>> +
>> +static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
>> +{
>> + int hlen = LL_RESERVED_SPACE(amt->dev);
>> + int tlen = amt->dev->needed_tailroom;
>> + struct igmpv3_query *ihv3;
>> + void *csum_start = NULL;
>> + __sum16 *csum = NULL;
>> + struct sk_buff *skb;
>> + struct ethhdr *eth;
>> + struct iphdr *iph;
>> + unsigned int len;
>> + int offset;
>> +
>> + len = hlen + tlen + sizeof(*iph) + 4 + sizeof(*ihv3);
>> + skb = netdev_alloc_skb_ip_align(amt->dev, len);
>> + if (!skb)
>> + return NULL;
>> +
>> + skb_reserve(skb, hlen);
>> + skb_push(skb, sizeof(*eth));
>> + skb->protocol = htons(ETH_P_IP);
>> + skb_reset_mac_header(skb);
>> + skb->priority = TC_PRIO_CONTROL;
>> + skb_put(skb, sizeof(*iph) + 4 + sizeof(*ihv3));
>> + skb_pull(skb, sizeof(*eth));
>> + skb_reset_network_header(skb);
>> +
>> + iph = ip_hdr(skb);
>> + iph->version = 4;
>> + iph->ihl = (sizeof(struct iphdr) + 4) >> 2;
>> + iph->tos = 0xc0;
>> + iph->tot_len = htons(sizeof(*iph) + 4 + sizeof(*ihv3));
>> + iph->frag_off = htons(IP_DF);
>> + iph->ttl = 1;
>> + iph->id = 0;
>> + iph->protocol = IPPROTO_IGMP;
>> + iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
>> + iph->saddr = htonl(INADDR_ANY);
>> + ((u8 *)&iph[1])[0] = IPOPT_RA;
>> + ((u8 *)&iph[1])[1] = 4;
>> + ((u8 *)&iph[1])[2] = 0;
>> + ((u8 *)&iph[1])[3] = 0;
>
> This reads weird to me. Why not use a local variable like:
> #define AMT_IPHDR_OPTS 4
> u8 buf[AMT_IPHDR_OPTS] = (u8 *)(iph + 1);
>
> and then use AMT_IPHDR_OPTS in place of the magic '4' in the skb
> manipulations.
>
Thanks, I will use what you suggested.
>> + ip_send_check(iph);
>> +
>> + eth = eth_hdr(skb);
>> + ether_addr_copy(eth->h_source, amt->dev->dev_addr);
>> + ip_eth_mc_map(htonl(INADDR_ALLHOSTS_GROUP), eth->h_dest);
>> + eth->h_proto = htons(ETH_P_IP);
>> +
>> + ihv3 = skb_pull(skb, sizeof(*iph) + 4);
>> + skb_reset_transport_header(skb);
>> + ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
>> + ihv3->code = 1;
>> + ihv3->group = 0;
>> + ihv3->qqic = amt->qi;
>> + ihv3->nsrcs = 0;
>> + ihv3->resv = 0;
>> + ihv3->suppress = false;
>> + ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv;
>> + ihv3->csum = 0;
>> + csum = &ihv3->csum;
>> + csum_start = (void *)ihv3;
>> + *csum = ip_compute_csum(csum_start, sizeof(*ihv3));
>> + offset = skb_transport_offset(skb);
>> + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
>> + skb->ip_summed = CHECKSUM_NONE;
>> +
>> + skb_push(skb, sizeof(*eth) + sizeof(*iph) + 4);
>> +
>> + return skb;
>> +}
>> +
>
>
>
>> +static void amt_send_request(struct amt_dev *amt, bool v6)
>> +{
>> + struct amt_header_request *amtrh;
>> + int hlen, tlen, offset;
>> + struct socket *sock;
>> + struct udphdr *udph;
>> + struct sk_buff *skb;
>> + struct iphdr *iph;
>> + struct rtable *rt;
>> + struct flowi4 fl4;
>> + u32 len;
>> + int err;
>> +
>> + rcu_read_lock();
>> + sock = rcu_dereference(amt->sock);
>> + if (!sock)
>> + goto out;
>> +
>> + if (!netif_running(amt->stream_dev) || !netif_running(amt->dev))
>> + goto out;
>> +
>> + rt = ip_route_output_ports(amt->net, &fl4, sock->sk,
>> + amt->remote_ip, amt->local_ip,
>> + amt->gw_port, amt->relay_port,
>> + IPPROTO_UDP, 0,
>> + amt->stream_dev->ifindex);
>> + if (IS_ERR(rt)) {
>> + amt->dev->stats.tx_errors++;
>> + goto out;
>> + }
>> +
>> + hlen = LL_RESERVED_SPACE(amt->dev);
>> + tlen = amt->dev->needed_tailroom;
>> + len = hlen + tlen + sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh);
>> + skb = netdev_alloc_skb_ip_align(amt->dev, len);
>> + if (!skb) {
>> + ip_rt_put(rt);
>> + amt->dev->stats.tx_errors++;
>> + goto out;
>> + }
>> +
>> + skb->priority = TC_PRIO_CONTROL;
>> + skb_dst_set(skb, &rt->dst);
>> +
>> + len = sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh);
>> + skb_reset_network_header(skb);
>> + skb_put(skb, len);
>> + amtrh = skb_pull(skb, sizeof(*iph) + sizeof(*udph));
>> + amtrh->version = 0;
>> + amtrh->type = AMT_MSG_REQUEST;
>> + amtrh->reserved1 = 0;
>> + amtrh->p = v6;
>> + amtrh->reserved2 = 0;
>> + amtrh->nonce = amt->nonce;
>> + skb_push(skb, sizeof(*udph));
>> + skb_reset_transport_header(skb);
>> + udph = udp_hdr(skb);
>> + udph->source = amt->gw_port;
>> + udph->dest = amt->relay_port;
>> + udph->len = htons(sizeof(*amtrh) + sizeof(*udph));
>> + udph->check = 0;
>> + offset = skb_transport_offset(skb);
>> + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
>> + udph->check = csum_tcpudp_magic(amt->local_ip, amt->remote_ip,
>> + sizeof(*udph) + sizeof(*amtrh),
>> + IPPROTO_UDP, skb->csum);
>> +
>> + skb_push(skb, sizeof(*iph));
>> + iph = ip_hdr(skb);
>> + iph->version = 4;
>> + iph->ihl = (sizeof(struct iphdr)) >> 2;
>> + iph->tos = 0xc0;
>
> why c0? Use a macro with a proper name.
>
Okay, I will make a new macro.
>> + iph->frag_off = 0;
>> + iph->ttl = ip4_dst_hoplimit(&rt->dst);
>> + iph->daddr = amt->remote_ip;
>> + iph->saddr = amt->local_ip;
>> + iph->protocol = IPPROTO_UDP;
>> + iph->tot_len = htons(len);
>> +
>> + skb->ip_summed = CHECKSUM_NONE;
>> + ip_select_ident(amt->net, skb, NULL);
>> + ip_send_check(iph);
>> + err = ip_local_out(amt->net, sock->sk, skb);
>> + if (unlikely(net_xmit_eval(err)))
>> + amt->dev->stats.tx_errors++;
>> +
>> +out:
>> + rcu_read_unlock();
>> +}
>> +
>> +static void amt_send_igmp_gq(struct amt_dev *amt,
>> + struct amt_tunnel_list *tunnel)
>> +{
>> + struct sk_buff *skb;
>> +
>> + skb = amt_build_igmp_gq(amt);
>> + if (!skb)
>> + return;
>> +
>> + amt_skb_cb(skb)->tunnel = tunnel;
>> + dev_queue_xmit(skb);
>> +}
>> +
>
>> +
>> +static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct
net_device *dev)
>> +{
>> + struct amt_dev *amt = netdev_priv(dev);
>> + struct amt_tunnel_list *tunnel;
>> + bool report = false;
>> + struct igmphdr *ih;
>> + bool query = false;
>> + struct iphdr *iph;
>> + bool data = false;
>> + bool v6 = false;
>> +
>> + rcu_read_lock();
>
> As I recall ndo_start_xmit functions are called inside of
> rcu_read_lock_bh().
>
>
Thanks, I checked out it.
dev_queue_xmit() hold rcu_read_lock_bh() then it calls ->ndo_start_xmit().
I will remove it.
>> + iph = ip_hdr(skb);> + if (iph->version == 4) {
>> + if (!ipv4_is_multicast(iph->daddr))
>> + goto free;
>> +
>> + if (!ip_mc_check_igmp(skb)) {
>> + ih = igmp_hdr(skb);
>> + switch (ih->type) {
>> + case IGMPV3_HOST_MEMBERSHIP_REPORT:
>> + case IGMP_HOST_MEMBERSHIP_REPORT:
>> + report = true;
>> + break;
>> + case IGMP_HOST_MEMBERSHIP_QUERY:
>> + query = true;
>> + break;
>> + default:
>> + goto free;
>> + }
>> + } else {
>> + data = true;
>> + }
>> + v6 = false;
>> + } else {
>> + dev->stats.tx_errors++;
>> + goto free;
>> + }
>> +
>> + if (!pskb_may_pull(skb, sizeof(struct ethhdr)))
>> + goto free;
>> +
>> + skb_pull(skb, sizeof(struct ethhdr));
>> +
>> + if (amt->mode == AMT_MODE_GATEWAY) {
>> + /* Gateway only passes IGMP/MLD packets */
>> + if (!report)
>> + goto free;
>> + if ((!v6 && !amt->ready4) || (v6 && !amt->ready6))
>> + goto free;
>> + if (amt_send_membership_update(amt, skb, v6))
>> + goto free;
>> + goto unlock;
>> + } else if (amt->mode == AMT_MODE_RELAY) {
>> + if (query) {
>> + tunnel = amt_skb_cb(skb)->tunnel;
>> + if (!tunnel) {
>> + WARN_ON(1);
>> + goto free;
>> + }
>> +
>> + /* Do not forward unexpected query */
>> + if (amt_send_membership_query(amt, skb, tunnel, v6))
>> + goto free;
>> + goto unlock;
>> + }
>> +
>> + if (!data)
>> + goto free;
>> + list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list)
>> + amt_send_multicast_data(amt, skb, tunnel, v6);
>> + }
>> +
>> + dev_kfree_skb(skb);
>> + rcu_read_unlock();
>> + return NETDEV_TX_OK;
>> +free:
>> + dev_kfree_skb(skb);
>> +unlock:
>> + rcu_read_unlock();
>> + dev->stats.tx_dropped++;
>> + return NETDEV_TX_OK;
>> +}
>> +
>
>
>
>
>> +static bool amt_request_handler(struct amt_dev *amt, struct sk_buff
*skb)
>> +{
>> + struct amt_header_request *amtrh;
>> + struct amt_tunnel_list *tunnel;
>> + unsigned long long key;
>> + struct udphdr *udph;
>> + struct iphdr *iph;
>> + u64 mac;
>> + int i;
>> +
>> + if (!pskb_may_pull(skb, sizeof(*udph) + sizeof(*amtrh)))
>> + return true;
>> +
>> + iph = ip_hdr(skb);
>> + udph = udp_hdr(skb);
>> + amtrh = (struct amt_header_request *)(udp_hdr(skb) + 1);
>> +
>> + if (amtrh->reserved1 || amtrh->reserved2 || amtrh->version)
>> + return true;
>> +
>> + list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list)
>> + if (tunnel->ip4 == iph->saddr)
>> + goto send;
>> +
>> + if (amt->nr_tunnels >= amt->max_tunnels) {
>> + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
>> + return true;
>> + }
>> +
>> + tunnel = kmalloc(sizeof(*tunnel) +
>
> kzalloc and then you do not need the '= 0' below.
>
Thanks, I will use kzalloc instead of kmalloc.
>> + (sizeof(struct hlist_head) * amt->hash_buckets),
>> + GFP_ATOMIC);
>> + if (!tunnel)
>> + return true;
>> +
>> + tunnel->source_port = udph->source;
>> + tunnel->ip4 = iph->saddr;
>> + tunnel->nr_groups = 0;
>> + tunnel->nr_sources = 0;
>> +
>> + memcpy(&key, &tunnel->key, sizeof(unsigned long long));
>> + tunnel->reserved = 0;
>> + tunnel->amt = amt;
>> + spin_lock_init(&tunnel->lock);
>> + for (i = 0; i < amt->hash_buckets; i++)
>> + INIT_HLIST_HEAD(&tunnel->groups[i]);
>> +
>> + INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire);
>> +
>> + spin_lock_bh(&amt->lock);
>> + list_add_tail_rcu(&tunnel->list, &amt->tunnel_list);
>> + tunnel->key = amt->key;
>> + amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
>> + amt->nr_tunnels++;
>> + mod_delayed_work(amt_wq, &tunnel->gc_wq,
>> + msecs_to_jiffies(amt_gmi(amt)));
>> + spin_unlock_bh(&amt->lock);
>> +
>> +send:
>> + tunnel->nonce = amtrh->nonce;
>> + mac = siphash_3u32((__force u32)tunnel->ip4,
>> + (__force u32)tunnel->source_port,
>> + (__force u32)tunnel->nonce,
>> + &tunnel->key);
>> + tunnel->mac = mac >> 16;
>> +
>> + if (!netif_running(amt->dev) || !netif_running(amt->stream_dev))
>> + return true;
>> +
>> + if (!amtrh->p)
>> + amt_send_igmp_gq(amt, tunnel);
>> +
>> + return false;
>> +}
>> +
>
> you create some selftests and include some torture cases around cleanup
> of all the work activity.
>
Okay, I will create selftests!
Thanks a lot for your review!
Thanks,
Taehee
Powered by blists - more mailing lists