Message-ID: <ac69dddd-0346-d663-bacf-bb03f01b0b96@gmail.com>
Date: Wed, 27 Oct 2021 09:00:57 -0600
From: David Ahern <dsahern@...il.com>
To: Taehee Yoo <ap420073@...il.com>, davem@...emloft.net,
kuba@...nel.org, dsahern@...nel.org, netdev@...r.kernel.org
Cc: dkirjanov@...e.de
Subject: Re: [PATCH net-next 2/4 v4] amt: add data plane of amt interface
On 10/26/21 9:10 AM, Taehee Yoo wrote:
> diff --git a/drivers/net/amt.c b/drivers/net/amt.c
> index 8d4782c66cde..c3ac94b6d3e8 100644
> --- a/drivers/net/amt.c
> +++ b/drivers/net/amt.c
> @@ -31,6 +31,1220 @@
>
> static struct workqueue_struct *amt_wq;
>
> +static char *status_str[] = {
> + "AMT_STATUS_INIT",
> + "AMT_STATUS_SENT_DISCOVERY",
> + "AMT_STATUS_RECEIVED_DISCOVERY",
> + "AMT_STATUS_SENT_ADVERTISEMENT",
> + "AMT_STATUS_RECEIVED_ADVERTISEMENT",
> + "AMT_STATUS_SENT_REQUEST",
> + "AMT_STATUS_RECEIVED_REQUEST",
> + "AMT_STATUS_SENT_QUERY",
> + "AMT_STATUS_RECEIVED_QUERY",
> + "AMT_STATUS_SENT_UPDATE",
> + "AMT_STATUS_RECEIVED_UPDATE",
> +};
> +
> +static char *type_str[] = {
> + "AMT_MSG_DISCOVERY",
> + "AMT_MSG_ADVERTISEMENT",
> + "AMT_MSG_REQUEST",
> + "AMT_MSG_MEMBERSHIP_QUERY",
> + "AMT_MSG_MEMBERSHIP_UPDATE",
> + "AMT_MSG_MULTICAST_DATA",
> + "AMT_MSG_TEATDOWM",
TEARDOWN is misspelled.
> +};
> +
> +static struct amt_skb_cb *amt_skb_cb(struct sk_buff *skb)
> +{
> + return (struct amt_skb_cb *)((void *)skb->cb +
> + sizeof(struct qdisc_skb_cb));
how will you know if sizeof(struct qdisc_skb_cb) + sizeof(struct
amt_skb_cb) exceeds the cb space? You should add a BUILD_BUG_ON.
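Untested, but something along these lines at the top of amt_skb_cb()
should catch it at compile time (sizeof_field() is the helper I have in
mind):

	/* make sure qdisc_skb_cb + amt_skb_cb still fit in skb->cb[] */
	BUILD_BUG_ON(sizeof(struct qdisc_skb_cb) + sizeof(struct amt_skb_cb) >
		     sizeof_field(struct sk_buff, cb));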
> +}
> +
> +static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
> +{
> + int hlen = LL_RESERVED_SPACE(amt->dev);
> + int tlen = amt->dev->needed_tailroom;
> + struct igmpv3_query *ihv3;
> + void *csum_start = NULL;
> + __sum16 *csum = NULL;
> + struct sk_buff *skb;
> + struct ethhdr *eth;
> + struct iphdr *iph;
> + unsigned int len;
> + int offset;
> +
> + len = hlen + tlen + sizeof(*iph) + 4 + sizeof(*ihv3);
> + skb = netdev_alloc_skb_ip_align(amt->dev, len);
> + if (!skb)
> + return NULL;
> +
> + skb_reserve(skb, hlen);
> + skb_push(skb, sizeof(*eth));
> + skb->protocol = htons(ETH_P_IP);
> + skb_reset_mac_header(skb);
> + skb->priority = TC_PRIO_CONTROL;
> + skb_put(skb, sizeof(*iph) + 4 + sizeof(*ihv3));
> + skb_pull(skb, sizeof(*eth));
> + skb_reset_network_header(skb);
> +
> + iph = ip_hdr(skb);
> + iph->version = 4;
> + iph->ihl = (sizeof(struct iphdr) + 4) >> 2;
> + iph->tos = 0xc0;
> + iph->tot_len = htons(sizeof(*iph) + 4 + sizeof(*ihv3));
> + iph->frag_off = htons(IP_DF);
> + iph->ttl = 1;
> + iph->id = 0;
> + iph->protocol = IPPROTO_IGMP;
> + iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
> + iph->saddr = htonl(INADDR_ANY);
> + ((u8 *)&iph[1])[0] = IPOPT_RA;
> + ((u8 *)&iph[1])[1] = 4;
> + ((u8 *)&iph[1])[2] = 0;
> + ((u8 *)&iph[1])[3] = 0;
This reads weird to me. Why not use a local variable like:
#define AMT_IPHDR_OPTS 4
	u8 *buf = (u8 *)(iph + 1);
and then use AMT_IPHDR_OPTS in place of the magic '4' in the skb
manipulations.
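i.e., something like this (just a sketch of what I mean):

	/* IP Router Alert option: type, length, two zero bytes */
	buf[0] = IPOPT_RA;
	buf[1] = AMT_IPHDR_OPTS;
	buf[2] = 0;
	buf[3] = 0;

	skb_put(skb, sizeof(*iph) + AMT_IPHDR_OPTS + sizeof(*ihv3));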
> + ip_send_check(iph);
> +
> + eth = eth_hdr(skb);
> + ether_addr_copy(eth->h_source, amt->dev->dev_addr);
> + ip_eth_mc_map(htonl(INADDR_ALLHOSTS_GROUP), eth->h_dest);
> + eth->h_proto = htons(ETH_P_IP);
> +
> + ihv3 = skb_pull(skb, sizeof(*iph) + 4);
> + skb_reset_transport_header(skb);
> + ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
> + ihv3->code = 1;
> + ihv3->group = 0;
> + ihv3->qqic = amt->qi;
> + ihv3->nsrcs = 0;
> + ihv3->resv = 0;
> + ihv3->suppress = false;
> + ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv;
> + ihv3->csum = 0;
> + csum = &ihv3->csum;
> + csum_start = (void *)ihv3;
> + *csum = ip_compute_csum(csum_start, sizeof(*ihv3));
> + offset = skb_transport_offset(skb);
> + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
> + skb->ip_summed = CHECKSUM_NONE;
> +
> + skb_push(skb, sizeof(*eth) + sizeof(*iph) + 4);
> +
> + return skb;
> +}
> +
> +static void amt_send_request(struct amt_dev *amt, bool v6)
> +{
> + struct amt_header_request *amtrh;
> + int hlen, tlen, offset;
> + struct socket *sock;
> + struct udphdr *udph;
> + struct sk_buff *skb;
> + struct iphdr *iph;
> + struct rtable *rt;
> + struct flowi4 fl4;
> + u32 len;
> + int err;
> +
> + rcu_read_lock();
> + sock = rcu_dereference(amt->sock);
> + if (!sock)
> + goto out;
> +
> + if (!netif_running(amt->stream_dev) || !netif_running(amt->dev))
> + goto out;
> +
> + rt = ip_route_output_ports(amt->net, &fl4, sock->sk,
> + amt->remote_ip, amt->local_ip,
> + amt->gw_port, amt->relay_port,
> + IPPROTO_UDP, 0,
> + amt->stream_dev->ifindex);
> + if (IS_ERR(rt)) {
> + amt->dev->stats.tx_errors++;
> + goto out;
> + }
> +
> + hlen = LL_RESERVED_SPACE(amt->dev);
> + tlen = amt->dev->needed_tailroom;
> + len = hlen + tlen + sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh);
> + skb = netdev_alloc_skb_ip_align(amt->dev, len);
> + if (!skb) {
> + ip_rt_put(rt);
> + amt->dev->stats.tx_errors++;
> + goto out;
> + }
> +
> + skb->priority = TC_PRIO_CONTROL;
> + skb_dst_set(skb, &rt->dst);
> +
> + len = sizeof(*iph) + sizeof(*udph) + sizeof(*amtrh);
> + skb_reset_network_header(skb);
> + skb_put(skb, len);
> + amtrh = skb_pull(skb, sizeof(*iph) + sizeof(*udph));
> + amtrh->version = 0;
> + amtrh->type = AMT_MSG_REQUEST;
> + amtrh->reserved1 = 0;
> + amtrh->p = v6;
> + amtrh->reserved2 = 0;
> + amtrh->nonce = amt->nonce;
> + skb_push(skb, sizeof(*udph));
> + skb_reset_transport_header(skb);
> + udph = udp_hdr(skb);
> + udph->source = amt->gw_port;
> + udph->dest = amt->relay_port;
> + udph->len = htons(sizeof(*amtrh) + sizeof(*udph));
> + udph->check = 0;
> + offset = skb_transport_offset(skb);
> + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
> + udph->check = csum_tcpudp_magic(amt->local_ip, amt->remote_ip,
> + sizeof(*udph) + sizeof(*amtrh),
> + IPPROTO_UDP, skb->csum);
> +
> + skb_push(skb, sizeof(*iph));
> + iph = ip_hdr(skb);
> + iph->version = 4;
> + iph->ihl = (sizeof(struct iphdr)) >> 2;
> + iph->tos = 0xc0;
why c0? Use a macro with a proper name.
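e.g., something like this (the name is just a suggestion; if I remember
the uapi ip.h right, IPTOS_PREC_INTERNETCONTROL is already 0xc0 and
could be used instead):

#define AMT_TOS		0xc0	/* IP precedence: internetwork control */

	iph->tos = AMT_TOS;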
> + iph->frag_off = 0;
> + iph->ttl = ip4_dst_hoplimit(&rt->dst);
> + iph->daddr = amt->remote_ip;
> + iph->saddr = amt->local_ip;
> + iph->protocol = IPPROTO_UDP;
> + iph->tot_len = htons(len);
> +
> + skb->ip_summed = CHECKSUM_NONE;
> + ip_select_ident(amt->net, skb, NULL);
> + ip_send_check(iph);
> + err = ip_local_out(amt->net, sock->sk, skb);
> + if (unlikely(net_xmit_eval(err)))
> + amt->dev->stats.tx_errors++;
> +
> +out:
> + rcu_read_unlock();
> +}
> +
> +static void amt_send_igmp_gq(struct amt_dev *amt,
> + struct amt_tunnel_list *tunnel)
> +{
> + struct sk_buff *skb;
> +
> + skb = amt_build_igmp_gq(amt);
> + if (!skb)
> + return;
> +
> + amt_skb_cb(skb)->tunnel = tunnel;
> + dev_queue_xmit(skb);
> +}
> +
> +
> +static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> +{
> + struct amt_dev *amt = netdev_priv(dev);
> + struct amt_tunnel_list *tunnel;
> + bool report = false;
> + struct igmphdr *ih;
> + bool query = false;
> + struct iphdr *iph;
> + bool data = false;
> + bool v6 = false;
> +
> + rcu_read_lock();
As I recall, ndo_start_xmit functions are called inside of
rcu_read_lock_bh(), so the explicit rcu_read_lock()/unlock() here should
not be needed.
> + iph = ip_hdr(skb);
> + if (iph->version == 4) {
> + if (!ipv4_is_multicast(iph->daddr))
> + goto free;
> +
> + if (!ip_mc_check_igmp(skb)) {
> + ih = igmp_hdr(skb);
> + switch (ih->type) {
> + case IGMPV3_HOST_MEMBERSHIP_REPORT:
> + case IGMP_HOST_MEMBERSHIP_REPORT:
> + report = true;
> + break;
> + case IGMP_HOST_MEMBERSHIP_QUERY:
> + query = true;
> + break;
> + default:
> + goto free;
> + }
> + } else {
> + data = true;
> + }
> + v6 = false;
> + } else {
> + dev->stats.tx_errors++;
> + goto free;
> + }
> +
> + if (!pskb_may_pull(skb, sizeof(struct ethhdr)))
> + goto free;
> +
> + skb_pull(skb, sizeof(struct ethhdr));
> +
> + if (amt->mode == AMT_MODE_GATEWAY) {
> + /* Gateway only passes IGMP/MLD packets */
> + if (!report)
> + goto free;
> + if ((!v6 && !amt->ready4) || (v6 && !amt->ready6))
> + goto free;
> + if (amt_send_membership_update(amt, skb, v6))
> + goto free;
> + goto unlock;
> + } else if (amt->mode == AMT_MODE_RELAY) {
> + if (query) {
> + tunnel = amt_skb_cb(skb)->tunnel;
> + if (!tunnel) {
> + WARN_ON(1);
> + goto free;
> + }
> +
> + /* Do not forward unexpected query */
> + if (amt_send_membership_query(amt, skb, tunnel, v6))
> + goto free;
> + goto unlock;
> + }
> +
> + if (!data)
> + goto free;
> + list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list)
> + amt_send_multicast_data(amt, skb, tunnel, v6);
> + }
> +
> + dev_kfree_skb(skb);
> + rcu_read_unlock();
> + return NETDEV_TX_OK;
> +free:
> + dev_kfree_skb(skb);
> +unlock:
> + rcu_read_unlock();
> + dev->stats.tx_dropped++;
> + return NETDEV_TX_OK;
> +}
> +
> +static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
> +{
> + struct amt_header_request *amtrh;
> + struct amt_tunnel_list *tunnel;
> + unsigned long long key;
> + struct udphdr *udph;
> + struct iphdr *iph;
> + u64 mac;
> + int i;
> +
> + if (!pskb_may_pull(skb, sizeof(*udph) + sizeof(*amtrh)))
> + return true;
> +
> + iph = ip_hdr(skb);
> + udph = udp_hdr(skb);
> + amtrh = (struct amt_header_request *)(udp_hdr(skb) + 1);
> +
> + if (amtrh->reserved1 || amtrh->reserved2 || amtrh->version)
> + return true;
> +
> + list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list)
> + if (tunnel->ip4 == iph->saddr)
> + goto send;
> +
> + if (amt->nr_tunnels >= amt->max_tunnels) {
> + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
> + return true;
> + }
> +
> + tunnel = kmalloc(sizeof(*tunnel) +
Use kzalloc() and then you do not need the '= 0' assignments below; see
the sketch after this call.
> + (sizeof(struct hlist_head) * amt->hash_buckets),
> + GFP_ATOMIC);
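i.e., the allocation becomes something like:

	tunnel = kzalloc(sizeof(*tunnel) +
			 (sizeof(struct hlist_head) * amt->hash_buckets),
			 GFP_ATOMIC);

which also lets you drop the nr_groups, nr_sources and reserved '= 0'
lines.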
> + if (!tunnel)
> + return true;
> +
> + tunnel->source_port = udph->source;
> + tunnel->ip4 = iph->saddr;
> + tunnel->nr_groups = 0;
> + tunnel->nr_sources = 0;
> +
> + memcpy(&key, &tunnel->key, sizeof(unsigned long long));
> + tunnel->reserved = 0;
> + tunnel->amt = amt;
> + spin_lock_init(&tunnel->lock);
> + for (i = 0; i < amt->hash_buckets; i++)
> + INIT_HLIST_HEAD(&tunnel->groups[i]);
> +
> + INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire);
> +
> + spin_lock_bh(&amt->lock);
> + list_add_tail_rcu(&tunnel->list, &amt->tunnel_list);
> + tunnel->key = amt->key;
> + amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
> + amt->nr_tunnels++;
> + mod_delayed_work(amt_wq, &tunnel->gc_wq,
> + msecs_to_jiffies(amt_gmi(amt)));
> + spin_unlock_bh(&amt->lock);
> +
> +send:
> + tunnel->nonce = amtrh->nonce;
> + mac = siphash_3u32((__force u32)tunnel->ip4,
> + (__force u32)tunnel->source_port,
> + (__force u32)tunnel->nonce,
> + &tunnel->key);
> + tunnel->mac = mac >> 16;
> +
> + if (!netif_running(amt->dev) || !netif_running(amt->stream_dev))
> + return true;
> +
> + if (!amtrh->p)
> + amt_send_igmp_gq(amt, tunnel);
> +
> + return false;
> +}
> +
It would be good if you create some selftests and include some torture
cases around cleanup of all the work activity.