[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ba294077-8734-8ff3-d914-c4baa3821c2f@blackwall.org>
Date: Thu, 26 Oct 2023 09:21:48 +0300
From: Nikolay Aleksandrov <razor@...ckwall.org>
To: Jiri Pirko <jiri@...nulli.us>
Cc: Daniel Borkmann <daniel@...earbox.net>, bpf@...r.kernel.org,
netdev@...r.kernel.org, martin.lau@...ux.dev, ast@...nel.org,
andrii@...nel.org, john.fastabend@...il.com, sdf@...gle.com,
toke@...nel.org, kuba@...nel.org, andrew@...n.ch,
Toke Høiland-Jørgensen <toke@...hat.com>
Subject: Re: [PATCH bpf-next v4 1/7] netkit, bpf: Add bpf programmable net
device
On 10/26/23 08:26, Jiri Pirko wrote:
> Wed, Oct 25, 2023 at 09:21:01PM CEST, razor@...ckwall.org wrote:
>> On 10/25/23 18:47, Jiri Pirko wrote:
>>> Tue, Oct 24, 2023 at 11:48:58PM CEST, daniel@...earbox.net wrote:
>>>> This work adds a new, minimal BPF-programmable device called "netkit"
>> [snip]
>>>
>>> Couple of nitpicks below:
>>>
>>> [..]
>>>
>>>
>>
>> Hi,
>> Thanks for the review. I know about the nits below but decided against
>> changing them, more below each...
>>
>>>> +static int netkit_check_policy(int policy, struct nlattr *tb,
>>>> + struct netlink_ext_ack *extack)
>>>> +{
>>>> + switch (policy) {
>>>> + case NETKIT_PASS:
>>>> + case NETKIT_DROP:
>>>> + return 0;
>>>> + default:
>>>
>>> Isn't this job for netlink policy?
>>>
>>>
>>
>> This cannot be handled by policies AFAIK, because only 2 sparse values from
>> more are allowed. We could potentially do it through validate() but
>
> Perhaps good time to extend the netlink validation for list
> of values allowed?
>
>
>
>> it's the same minus the explicit policy type info. IMO this approach is good.
>>
>>>> + NL_SET_ERR_MSG_ATTR(extack, tb,
>>>> + "Provided default xmit policy not supported");
>>>> + return -EINVAL;
>>>> + }
>>>> +}
>>>> +
>>>> +static int netkit_check_mode(int mode, struct nlattr *tb,
>>>> + struct netlink_ext_ack *extack)
>>>> +{
>>>> + switch (mode) {
>>>> + case NETKIT_L2:
>>>> + case NETKIT_L3:
>>>> + return 0;
>>>> + default:
>>>
>>> Isn't this job for netlink policy?
>>>
>>>
>>
>> This one can be handled by policy indeed, but then we lose the nice user
>> error. Again can be done through validate(), but it's the same and we
>> lose explicit policy type information.
>
> But the netlink validator setups the extack properly. What's wrong with
> that?
>
>
"integer out of range" vs "Provided device mode can only be L2 or L3" ?
I like the second one better and it's more informative. The way to get
it is to use NLA_POLICY_VALIDATE_FN() as type and provide custom
validate() callback, which is identical to the current solution, I see
no added value in changing it.
>
>>
>>>> + NL_SET_ERR_MSG_ATTR(extack, tb,
>>>> + "Provided device mode can only be L2 or L3");
>>>> + return -EINVAL;
>>>> + }
>>>> +}
>>>> +
>>>> +static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
>>>> + struct netlink_ext_ack *extack)
>>>> +{
>>>> + struct nlattr *attr = tb[IFLA_ADDRESS];
>>>> +
>>>> + if (!attr)
>>>> + return 0;
>>>> + NL_SET_ERR_MSG_ATTR(extack, attr,
>>>> + "Setting Ethernet address is not supported");
>>>> + return -EOPNOTSUPP;
>>>> +}
>>>> +
>>>> +static struct rtnl_link_ops netkit_link_ops;
>>>> +
>>>> +static int netkit_new_link(struct net *src_net, struct net_device *dev,
>>>> + struct nlattr *tb[], struct nlattr *data[],
>>>> + struct netlink_ext_ack *extack)
>>>> +{
>>>> + struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
>>>> + enum netkit_action default_prim = NETKIT_PASS;
>>>> + enum netkit_action default_peer = NETKIT_PASS;
>>>> + enum netkit_mode mode = NETKIT_L3;
>>>> + unsigned char ifname_assign_type;
>>>> + struct ifinfomsg *ifmp = NULL;
>>>> + struct net_device *peer;
>>>> + char ifname[IFNAMSIZ];
>>>> + struct netkit *nk;
>>>> + struct net *net;
>>>> + int err;
>>>> +
>>>> + if (data) {
>>>> + if (data[IFLA_NETKIT_MODE]) {
>>>> + attr = data[IFLA_NETKIT_MODE];
>>>> + mode = nla_get_u32(attr);
>>>> + err = netkit_check_mode(mode, attr, extack);
>>>> + if (err < 0)
>>>> + return err;
>>>> + }
>>>> + if (data[IFLA_NETKIT_PEER_INFO]) {
>>>> + attr = data[IFLA_NETKIT_PEER_INFO];
>>>> + ifmp = nla_data(attr);
>>>> + err = rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack);
>>>> + if (err < 0)
>>>> + return err;
>>>> + err = netkit_validate(peer_tb, NULL, extack);
>>>> + if (err < 0)
>>>> + return err;
>>>> + tbp = peer_tb;
>>>> + }
>>>> + if (data[IFLA_NETKIT_POLICY]) {
>>>> + attr = data[IFLA_NETKIT_POLICY];
>>>> + default_prim = nla_get_u32(attr);
>>>> + err = netkit_check_policy(default_prim, attr, extack);
>>>> + if (err < 0)
>>>> + return err;
>>>> + }
>>>> + if (data[IFLA_NETKIT_PEER_POLICY]) {
>>>> + attr = data[IFLA_NETKIT_PEER_POLICY];
>>>> + default_peer = nla_get_u32(attr);
>>>> + err = netkit_check_policy(default_peer, attr, extack);
>>>> + if (err < 0)
>>>> + return err;
>>>> + }
>>>> + }
>>>> +
>>>> + if (ifmp && tbp[IFLA_IFNAME]) {
>>>> + nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
>>>> + ifname_assign_type = NET_NAME_USER;
>>>> + } else {
>>>> + strscpy(ifname, "nk%d", IFNAMSIZ);
>>>> + ifname_assign_type = NET_NAME_ENUM;
>>>> + }
>>>> +
>>>> + net = rtnl_link_get_net(src_net, tbp);
>>>> + if (IS_ERR(net))
>>>> + return PTR_ERR(net);
>>>> +
>>>> + peer = rtnl_create_link(net, ifname, ifname_assign_type,
>>>> + &netkit_link_ops, tbp, extack);
>>>> + if (IS_ERR(peer)) {
>>>> + put_net(net);
>>>> + return PTR_ERR(peer);
>>>> + }
>>>> +
>>>> + netif_inherit_tso_max(peer, dev);
>>>> +
>>>> + if (mode == NETKIT_L2)
>>>> + eth_hw_addr_random(peer);
>>>> + if (ifmp && dev->ifindex)
>>>> + peer->ifindex = ifmp->ifi_index;
>>>> +
>>>> + nk = netkit_priv(peer);
>>>> + nk->primary = false;
>>>> + nk->policy = default_peer;
>>>> + nk->mode = mode;
>>>> + bpf_mprog_bundle_init(&nk->bundle);
>>>> + RCU_INIT_POINTER(nk->active, NULL);
>>>> + RCU_INIT_POINTER(nk->peer, NULL);
>>>
>>> Aren't these already 0?
>>>
>>>
>>
>> Yep, they are. Here decided in favor of explicit show of values, although
>> it's minor and I'm fine either way.
>
> It is always confusing to see this. Reader might think this is necessary
> as if the mem was not previuosly cleared. The general approach is to
> rely on mem being zeroed, not sure why this is different.
>
>
Oh come on :) you really believe someone reading this code would start
inferring about netdev_alloc mem zeroing instead of getting a mental
picture of the expected state? Anyway, as I said I think this is way too
minor and it's fine either way, we can remove the explicit
initialization and rely on the implicit one.
>>
>>>> +
>>>> + err = register_netdevice(peer);
>>>> + put_net(net);
>>>> + if (err < 0)
>>>> + goto err_register_peer;
>>>> + netif_carrier_off(peer);
>>>> + if (mode == NETKIT_L2)
>>>> + dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);
>>>> +
>>>> + err = rtnl_configure_link(peer, NULL, 0, NULL);
>>>> + if (err < 0)
>>>> + goto err_configure_peer;
>>>> +
>>>> + if (mode == NETKIT_L2)
>>>> + eth_hw_addr_random(dev);
>>>> + if (tb[IFLA_IFNAME])
>>>> + nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
>>>> + else
>>>> + strscpy(dev->name, "nk%d", IFNAMSIZ);
>>>> +
>>>> + nk = netkit_priv(dev);
>>>> + nk->primary = true;
>>>> + nk->policy = default_prim;
>>>> + nk->mode = mode;
>>>> + bpf_mprog_bundle_init(&nk->bundle);
>>>> + RCU_INIT_POINTER(nk->active, NULL);
>>>> + RCU_INIT_POINTER(nk->peer, NULL);
>>>> +
>>>> + err = register_netdevice(dev);
>>>> + if (err < 0)
>>>> + goto err_configure_peer;
>>>> + netif_carrier_off(dev);
>>>> + if (mode == NETKIT_L2)
>>>> + dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);
>>>> +
>>>> + rcu_assign_pointer(netkit_priv(dev)->peer, peer);
>>>> + rcu_assign_pointer(netkit_priv(peer)->peer, dev);
>>>> + return 0;
>>>> +err_configure_peer:
>>>> + unregister_netdevice(peer);
>>>> + return err;
>>>> +err_register_peer:
>>>> + free_netdev(peer);
>>>> + return err;
>>>> +}
>>>> +
>>>
>>> [..]
>>
>> Cheers,
>> Nik
>>
Powered by blists - more mailing lists