[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <428eae59-83f1-4937-b376-0167521f107f@davidwei.uk>
Date: Sat, 16 Dec 2023 12:52:31 -0800
From: David Wei <dw@...idwei.uk>
To: Jiri Pirko <jiri@...nulli.us>
Cc: Jakub Kicinski <kuba@...nel.org>, Sabrina Dubroca <sd@...asysnail.net>,
netdev@...r.kernel.org, "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Paolo Abeni <pabeni@...hat.com>
Subject: Re: [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to
be connected
On 2023-12-16 01:21, Jiri Pirko wrote:
> Fri, Dec 15, 2023 at 08:13:45PM CET, dw@...idwei.uk wrote:
>> On 2023-12-15 03:11, Jiri Pirko wrote:
>>> Thu, Dec 14, 2023 at 10:24:40PM CET, dw@...idwei.uk wrote:
>>>> Add a debugfs file in
>>>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer
>>>>
>>>> Writing "M B" to this file will link port A of netdevsim N with port B of
>>>> netdevsim M.
>>>>
>>>> Reading this file will return the linked netdevsim id and port, if any.
>>>>
>>>> Signed-off-by: David Wei <dw@...idwei.uk>
>>>> ---
>>>> drivers/net/netdevsim/bus.c | 17 ++++++
>>>> drivers/net/netdevsim/dev.c | 88 +++++++++++++++++++++++++++++++
>>>> drivers/net/netdevsim/netdev.c | 6 +++
>>>> drivers/net/netdevsim/netdevsim.h | 3 ++
>>>> 4 files changed, 114 insertions(+)
>>>>
>>>> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
>>>> index bcbc1e19edde..1ef95661a3f5 100644
>>>> --- a/drivers/net/netdevsim/bus.c
>>>> +++ b/drivers/net/netdevsim/bus.c
>>>> @@ -323,6 +323,23 @@ static struct device_driver nsim_driver = {
>>>> .owner = THIS_MODULE,
>>>> };
>>>>
>>>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id)
>>>
>>> This sounds definitelly incorrect. You should not need to touch bus.c
>>> code. It arranges the bus and devices on it. The fact that a device is
>>> probed or not is parallel to this.
>>>
>>> I think you need to maintain a separate list/xarray of netdevsim devices
>>> probed by nsim_drv_probe()
>>
>> There is a 1:1 relationship between bus devices (nsim_bus_dev) and nsim devices
>
> Of course it is not. I thought I exaplained that. If you unbind (or not
> bind at all), it is still in this list, however not probed.
Right, I understand now thanks. The 1:1 relationship is initially true, but
devices can be removed by unbinding their drivers. I tried manually unbinding
using /sys/bus/netdevsim/drivers/netdevsim/unbind which removes a device but
keeps the bus device.
The current implementation is not resilient to this and I can trigger crashes
e.g. by unbinding then trying to add a port.
>
>
>> (nsim_dev). Adding a separate list for nsim devices seemed redundant to me when
>> there is already a list for bus devices.
>
> Again, please don't call into bus.c here.
Got it, I will maintain a separate list of bound devices.
>
>
>>
>>>
>>>
>>>> +{
>>>> + struct nsim_bus_dev *nsim_bus_dev;
>>>> +
>>>> + mutex_lock(&nsim_bus_dev_list_lock);
>>>> + list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) {
>>>> + if (nsim_bus_dev->dev.id == id) {
>>>> + get_device(&nsim_bus_dev->dev);
>>>> + mutex_unlock(&nsim_bus_dev_list_lock);
>>>> + return nsim_bus_dev;
>>>> + }
>>>> + }
>>>> + mutex_unlock(&nsim_bus_dev_list_lock);
>>>> +
>>>> + return NULL;
>>>> +}
>>>> +
>>>> int nsim_bus_init(void)
>>>> {
>>>> int err;
>>>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>>>> index b4d3b9cde8bd..034145ba1861 100644
>>>> --- a/drivers/net/netdevsim/dev.c
>>>> +++ b/drivers/net/netdevsim/dev.c
>>>> @@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
>>>> .owner = THIS_MODULE,
>>>> };
>>>>
>>>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
>>>> + size_t count, loff_t *ppos)
>>>> +{
>>>> + struct nsim_dev_port *nsim_dev_port;
>>>> + struct netdevsim *peer;
>>>> + unsigned int id, port;
>>>> + char buf[23];
>>>> + ssize_t len;
>>>> +
>>>> + nsim_dev_port = file->private_data;
>>>> + rcu_read_lock();
>>>> + peer = rcu_dereference(nsim_dev_port->ns->peer);
>>>> + if (!peer) {
>>>> + rcu_read_unlock();
>>>> + return 0;
>>>> + }
>>>> +
>>>> + id = peer->nsim_bus_dev->dev.id;
>>>> + port = peer->nsim_dev_port->port_index;
>>>> + len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
>>>> +
>>>> + rcu_read_unlock();
>>>> + return simple_read_from_buffer(data, count, ppos, buf, len);
>>>> +}
>>>> +
>>>> +static ssize_t nsim_dev_peer_write(struct file *file,
>>>> + const char __user *data,
>>>> + size_t count, loff_t *ppos)
>>>> +{
>>>> + struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>>>> + struct nsim_bus_dev *peer_bus_dev;
>>>> + struct nsim_dev *peer_dev;
>>>> + unsigned int id, port;
>>>> + char buf[22];
>>>> + ssize_t ret;
>>>> +
>>>> + if (count >= sizeof(buf))
>>>> + return -ENOSPC;
>>>> +
>>>> + ret = copy_from_user(buf, data, count);
>>>> + if (ret)
>>>> + return -EFAULT;
>>>> + buf[count] = '\0';
>>>> +
>>>> + ret = sscanf(buf, "%u %u", &id, &port);
>>>> + if (ret != 2) {
>>>> + pr_err("Format for adding a peer is \"id port\" (uint uint)");
>>>> + return -EINVAL;
>>>> + }
>>>> +
>>>> + /* invalid netdevsim id */
>>>> + peer_bus_dev = nsim_bus_dev_get(id);
>>>> + if (!peer_bus_dev)
>>>> + return -EINVAL;
>>>> +
>>>> + ret = -EINVAL;
>>>> + /* cannot link to self */
>>>> + nsim_dev_port = file->private_data;
>>>> + if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev &&
>>>> + nsim_dev_port->port_index == port)
>>>> + goto out;
>>>> +
>>>> + peer_dev = dev_get_drvdata(&peer_bus_dev->dev);
>>>
>>> Again, no bus touching should be needed. (btw, this could be null is dev
>>> is not probed)
>>
>> That's fair, I can do a null check.
>>
>>>
>>>
>>>> + list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) {
>>>> + if (peer_dev_port->port_index != port)
>>>> + continue;
>>>> + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>>>> + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
>>>
>>> What is stopping another cpu from setting different peer for the same
>>> port here, making a mess?
>>
>> Looking into RCU a bit more, you're right that it does not protect from
>> multiple writers. Would adding a lock (spinlock?) to nsim_dev and taking that
>> be sufficient here?
>>
>> Or what if I took rtnl_lock()?
>
> You have multiple choices how to handle this.
I'll start with a spinlock but it would be good to know what the 'best' option
is in this case. I don't have a well calibrated sense for it yet.
>
>>
>>>
>>>
>>>> + ret = count;
>>>> + goto out;
>>>> + }
>>>> +
>>>> +out:
>>>> + put_device(&peer_bus_dev->dev);
>>>> + return ret;
>>>> +}
>>>> +
>>>> +static const struct file_operations nsim_dev_peer_fops = {
>>>> + .open = simple_open,
>>>> + .read = nsim_dev_peer_read,
>>>> + .write = nsim_dev_peer_write,
>>>> + .llseek = generic_file_llseek,
>>>> + .owner = THIS_MODULE,
>>>> +};
>>>> +
>>>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>>>> struct nsim_dev_port *nsim_dev_port)
>>>> {
>>>> @@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>>>> }
>>>> debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
>>>>
>>>> + debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
>>>> + nsim_dev_port, &nsim_dev_peer_fops);
>>>> +
>>>> return 0;
>>>> }
>>>>
>>>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>>>> index aecaf5f44374..e290c54b0e70 100644
>>>> --- a/drivers/net/netdevsim/netdev.c
>>>> +++ b/drivers/net/netdevsim/netdev.c
>>>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>>>> ns->nsim_dev = nsim_dev;
>>>> ns->nsim_dev_port = nsim_dev_port;
>>>> ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
>>>> + RCU_INIT_POINTER(ns->peer, NULL);
>>>> SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
>>>> SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
>>>> nsim_ethtool_init(ns);
>>>> @@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>>>> void nsim_destroy(struct netdevsim *ns)
>>>> {
>>>> struct net_device *dev = ns->netdev;
>>>> + struct netdevsim *peer;
>>>>
>>>> rtnl_lock();
>>>> + peer = rtnl_dereference(ns->peer);
>>>> + RCU_INIT_POINTER(ns->peer, NULL);
>>>> unregister_netdevice(dev);
>>>> + if (peer)
>>>> + RCU_INIT_POINTER(peer->peer, NULL);
>>>
>>> What is stopping the another CPU from setting this back to this "ns"?
>>> Or what is stopping another netdevsim port from setting this ns while
>>> going away?
>>>
>>> Do you rely on RTNL_LOCK in any way (other then synchronize_net() in
>>> unlock())? If yes, looks wrong.
>>>
>>> This ns->peer update locking looks very broken to me :/
>>
>> As above, would a spinlock on nsim_dev or taking rtnl_lock() in
>> nsim_dev_peer_write() resolve this?
>>
>>>
>>>
>>>
>>>
>>>> if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
>>>> nsim_macsec_teardown(ns);
>>>> nsim_ipsec_teardown(ns);
>>>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>>>> index 028c825b86db..61ac3a80cf9a 100644
>>>> --- a/drivers/net/netdevsim/netdevsim.h
>>>> +++ b/drivers/net/netdevsim/netdevsim.h
>>>> @@ -125,6 +125,7 @@ struct netdevsim {
>>>> } udp_ports;
>>>>
>>>> struct nsim_ethtool ethtool;
>>>> + struct netdevsim __rcu *peer;
>>>> };
>>>>
>>>> struct netdevsim *
>>>> @@ -415,5 +416,7 @@ struct nsim_bus_dev {
>>>> bool init;
>>>> };
>>>>
>>>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id);
>>>> +
>>>> int nsim_bus_init(void);
>>>> void nsim_bus_exit(void);
>>>> --
>>>> 2.39.3
>>>>
Powered by blists - more mailing lists