lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <88c33cde-6572-43d1-9713-bc63d1faaa68@amperemail.onmicrosoft.com>
Date: Mon, 20 Oct 2025 20:54:11 -0400
From: Adam Young <admiyo@...eremail.onmicrosoft.com>
To: Jeremy Kerr <jk@...econstruct.com.au>,
 Adam Young <admiyo@...amperecomputing.com>,
 Matt Johnston <matt@...econstruct.com.au>,
 Andrew Lunn <andrew+netdev@...n.ch>, "David S. Miller"
 <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
 Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>
Cc: netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
 Sudeep Holla <sudeep.holla@....com>,
 Jonathan Cameron <Jonathan.Cameron@...wei.com>,
 Huisong Li <lihuisong@...wei.com>
Subject: Re: [PATCH v30 3/3] mctp pcc: Implement MCTP over PCC Transport


On 10/16/25 23:01, Jeremy Kerr wrote:
> Hi Adam,
>
> Looking pretty good, a few things inline:
>
>> diff --git a/drivers/net/mctp/mctp-pcc.c b/drivers/net/mctp/mctp-pcc.c
>> new file mode 100644
>> index 000000000000..927a525c1121
>> --- /dev/null
>> +++ b/drivers/net/mctp/mctp-pcc.c
>> @@ -0,0 +1,319 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * mctp-pcc.c - Driver for MCTP over PCC.
>> + * Copyright (c) 2024-2025, Ampere Computing LLC
>> + *
>> + */
>> +
>> +/* Implementation of MCTP over PCC DMTF Specification DSP0256
>> + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0292_1.0.0WIP50.pdf
>> + */
>> +
>> +#include <linux/acpi.h>
>> +#include <linux/hrtimer.h>
>> +#include <linux/if_arp.h>
>> +#include <linux/init.h>
>> +#include <linux/kernel.h>
>> +#include <linux/mailbox_client.h>
>> +#include <linux/module.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/string.h>
>> +
>> +#include <acpi/acpi_bus.h>
>> +#include <acpi/acpi_drivers.h>
>> +#include <acpi/acrestyp.h>
>> +#include <acpi/actbl.h>
>> +#include <acpi/pcc.h>
>> +#include <net/mctp.h>
>> +#include <net/mctpdevice.h>
>> +
>> +#define MCTP_SIGNATURE          "MCTP"
>> +#define MCTP_SIGNATURE_LENGTH   (sizeof(MCTP_SIGNATURE) - 1)
>> +#define MCTP_MIN_MTU            68
>> +#define PCC_DWORD_TYPE          0x0c
>> +
>> +struct mctp_pcc_mailbox {
>> +       u32 index;
>> +       struct pcc_mbox_chan *chan;
>> +       struct mbox_client client;
>> +};
>> +
>> +/* The netdev structure. One of these per PCC adapter. */
>> +struct mctp_pcc_ndev {
>> +       struct net_device *ndev;
>> +       struct acpi_device *acpi_device;
>> +       struct mctp_pcc_mailbox inbox;
>> +       struct mctp_pcc_mailbox outbox;
>> +};
>> +
>> +static void mctp_pcc_client_rx_callback(struct mbox_client *cl, void *mssg)
>> +{
>> +       struct pcc_extended_header pcc_header;
>> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
>> +       struct mctp_pcc_mailbox *inbox;
>> +       struct mctp_skb_cb *cb;
>> +       struct sk_buff *skb;
>> +       int size;
>> +
>> +       mctp_pcc_ndev = container_of(cl, struct mctp_pcc_ndev, inbox.client);
>> +       inbox = &mctp_pcc_ndev->inbox;
>> +       size = pcc_mbox_query_bytes_available(inbox->chan);
>> +       if (size == 0)
>> +               return;
>> +       skb = netdev_alloc_skb(mctp_pcc_ndev->ndev, size);
>> +       if (!skb) {
>> +               dev_dstats_rx_dropped(mctp_pcc_ndev->ndev);
>> +               return;
>> +       }
>> +       skb_put(skb, size);
>> +       skb->protocol = htons(ETH_P_MCTP);
>> +       pcc_mbox_read_from_buffer(inbox->chan, size, skb->data);
>> +       dev_dstats_rx_add(mctp_pcc_ndev->ndev, skb->len);
>> +       skb_reset_mac_header(skb);
>> +       skb_pull(skb, sizeof(pcc_header));
>> +       skb_reset_network_header(skb);
>> +       cb = __mctp_cb(skb);
>> +       cb->halen = 0;
>> +       netif_rx(skb);
>> +}
>> +
>> +static netdev_tx_t mctp_pcc_tx(struct sk_buff *skb, struct net_device *ndev)
>> +{
>> +       struct mctp_pcc_ndev *mpnd = netdev_priv(ndev);
>> +       struct pcc_extended_header *pcc_header;
>> +       int len = skb->len;
>> +       int rc;
>> +
>> +       rc = skb_cow_head(skb, sizeof(*pcc_header));
>> +       if (rc) {
>> +               dev_dstats_tx_dropped(ndev);
>> +               kfree_skb(skb);
>> +               return NETDEV_TX_OK;
>> +       }
>> +
>> +       pcc_header = skb_push(skb, sizeof(*pcc_header));
>> +       pcc_header->signature = PCC_SIGNATURE | mpnd->outbox.index;
>> +       pcc_header->flags = PCC_CMD_COMPLETION_NOTIFY;
>> +       memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
>> +       pcc_header->length = len + MCTP_SIGNATURE_LENGTH;
>> +       rc = mbox_send_message(mpnd->outbox.chan->mchan, skb);
>> +
>> +       if (rc < 0) {
>> +               netif_stop_queue(ndev);
>> +               return NETDEV_TX_BUSY;
>> +       }
> super minor: the grouping here (and a couple of other places) is a bit
> strange; try and keep the rc handler together with where rc is set, and
> the setup and send distinct. I'd typically do this as:
>
>         pcc_header = skb_push(skb, sizeof(*pcc_header));
>         pcc_header->signature = PCC_SIGNATURE | mpnd->outbox.index;
>         pcc_header->flags = PCC_CMD_COMPLETION_NOTIFY;
>         memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
>         pcc_header->length = len + MCTP_SIGNATURE_LENGTH;
>
>         rc = mbox_send_message(mpnd->outbox.chan->mchan, skb);
>         if (rc < 0) {
>                 netif_stop_queue(ndev);
>                 return NETDEV_TX_BUSY;
>         }

OK I can see  the rationale.  I do find that easier to read.


>
>> +
>> +       dev_dstats_tx_add(ndev, len);
>> +       return NETDEV_TX_OK;
>> +}
>> +
>> +static void mctp_pcc_tx_prepare(struct mbox_client *cl, void *mssg)
>> +{
>> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
>> +       struct mctp_pcc_mailbox *outbox;
>> +       struct sk_buff *skb = mssg;
>> +       int len_sent;
>> +
>> +       mctp_pcc_ndev = container_of(cl, struct mctp_pcc_ndev, outbox.client);
>> +       outbox = &mctp_pcc_ndev->outbox;
>> +
>> +       if (!skb)
>> +               return;
>> +
>> +       len_sent = pcc_mbox_write_to_buffer(outbox->chan, skb->len, skb->data);
>> +       if (len_sent == 0)
>> +               pr_info("packet dropped");
> You probably don't want a bare pr_info() in the failure path here;
> either something debug level, or ratelimited.
>
> and probably use a netdev-specific log function (netdev_dbg() perhaps),
> so you get the device information too.
>
> I'm not super clear on this failure mode, do you need to update the
> tx statistics on the drop here? In which case, you may not need the
> _dbg/_info message at all.

Honestly, this is a pretty bad case, unlike, say, a UDP packet drop, 
this implies that something in the system is very much not aligned with 
the users expectations.  I can go with a netdev specific, but I think 
this should be info level at least.


>
>> +}
>> +
>> +static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg, int r)
>> +{
>> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
>> +       struct mctp_pcc_mailbox *outbox;
>> +       struct sk_buff *skb = mssg;
> Nice, no more list lookups.
Yep.
>
>> +
>> +       mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, outbox.client);
>> +       outbox = container_of(c, struct mctp_pcc_mailbox, client);
>> +       if (skb)
>> +               dev_consume_skb_any(skb);
> minor: dev_consume_skb_any() will tolerate a null argument, you can
> leave out the conditional here.
OK
>
>> +       netif_wake_queue(mctp_pcc_ndev->ndev);
>> +}
>> +
>> +static int mctp_pcc_ndo_open(struct net_device *ndev)
>> +{
>> +       struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
>> +       struct mctp_pcc_mailbox *outbox, *inbox;
>> +
>> +       outbox = &mctp_pcc_ndev->outbox;
>> +       inbox = &mctp_pcc_ndev->inbox;
>> +
>> +       outbox->chan = pcc_mbox_request_channel(&outbox->client, outbox->index);
>> +       if (IS_ERR(outbox->chan))
>> +               return PTR_ERR(outbox->chan);
>> +
>> +       inbox->client.rx_callback = mctp_pcc_client_rx_callback;
>> +       inbox->chan = pcc_mbox_request_channel(&inbox->client, inbox->index);
>> +       if (IS_ERR(inbox->chan)) {
>> +               pcc_mbox_free_channel(outbox->chan);
>> +               return PTR_ERR(inbox->chan);
>> +       }
>> +       return 0;
>> +}
> Having the channels fully set-up before calling request_channel looks
> much safer now :)

This actually showed that the same problem would have happened in 
setting MTU.  It is why we need and accessor to the shared buffer size.


>
>> +
>> +static int mctp_pcc_ndo_stop(struct net_device *ndev)
>> +{
>> +       struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
>> +
>> +       pcc_mbox_free_channel(mctp_pcc_ndev->outbox.chan);
>> +       pcc_mbox_free_channel(mctp_pcc_ndev->inbox.chan);
>> +       return 0;
>> +}
>> +
>> +static const struct net_device_ops mctp_pcc_netdev_ops = {
>> +       .ndo_open = mctp_pcc_ndo_open,
>> +       .ndo_stop = mctp_pcc_ndo_stop,
>> +       .ndo_start_xmit = mctp_pcc_tx,
>> +};
>> +
>> +static void mctp_pcc_setup(struct net_device *ndev)
>> +{
>> +       ndev->type = ARPHRD_MCTP;
>> +       ndev->hard_header_len = 0;
> Isn't this sizeof(struct pcc_extended_header) ?
Yes it is.
>
>> +       ndev->tx_queue_len = 0;
>> +       ndev->flags = IFF_NOARP;
>> +       ndev->netdev_ops = &mctp_pcc_netdev_ops;
>> +       ndev->needs_free_netdev = true;
>> +       ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
>> +}
>> +
>> +struct mctp_pcc_lookup_context {
>> +       int index;
>> +       u32 inbox_index;
>> +       u32 outbox_index;
>> +};
>> +
>> +static acpi_status lookup_pcct_indices(struct acpi_resource *ares,
>> +                                      void *context)
>> +{
>> +       struct mctp_pcc_lookup_context *luc = context;
>> +       struct acpi_resource_address32 *addr;
>> +
>> +       if (ares->type != PCC_DWORD_TYPE)
>> +               return AE_OK;
>> +
>> +       addr = ACPI_CAST_PTR(struct acpi_resource_address32, &ares->data);
>> +       switch (luc->index) {
>> +       case 0:
>> +               luc->outbox_index = addr[0].address.minimum;
>> +               break;
>> +       case 1:
>> +               luc->inbox_index = addr[0].address.minimum;
>> +               break;
>> +       }
>> +       luc->index++;
>> +       return AE_OK;
>> +}
>> +
>> +static void mctp_cleanup_netdev(void *data)
>> +{
>> +       struct net_device *ndev = data;
>> +
>> +       mctp_unregister_netdev(ndev);
>> +}
>> +
>> +static int initialize_MTU(struct net_device *ndev)
>> +{
>> +       struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
>> +       struct mctp_pcc_mailbox *outbox;
>> +       int mctp_pcc_mtu;
>> +
>> +       outbox = &mctp_pcc_ndev->outbox;
>> +       mctp_pcc_mtu = pcc_mbox_buffer_size(outbox->index);
>> +       if (mctp_pcc_mtu == -1)
>> +               return -1;
> You may want to check that this is at least
> sizeof(struct pcc_extended_header), to avoid an underflow below, and
> possibly at least MCTP_MIN_MTU after subtracting the header size (as
> this would be less than the MCTP BTU)

And if it is?  I think that is a sign of a hardware problem, beyond the 
scope of what we can fix here.  No matter what, the interface will be 
unusable.



>
>> +
>> +       mctp_pcc_mtu = mctp_pcc_mtu - sizeof(struct pcc_extended_header);
>> +       mctp_pcc_ndev = netdev_priv(ndev);
> unneeded?
Yep, and gone....
>
>> +       ndev->mtu = MCTP_MIN_MTU;
>> +       ndev->max_mtu = mctp_pcc_mtu;
>> +       ndev->min_mtu = MCTP_MIN_MTU;
>> +
>> +       return 0;
>> +}
> Now that the initialize_MTU implementation is simpler, you may want to
> reinstate it inline in driver_add(), but also fine as-is.
>
>> +
>> +static int mctp_pcc_driver_add(struct acpi_device *acpi_dev)
>> +{
>> +       struct mctp_pcc_lookup_context context = {0};
>> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
>> +       struct device *dev = &acpi_dev->dev;
>> +       struct net_device *ndev;
>> +       acpi_handle dev_handle;
>> +       acpi_status status;
>> +       char name[32];
>> +       int rc;
>> +
>> +       dev_dbg(dev, "Adding mctp_pcc device for HID %s\n",
>> +               acpi_device_hid(acpi_dev));
>> +       dev_handle = acpi_device_handle(acpi_dev);
>> +       status = acpi_walk_resources(dev_handle, "_CRS", lookup_pcct_indices,
>> +                                    &context);
>> +       if (!ACPI_SUCCESS(status)) {
>> +               dev_err(dev, "FAILED to lookup PCC indexes from CRS\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       snprintf(name, sizeof(name), "mctppcc%d", context.inbox_index);
>> +       ndev = alloc_netdev(sizeof(*mctp_pcc_ndev), name, NET_NAME_PREDICTABLE,
>> +                           mctp_pcc_setup);
>> +       if (!ndev)
>> +               return -ENOMEM;
>> +
>> +       mctp_pcc_ndev = netdev_priv(ndev);
>> +
>> +       mctp_pcc_ndev->inbox.index = context.inbox_index;
>> +       mctp_pcc_ndev->inbox.client.dev = dev;
>> +       mctp_pcc_ndev->outbox.index = context.outbox_index;
>> +       mctp_pcc_ndev->outbox.client.dev = dev;
>> +
>> +       mctp_pcc_ndev->outbox.client.tx_prepare = mctp_pcc_tx_prepare;
>> +       mctp_pcc_ndev->outbox.client.tx_done = mctp_pcc_tx_done;
>> +       mctp_pcc_ndev->acpi_device = acpi_dev;
>> +       mctp_pcc_ndev->ndev = ndev;
>> +       acpi_dev->driver_data = mctp_pcc_ndev;
>> +
>> +       rc = initialize_MTU(ndev);
>> +       if (rc)
>> +               goto free_netdev;
>> +
>> +       rc = mctp_register_netdev(ndev, NULL, MCTP_PHYS_BINDING_PCC);
>> +       if (rc)
>> +               goto free_netdev;
>> +
>> +       return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev);
>> +free_netdev:
>> +       free_netdev(ndev);
>> +       return rc;
>> +}
>> +
>> +static const struct acpi_device_id mctp_pcc_device_ids[] = {
>> +       { "DMT0001" },
>> +       {}
>> +};
>> +
>> +static struct acpi_driver mctp_pcc_driver = {
>> +       .name = "mctp_pcc",
>> +       .class = "Unknown",
>> +       .ids = mctp_pcc_device_ids,
>> +       .ops = {
>> +               .add = mctp_pcc_driver_add,
>> +       },
>> +};
>> +
>> +module_acpi_driver(mctp_pcc_driver);
>> +
>> +MODULE_DEVICE_TABLE(acpi, mctp_pcc_device_ids);
>> +
>> +MODULE_DESCRIPTION("MCTP PCC ACPI device");
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("Adam Young <admiyo@...amperecomputing.com>");
> Cheers,
>
>
> Jeremy

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ