[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <2b3d5d75-22d8-4b5c-8445-a5ca3fe0bc69@ti.com>
Date: Tue, 14 Oct 2025 18:03:54 +0530
From: Aksh Garg <a-garg7@...com>
To: Paolo Abeni <pabeni@...hat.com>, <netdev@...r.kernel.org>,
<davem@...emloft.net>, <kuba@...nel.org>, <andrew+netdev@...n.ch>,
<edumazet@...gle.com>
CC: <linux-kernel@...r.kernel.org>, <c-vankar@...com>, <s-vadapalli@...com>,
<danishanwar@...com>
Subject: Re: [PATCH net] net: ethernet: ti: am65-cpts: fix timestamp loss due
to race conditions
On 14/10/25 15:02, Paolo Abeni wrote:
> On 10/10/25 5:08 PM, Aksh Garg wrote:
>> diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c
>> index 59d6ab989c55..2e9719264ba5 100644
>> --- a/drivers/net/ethernet/ti/am65-cpts.c
>> +++ b/drivers/net/ethernet/ti/am65-cpts.c
>> @@ -163,7 +163,9 @@ struct am65_cpts {
>> struct device_node *clk_mux_np;
>> struct clk *refclk;
>> u32 refclk_freq;
>> - struct list_head events;
>> + /* separate lists to handle TX and RX timestamp independently */
>> + struct list_head events_tx;
>> + struct list_head events_rx;
>> struct list_head pool;
>> struct am65_cpts_event pool_data[AM65_CPTS_MAX_EVENTS];
>> spinlock_t lock; /* protects events lists*/
>> @@ -172,6 +174,7 @@ struct am65_cpts {
>> u32 ts_add_val;
>> int irq;
>> struct mutex ptp_clk_lock; /* PHC access sync */
>> + struct mutex rx_ts_lock; /* serialize RX timestamp match */
>> u64 timestamp;
>> u32 genf_enable;
>> u32 hw_ts_enable;
>> @@ -245,7 +248,16 @@ static int am65_cpts_cpts_purge_events(struct am65_cpts *cpts)
>> struct am65_cpts_event *event;
>> int removed = 0;
>>
>> - list_for_each_safe(this, next, &cpts->events) {
>> + list_for_each_safe(this, next, &cpts->events_tx) {
>> + event = list_entry(this, struct am65_cpts_event, list);
>> + if (time_after(jiffies, event->tmo)) {
>> + list_del_init(&event->list);
>> + list_add(&event->list, &cpts->pool);
>> + ++removed;
>> + }
>> + }
>
> Minor nit: you can move the loop in a separate helper taking the event
> list as an argument and avoid some code duplication with the rx loop.
I will create a helper function for this.
>
>> +
>> + list_for_each_safe(this, next, &cpts->events_rx) {
>> event = list_entry(this, struct am65_cpts_event, list);
>> if (time_after(jiffies, event->tmo)) {
>> list_del_init(&event->list);
>> @@ -306,11 +318,21 @@ static int __am65_cpts_fifo_read(struct am65_cpts *cpts)
>> cpts->timestamp);
>> break;
>> case AM65_CPTS_EV_RX:
>> + event->tmo = jiffies +
>> + msecs_to_jiffies(AM65_CPTS_EVENT_RX_TX_TIMEOUT);
>> +
>> + list_move_tail(&event->list, &cpts->events_rx);
>> +
>> + dev_dbg(cpts->dev,
>> + "AM65_CPTS_EV_RX e1:%08x e2:%08x t:%lld\n",
>> + event->event1, event->event2,
>> + event->timestamp);
>> + break;
>> case AM65_CPTS_EV_TX:
>> event->tmo = jiffies +
>> msecs_to_jiffies(AM65_CPTS_EVENT_RX_TX_TIMEOUT);
>>
>> - list_move_tail(&event->list, &cpts->events);
>> + list_move_tail(&event->list, &cpts->events_tx);
>
> Similar thing here.
The dev_dbg() calls have different debug messages. So, do you think a
helper function here makes much difference?
>
>>
>> dev_dbg(cpts->dev,
>> "AM65_CPTS_EV_TX e1:%08x e2:%08x t:%lld\n",
>> @@ -828,7 +850,7 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts,
>> return found;
>> }
>>
>> -static void am65_cpts_find_ts(struct am65_cpts *cpts)
>> +static void am65_cpts_find_tx_ts(struct am65_cpts *cpts)
>> {
>> struct am65_cpts_event *event;
>> struct list_head *this, *next;
>> @@ -837,7 +859,7 @@ static void am65_cpts_find_ts(struct am65_cpts *cpts)
>> LIST_HEAD(events);
>>
>> spin_lock_irqsave(&cpts->lock, flags);
>> - list_splice_init(&cpts->events, &events);
>> + list_splice_init(&cpts->events_tx, &events);
>> spin_unlock_irqrestore(&cpts->lock, flags);
>>
>> list_for_each_safe(this, next, &events) {
>> @@ -850,7 +872,7 @@ static void am65_cpts_find_ts(struct am65_cpts *cpts)
>> }
>>
>> spin_lock_irqsave(&cpts->lock, flags);
>> - list_splice_tail(&events, &cpts->events);
>> + list_splice_tail(&events, &cpts->events_tx);
>
> I see the behavior is pre-existing, but why splicing on tail? events
> added in between should be older???
I will handle this in a future patch.
>
>> list_splice_tail(&events_free, &cpts->pool);
>> spin_unlock_irqrestore(&cpts->lock, flags);
>> }
>> @@ -861,7 +883,7 @@ static long am65_cpts_ts_work(struct ptp_clock_info *ptp)
>> unsigned long flags;
>> long delay = -1;
>>
>> - am65_cpts_find_ts(cpts);
>> + am65_cpts_find_tx_ts(cpts);
>>
>> spin_lock_irqsave(&cpts->txq.lock, flags);
>> if (!skb_queue_empty(&cpts->txq))
>> @@ -899,16 +921,21 @@ static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid)
>> {
>> struct list_head *this, *next;
>> struct am65_cpts_event *event;
>> + LIST_HEAD(events_free);
>> unsigned long flags;
>> + LIST_HEAD(events);
>> u32 mtype_seqid;
>> u64 ns = 0;
>>
>> spin_lock_irqsave(&cpts->lock, flags);
>> __am65_cpts_fifo_read(cpts);
>> - list_for_each_safe(this, next, &cpts->events) {
>> + list_splice_init(&cpts->events_rx, &events);
>> + spin_unlock_irqrestore(&cpts->lock, flags);
>
> Why are you changing the behaviour here, releasing and reacquiring the
> cpts->lock? It looks like a separate change, if needed at all.
It was added as an optimization, as holding the lock for the entire loop
would delay the handling of other events. I will add this optimization
in a future patch.
>
>> + list_for_each_safe(this, next, &events) {
>> event = list_entry(this, struct am65_cpts_event, list);
>> if (time_after(jiffies, event->tmo)) {
>> - list_move(&event->list, &cpts->pool);
>> + list_move(&event->list, &events_free);
>> continue;
>> }
>>
>> @@ -919,10 +946,14 @@ static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid)
>>
>> if (mtype_seqid == skb_mtype_seqid) {
>> ns = event->timestamp;
>> - list_move(&event->list, &cpts->pool);
>> + list_move(&event->list, &events_free);
>> break;
>> }
>> }
>> +
>> + spin_lock_irqsave(&cpts->lock, flags);
>> + list_splice_tail(&events, &cpts->events_rx);
>> + list_splice_tail(&events_free, &cpts->pool);
>> spin_unlock_irqrestore(&cpts->lock, flags);
>>
>> return ns;
>> @@ -948,7 +979,9 @@ void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb)
>>
>> dev_dbg(cpts->dev, "%s mtype seqid %08x\n", __func__, skb_cb->skb_mtype_seqid);
>>
>> + mutex_lock(&cpts->rx_ts_lock);
>> ns = am65_cpts_find_rx_ts(cpts, skb_cb->skb_mtype_seqid);
>> + mutex_unlock(&cpts->rx_ts_lock);
>
> The call chain is:
>
> am65_cpsw_nuss_rx_poll() -> am65_cpsw_nuss_rx_packets() ->
> am65_cpts_rx_timestamp()
>
> this runs in BH context, can't acquire a mutex. Also I don't see why any
> additional locking would be needed?
>
The rationale for adding this lock was to handle concurrent RX threads
accessing the timestamp event list. If one RX thread moves all events
from events_rx to a temporary list and releases the spinlock, another
concurrent RX thread could acquire the lock and find events_rx empty,
potentially missing its timestamp.
I need clarification on the RX processing behavior: Can
am65_cpsw_nuss_rx_packets() be called for a new RX packet concurrently
while a previous RX packet is still being processed, or is RX processing
serialized? If RX processing is serialized, then the lock is not
required at all.
Anyway, I will remove the lock from this patch, as it was part of the
optimization mentioned above.
> /P
>
Powered by blists - more mailing lists