[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87a52uxhat.fsf@AUSNATLYNCH.amd.com>
Date: Tue, 16 Sep 2025 15:40:10 -0500
From: Nathan Lynch <nathan.lynch@....com>
To: Jonathan Cameron <jonathan.cameron@...wei.com>
CC: Vinod Koul <vkoul@...nel.org>, Wei Huang <wei.huang2@....com>, "Mario
Limonciello" <mario.limonciello@....com>, Bjorn Helgaas
<bhelgaas@...gle.com>, <linux-pci@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <dmaengine@...r.kernel.org>
Subject: Re: [PATCH RFC 08/13] dmaengine: sdxi: Context creation/removal,
descriptor submission
Jonathan Cameron <jonathan.cameron@...wei.com> writes:
> On Fri, 05 Sep 2025 13:48:31 -0500
> Nathan Lynch via B4 Relay <devnull+nathan.lynch.amd.com@...nel.org> wrote:
>
>> From: Nathan Lynch <nathan.lynch@....com>
>>
>> Add functions for creating and removing SDXI contexts and submitting
>> descriptors against them.
>>
>> An SDXI function supports one or more contexts, each of which has its
>> own descriptor ring and associated state. Each context has a 16-bit
>> index. A special context is installed at index 0 and is used for
>> configuring other contexts and performing administrative actions.
>>
>> The creation of each context entails the allocation of the following
>> control structure hierarchy:
>>
>> * Context L1 Table slot
>> * Access key (AKey) table
>> * Context control block
>> * Descriptor ring
>> * Write index
>> * Context status block
>>
>> Co-developed-by: Wei Huang <wei.huang2@....com>
>> Signed-off-by: Wei Huang <wei.huang2@....com>
>> Signed-off-by: Nathan Lynch <nathan.lynch@....com>
> Some superficial stuff inline.
>
> I haven't yet reread the spec against this (and it's been a while
> since I looked at sdxi!) but overall seems reasonable.
>> ---
>> drivers/dma/sdxi/context.c | 547 +++++++++++++++++++++++++++++++++++++++++++++
>> drivers/dma/sdxi/context.h | 28 +++
>> 2 files changed, 575 insertions(+)
>>
>> diff --git a/drivers/dma/sdxi/context.c b/drivers/dma/sdxi/context.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..50eae5b3b303d67891113377e2df209d199aa533
>> --- /dev/null
>> +++ b/drivers/dma/sdxi/context.c
>> @@ -0,0 +1,547 @@
>
>
>> +
>> +static struct sdxi_cxt *alloc_cxt(struct sdxi_dev *sdxi)
>> +{
>> + struct sdxi_cxt *cxt;
>> + u16 id, l2_idx, l1_idx;
>> +
>> + if (sdxi->cxt_count >= sdxi->max_cxts)
>> + return NULL;
>> +
>> + /* search for an empty context slot */
>> + for (id = 0; id < sdxi->max_cxts; id++) {
>> + l2_idx = ID_TO_L2_INDEX(id);
>> + l1_idx = ID_TO_L1_INDEX(id);
>> +
>> + if (sdxi->cxt_array[l2_idx] == NULL) {
>> + int sz = sizeof(struct sdxi_cxt *) * L1_TABLE_ENTRIES;
>> + struct sdxi_cxt **ptr = kzalloc(sz, GFP_KERNEL);
>> +
>> + sdxi->cxt_array[l2_idx] = ptr;
>> + if (!(sdxi->cxt_array[l2_idx]))
>> + return NULL;
>> + }
>> +
>> + cxt = (sdxi->cxt_array)[l2_idx][l1_idx];
>> + /* found one empty slot */
>> + if (!cxt)
>> + break;
>> + }
>> +
>> + /* nothing found, bail... */
>> + if (id == sdxi->max_cxts)
>> + return NULL;
>> +
>> + /* alloc context and initialize it */
>> + cxt = kzalloc(sizeof(struct sdxi_cxt), GFP_KERNEL);
>
> sizeof(*ctx) usually preferred as it saves anyone checking types
> match.
yep, will fix.
>> + if (!cxt)
>> + return NULL;
>> +
>> + cxt->akey_table = dma_alloc_coherent(sdxi_to_dev(sdxi),
>> + sizeof(*cxt->akey_table),
>> + &cxt->akey_table_dma, GFP_KERNEL);
>> + if (!cxt->akey_table) {
>> + kfree(cxt);
>
> Similar to below. You could use a DEFINE_FREE() to auto cleanup up ctx
> on error.
OK, I've been hesitant to try the cleanup stuff so far but I'll give it
a shot (here and other places).
>> + return NULL;
>> + }
>> +
>> + cxt->sdxi = sdxi;
>> + cxt->id = id;
>> + cxt->db_base = sdxi->dbs_bar + id * sdxi->db_stride;
>> + cxt->db = sdxi->dbs + id * sdxi->db_stride;
>> +
>> + sdxi->cxt_array[l2_idx][l1_idx] = cxt;
>> + sdxi->cxt_count++;
>> +
>> + return cxt;
>> +}
>
>> +struct sdxi_cxt *sdxi_working_cxt_init(struct sdxi_dev *sdxi,
>> + enum sdxi_cxt_id id)
>> +{
>> + struct sdxi_cxt *cxt;
>> + struct sdxi_sq *sq;
>> +
>> + cxt = sdxi_cxt_alloc(sdxi);
>> + if (!cxt) {
>> + sdxi_err(sdxi, "failed to alloc a new context\n");
>> + return NULL;
>> + }
>> +
>> + /* check if context ID matches */
>> + if (id < SDXI_ANY_CXT_ID && cxt->id != id) {
>> + sdxi_err(sdxi, "failed to alloc a context with id=%d\n", id);
>> + goto err_cxt_id;
>> + }
>> +
>> + sq = sdxi_sq_alloc_default(cxt);
>> + if (!sq) {
>> + sdxi_err(sdxi, "failed to alloc a submission queue (sq)\n");
>> + goto err_sq_alloc;
>> + }
>> +
>> + return cxt;
>> +
>> +err_sq_alloc:
>> +err_cxt_id:
>> + sdxi_cxt_free(cxt);
>
> Might be worth doing a DEFINE_FREE() then you can use return_ptr(ctx); instead
> of return ctx. Will allow you to simply return on any errors.
>
>> +
>> + return NULL;
>> +}
>> +
>> +static const char *cxt_sts_state_str(enum cxt_sts_state state)
>> +{
>> + static const char *const context_states[] = {
>> + [CXTV_STOP_SW] = "stopped (software)",
>> + [CXTV_RUN] = "running",
>> + [CXTV_STOPG_SW] = "stopping (software)",
>> + [CXTV_STOP_FN] = "stopped (function)",
>> + [CXTV_STOPG_FN] = "stopping (function)",
>> + [CXTV_ERR_FN] = "error",
>> + };
>> + const char *str = "unknown";
>> +
>> + switch (state) {
>> + case CXTV_STOP_SW:
>> + case CXTV_RUN:
>> + case CXTV_STOPG_SW:
>> + case CXTV_STOP_FN:
>> + case CXTV_STOPG_FN:
>> + case CXTV_ERR_FN:
>> + str = context_states[state];
>
> I'd do a default to make it explicit that there are other states. If
> there aren't then just return here and skip the return below. A
> compiler should be able to see if you handled them all and complain
> loudly if a new one is added that you haven't handled.
The CXTV_... values are the only valid states that an SDXI device is
allowed to report for a context, but this function is intended to be
resilient against unspecified values in case of implementation bugs (in
the caller, or firmware, whatever). That's why it falls back to
returning "unknown".
But it's coded without a default label so that -Wswitch (which is
enabled by -Wall and so is generally active for kernel code) will warn
on an unhandled case. The presence of a default label will actually
defeat this unless the compiler is invoked with -Wswitch-enum, which
even W=1 doesn't enable.
I really do want warnings on unhandled cases of this sort, so I suppose
at the very least this code deserves a comment to deter well-meaning
people from trying to add a default label. Or I could add the default
label and see how painful it is to use -Wswitch-enum throughout the
driver. There are several similar functions in the error reporting code
so this isn't the only instance of this pattern in the driver.
>> + }
>> +
>> + return str;
>> +}
>> +
>> +int sdxi_submit_desc(struct sdxi_cxt *cxt, const struct sdxi_desc *desc)
>> +{
>> + struct sdxi_sq *sq;
>> + __le64 __iomem *db;
>> + __le64 *ring_base;
>> + u64 ring_entries;
>> + __le64 *rd_idx;
>> + __le64 *wr_idx;
>> +
>> + sq = cxt->sq;
>> + ring_entries = sq->ring_entries;
>> + ring_base = sq->desc_ring[0].qw;
>> + rd_idx = &sq->cxt_sts->read_index;
>> + wr_idx = sq->write_index;
>> + db = cxt->db;
> I'm not sure the local variables add anything, but if you really want
> to keep them, then at least combine with declaration.
>
> struct sdxi_sq *sq = ctx->sq;
> __le64 __iomem *db = ctx->db;
>
>
> just to keep thing code more compact.
>
> Personally I'd just have a local sq and do the rest in the call
>
> return sdxi_enqueue(desc->qw, 1, sq->desc_ring[0].wq,
> etc
Yeah, that makes sense.
>
>
>> +
>> + return sdxi_enqueue(desc->qw, 1, ring_base, ring_entries,
>> + rd_idx, wr_idx, db);
>
>> +}
>> +
>> +static void sdxi_cxt_shutdown(struct sdxi_cxt *target_cxt)
>> +{
>> + unsigned long deadline = jiffies + msecs_to_jiffies(1000);
>> + struct sdxi_cxt *admin_cxt = target_cxt->sdxi->admin_cxt;
>> + struct sdxi_dev *sdxi = target_cxt->sdxi;
>> + struct sdxi_cxt_sts *sts = target_cxt->sq->cxt_sts;
>> + struct sdxi_desc desc;
>> + u16 cxtid = target_cxt->id;
>> + struct sdxi_cxt_stop params = {
>> + .range = sdxi_cxt_range(cxtid),
>> + };
>> + enum cxt_sts_state state = sdxi_cxt_sts_state(sts);
>> + int err;
>> +
>> + sdxi_dbg(sdxi, "%s entry: context state: %s",
>> + __func__, cxt_sts_state_str(state));
>> +
>> + err = sdxi_encode_cxt_stop(&desc, ¶ms);
>> + if (err)
>> + return;
>> +
>> + err = sdxi_submit_desc(admin_cxt, &desc);
>> + if (err)
>> + return;
>> +
>> + sdxi_dbg(sdxi, "shutting down context %u\n", cxtid);
>> +
>> + do {
>> + enum cxt_sts_state state = sdxi_cxt_sts_state(sts);
>> +
>> + sdxi_dbg(sdxi, "context %u state: %s", cxtid,
>> + cxt_sts_state_str(state));
>> +
>> + switch (state) {
>> + case CXTV_ERR_FN:
>> + sdxi_err(sdxi, "context %u went into error state while stopping\n",
>> + cxtid);
>> + fallthrough;
>
> I'd just return unless a later patch adds something more interesting to the next
> cases.
Agreed.
>> + case CXTV_STOP_SW:
>> + case CXTV_STOP_FN:
>> + return;
>> + case CXTV_RUN:
>> + case CXTV_STOPG_SW:
>> + case CXTV_STOPG_FN:
>> + /* transitional states */
>> + fsleep(1000);
>> + break;
>> + default:
>> + sdxi_err(sdxi, "context %u in unknown state %u\n",
>> + cxtid, state);
>> + return;
>> + }
>> + } while (time_before(jiffies, deadline));
>> +
>> + sdxi_err(sdxi, "stopping context %u timed out (state = %u)\n",
>> + cxtid, sdxi_cxt_sts_state(sts));
>> +}
>> +
>> +void sdxi_working_cxt_exit(struct sdxi_cxt *cxt)
>> +{
>> + struct sdxi_sq *sq;
>> +
>> + if (!cxt)
> Superficially this looks like defensive programming that we don't need
> as it makes not sense to call this if ctx is NULL. Add a comment if
> there is a path where this actually happens.
>> + return;
>> +
>> + sq = cxt->sq;
>> + if (!sq)
> Add a comment on why this might happen, or drop teh cehck.
Yeah we can probably remove these checks.
Powered by blists - more mailing lists