Message-ID: <68d55114-fc3b-c27a-c816-e948c0c4679e@arm.com>
Date: Wed, 21 Aug 2019 10:00:52 +0100
From: Steven Price <steven.price@....com>
To: Tomeu Vizoso <tomeu.vizoso@...labora.com>,
Rob Herring <robh@...nel.org>
Cc: linux-kernel@...r.kernel.org, dri-devel@...ts.freedesktop.org
Subject: Re: [PATCH] drm/panfrost: Queue jobs on the hardware
On 20/08/2019 06:23, Tomeu Vizoso wrote:
> On 8/16/19 11:31 AM, Steven Price wrote:
>> The hardware has a set of '_NEXT' registers that can hold a second job
>> while the first is executing. Make use of these registers to enqueue a
>> second job per slot.
>
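To expand on that for anyone skimming the thread: the queue is at most
two jobs deep per slot - one job in JS_HEAD (currently executing) and
one staged in the JS_HEAD_NEXT registers. A rough model of the
bookkeeping, as a sketch only with hypothetical names (the patch's
actual helpers are panfrost_job_count(), panfrost_enqueue_job() and
panfrost_dequeue_job()):

  struct job;                          /* opaque for this sketch */

  struct slot_queue {
          struct job *jobs[2];         /* [0] = in JS_HEAD, [1] = in _NEXT */
  };

  static int slot_count(struct slot_queue *q)
  {
          if (!q->jobs[0])
                  return 0;
          return q->jobs[1] ? 2 : 1;
  }

  /* Submission: fails if both the running and the staged entry are taken. */
  static int slot_push(struct slot_queue *q, struct job *job)
  {
          int n = slot_count(q);

          if (n == 2)
                  return -1;
          q->jobs[n] = job;
          return 0;
  }

  /* Completion: the oldest job retires and the staged job shifts down. */
  static struct job *slot_pop(struct slot_queue *q)
  {
          struct job *done = q->jobs[0];

          q->jobs[0] = q->jobs[1];
          q->jobs[1] = NULL;
          return done;
  }
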
> I like this in principle, but upon some quick testing I found that Mesa
> is around 10% slower with this patch (when using the performance governor).
>
> There's also the question of how this affects the utilization
> calculation in the devfreq code.
Yes - as far as I can tell the devfreq code is already broken as it is,
since it uses a per-job-slot (JS) utilisation metric. I'll try to find
some time to fix that up as well before reposting.
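
Roughly what I have in mind there: make the per-slot 'busy' field a
count of queued jobs rather than a bool, and only record a devfreq
transition when that count crosses zero. A sketch only, with
hypothetical names (record_transition() stands in for
panfrost_devfreq_record_transition()):

  struct devfreq_slot {
          int busy;                  /* jobs currently queued on this slot */
  };

  /* Stub: flips the slot between accumulating busy_time and idle_time. */
  static void record_transition(struct devfreq_slot *s)
  {
  }

  static void slot_job_queued(struct devfreq_slot *s)
  {
          if (s->busy++ == 0)        /* 0 -> 1: the slot becomes busy */
                  record_transition(s);
  }

  static void slot_job_retired(struct devfreq_slot *s)
  {
          if (--s->busy == 0)        /* 1 -> 0: the slot goes idle */
                  record_transition(s);
  }

That way a second queued job no longer distorts the busy/idle
accounting.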
Steve
> I will try to find time to understand why Mesa is slower and not
> faster, but TBH performance isn't my top priority yet. It would be
> great if somebody else could look at it.
>
> Thanks,
>
> Tomeu
>
>> Signed-off-by: Steven Price <steven.price@....com>
>> ---
>> Note that this is based on top of Rob Herring's "per FD address space"
>> patch[1].
>>
>> [1]
>> https://marc.info/?i=20190813150115.30338-1-robh%20()%20kernel%20!%20org
>>
>>  drivers/gpu/drm/panfrost/panfrost_device.h |  4 +-
>>  drivers/gpu/drm/panfrost/panfrost_job.c    | 76 ++++++++++++++++++----
>>  drivers/gpu/drm/panfrost/panfrost_mmu.c    |  2 +-
>>  3 files changed, 67 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
>> index f503c566e99f..0153defd6085 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
>> @@ -55,7 +55,7 @@ struct panfrost_devfreq_slot {
>>  	ktime_t busy_time;
>>  	ktime_t idle_time;
>>  	ktime_t time_last_update;
>> -	bool busy;
>> +	int busy;
>>  };
>>  struct panfrost_device {
>> @@ -80,7 +80,7 @@ struct panfrost_device {
>>  	struct panfrost_job_slot *js;
>> -	struct panfrost_job *jobs[NUM_JOB_SLOTS];
>> +	struct panfrost_job *jobs[NUM_JOB_SLOTS][2];
>>  	struct list_head scheduled_jobs;
>>  	struct panfrost_perfcnt *perfcnt;
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>> index 05c85f45a0de..b2b5027af976 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>> @@ -138,6 +138,37 @@ static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
>>  	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32);
>>  }
>> +static int panfrost_job_count(struct panfrost_device *pfdev, int slot)
>> +{
>> +	if (pfdev->jobs[slot][0] == NULL)
>> +		return 0;
>> +	if (pfdev->jobs[slot][1] == NULL)
>> +		return 1;
>> +	return 2;
>> +}
>> +
>> +static struct panfrost_job *panfrost_dequeue_job(
>> +		struct panfrost_device *pfdev, int slot)
>> +{
>> +	struct panfrost_job *job = pfdev->jobs[slot][0];
>> +
>> +	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
>> +	pfdev->jobs[slot][1] = NULL;
>> +
>> +	return job;
>> +}
>> +
>> +static void panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
>> +				 struct panfrost_job *job)
>> +{
>> +	if (pfdev->jobs[slot][0] == NULL) {
>> +		pfdev->jobs[slot][0] = job;
>> +		return;
>> +	}
>> +	WARN_ON(pfdev->jobs[slot][1] != NULL);
>> +	pfdev->jobs[slot][1] = job;
>> +}
>> +
>>  static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>>  {
>>  	struct panfrost_device *pfdev = job->pfdev;
>> @@ -150,13 +181,16 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>>  	if (ret < 0)
>>  		return;
>> -	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
>> -		goto end;
>> -
>>  	cfg = panfrost_mmu_as_get(pfdev, &job->file_priv->mmu);
>> -	panfrost_devfreq_record_transition(pfdev, js);
>>  	spin_lock_irqsave(&pfdev->hwaccess_lock, flags);
>> +	panfrost_enqueue_job(pfdev, js, job);
>> +
>> +	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
>> +		goto end;
>> +
>> +	if (panfrost_job_count(pfdev, js) == 1)
>> +		panfrost_devfreq_record_transition(pfdev, js);
>>  	job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF);
>>  	job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32);
>> @@ -186,9 +220,9 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>>  	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
>> +end:
>>  	spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags);
>> -end:
>>  	pm_runtime_mark_last_busy(pfdev->dev);
>>  	pm_runtime_put_autosuspend(pfdev->dev);
>>  }
>> @@ -336,8 +370,6 @@ static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
>>  	if (unlikely(job->base.s_fence->finished.error))
>>  		return NULL;
>> -	pfdev->jobs[slot] = job;
>> -
>>  	fence = panfrost_fence_create(pfdev, slot);
>>  	if (IS_ERR(fence))
>>  		return NULL;
>> @@ -421,21 +453,36 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
>>  	struct panfrost_device *pfdev = data;
>>  	u32 status = job_read(pfdev, JOB_INT_STAT);
>>  	int j;
>> +	unsigned long flags;
>>  	dev_dbg(pfdev->dev, "jobslot irq status=%x\n", status);
>>  	if (!status)
>>  		return IRQ_NONE;
>> +	spin_lock_irqsave(&pfdev->hwaccess_lock, flags);
>> +
>>  	pm_runtime_mark_last_busy(pfdev->dev);
>>  	for (j = 0; status; j++) {
>>  		u32 mask = MK_JS_MASK(j);
>> +		int jobs = panfrost_job_count(pfdev, j);
>> +		int active;
>>  		if (!(status & mask))
>>  			continue;
>>  		job_write(pfdev, JOB_INT_CLEAR, mask);
>> +		active = (job_read(pfdev, JOB_INT_JS_STATE) &
>> +			  JOB_INT_MASK_DONE(j)) ? 1 : 0;
>> +
>> +		if (!(status & JOB_INT_MASK_ERR(j))) {
>> +			/* Recheck RAWSTAT to check if there's a newly
>> +			 * failed job (since JOB_INT_STAT was read)
>> +			 */
>> +			status |= job_read(pfdev, JOB_INT_RAWSTAT) &
>> +				  JOB_INT_MASK_ERR(j);
>> +		}
>>  		if (status & JOB_INT_MASK_ERR(j)) {
>>  			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
>> @@ -447,20 +494,25 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
>>  				job_read(pfdev, JS_TAIL_LO(j)));
>>  			drm_sched_fault(&pfdev->js->queue[j].sched);
>> +			jobs--;
>>  		}
>> -		if (status & JOB_INT_MASK_DONE(j)) {
>> -			struct panfrost_job *job = pfdev->jobs[j];
>> +		while (jobs-- > active) {
>> +			struct panfrost_job *job =
>> +					panfrost_dequeue_job(pfdev, j);
>> -			pfdev->jobs[j] = NULL;
>>  			panfrost_mmu_as_put(pfdev, &job->file_priv->mmu);
>> -			panfrost_devfreq_record_transition(pfdev, j);
>>  			dma_fence_signal(job->done_fence);
>>  		}
>> +		if (!active)
>> +			panfrost_devfreq_record_transition(pfdev, j);
>> +
>>  		status &= ~mask;
>>  	}
>> +	spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags);
>> +
>>  	return IRQ_HANDLED;
>>  }
>> @@ -491,7 +543,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>>  		ret = drm_sched_init(&js->queue[j].sched,
>>  				     &panfrost_sched_ops,
>> -				     1, 0, msecs_to_jiffies(500),
>> +				     2, 0, msecs_to_jiffies(500),
>>  				     "pan_js");
>>  		if (ret) {
>>  			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> index f22d8f02568d..c25fd88ef437 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> @@ -147,7 +147,7 @@ u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
>>  	as = mmu->as;
>>  	if (as >= 0) {
>>  		int en = atomic_inc_return(&mmu->as_count);
>> -		WARN_ON(en >= NUM_JOB_SLOTS);
>> +		WARN_ON(en >= NUM_JOB_SLOTS*2);
>>  		list_move(&mmu->list, &pfdev->as_lru_list);
>>  		goto out;
>>
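
One note for reviewers on the interrupt handler above: the completion
accounting reduces to "everything we queued beyond what the hardware
still reports active must have finished", minus any job that faulted
(a faulted job is handed to drm_sched_fault() rather than signalled
here). As a sketch, using a hypothetical helper that is not part of the
patch:

  /*
   * queued: jobs the driver has outstanding on the slot (0..2)
   * active: 1 if JOB_INT_JS_STATE still shows a job on the slot, else 0
   * failed: 1 if this interrupt flagged a fault on the slot
   * Returns the number of completed jobs whose fences should be signalled.
   */
  static int jobs_to_retire(int queued, int active, int failed)
  {
          if (failed)
                  queued--;       /* scheduler recovery handles this one */

          return queued > active ? queued - active : 0;
  }

So with two jobs queued, no fault and the slot now idle, both fences
get signalled; if the second job is still running, only the first is
retired.
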
> _______________________________________________
> dri-devel mailing list
> dri-devel@...ts.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel