lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e7772524-e6d9-a09c-1cd4-4676a5daad38@hisilicon.com>
Date: Mon, 8 Jul 2024 10:30:25 +0800
From: Junxian Huang <huangjunxian6@...ilicon.com>
To: Zhu Yanjun <yanjun.zhu@...ux.dev>, <jgg@...pe.ca>, <leon@...nel.org>
CC: <linux-rdma@...r.kernel.org>, <linuxarm@...wei.com>,
	<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH for-rc 3/9] RDMA/hns: Fix soft lockup under heavy CEQE
 load



On 2024/7/5 18:47, Zhu Yanjun wrote:
> 在 2024/7/5 16:59, Junxian Huang 写道:
>> CEQEs are handled in interrupt handler currently. This may cause the
>> CPU core staying in interrupt context too long and lead to soft lockup
>> under heavy load.
>>
>> Handle CEQEs in tasklet and set an upper limit for the number of CEQE
>> handled by a single call of tasklet.
> 
> https://patchwork.kernel.org/project/linux-rdma/cover/20240621050525.3720069-1-allen.lkml@gmail.com/
> 
> In the above link, it seems that tasklet is not good enough. The tasklet is marked deprecated and has some design flaws. It is being replace BH workqueue.
> 
> So directly use workqueue instead of tasklet?
> 
> Zhu Yanjun
> 

Thanks, I'll have a look at it.

Junxian

>>
>> Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08")
>> Signed-off-by: Junxian Huang <huangjunxian6@...ilicon.com>
>> ---
>>   drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
>>   drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 88 ++++++++++++---------
>>   2 files changed, 53 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index 05005079258c..5a2445f357ab 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -717,6 +717,7 @@ struct hns_roce_eq {
>>       int                shift;
>>       int                event_type;
>>       int                sub_type;
>> +    struct tasklet_struct        tasklet;
>>   };
>>     struct hns_roce_eq_table {
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index ff135df1a761..f73de06a3ca5 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -6146,33 +6146,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
>>           !!(eq->cons_index & eq->entries)) ? ceqe : NULL;
>>   }
>>   -static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
>> -                       struct hns_roce_eq *eq)
>> +static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq)
>>   {
>> -    struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
>> -    irqreturn_t ceqe_found = IRQ_NONE;
>> -    u32 cqn;
>> -
>> -    while (ceqe) {
>> -        /* Make sure we read CEQ entry after we have checked the
>> -         * ownership bit
>> -         */
>> -        dma_rmb();
>> -
>> -        cqn = hr_reg_read(ceqe, CEQE_CQN);
>> -
>> -        hns_roce_cq_completion(hr_dev, cqn);
>> -
>> -        ++eq->cons_index;
>> -        ceqe_found = IRQ_HANDLED;
>> -        atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
>> -
>> -        ceqe = next_ceqe_sw_v2(eq);
>> -    }
>> +    tasklet_schedule(&eq->tasklet);
>>   -    update_eq_db(eq);
>> -
>> -    return IRQ_RETVAL(ceqe_found);
>> +    return IRQ_HANDLED;
>>   }
>>     static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
>> @@ -6183,7 +6161,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
>>         if (eq->type_flag == HNS_ROCE_CEQ)
>>           /* Completion event interrupt */
>> -        int_work = hns_roce_v2_ceq_int(hr_dev, eq);
>> +        int_work = hns_roce_v2_ceq_int(eq);
>>       else
>>           /* Asynchronous event interrupt */
>>           int_work = hns_roce_v2_aeq_int(hr_dev, eq);
>> @@ -6551,6 +6529,34 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
>>       return ret;
>>   }
>>   +static void hns_roce_ceq_task(struct tasklet_struct *task)
>> +{
>> +    struct hns_roce_eq *eq = from_tasklet(eq, task, tasklet);
>> +    struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
>> +    struct hns_roce_dev *hr_dev = eq->hr_dev;
>> +    int ceqe_num = 0;
>> +    u32 cqn;
>> +
>> +    while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) {
>> +        /* Make sure we read CEQ entry after we have checked the
>> +         * ownership bit
>> +         */
>> +        dma_rmb();
>> +
>> +        cqn = hr_reg_read(ceqe, CEQE_CQN);
>> +
>> +        hns_roce_cq_completion(hr_dev, cqn);
>> +
>> +        ++eq->cons_index;
>> +        ++ceqe_num;
>> +        atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
>> +
>> +        ceqe = next_ceqe_sw_v2(eq);
>> +    }
>> +
>> +    update_eq_db(eq);
>> +}
>> +
>>   static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
>>                     int comp_num, int aeq_num, int other_num)
>>   {
>> @@ -6582,21 +6588,24 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
>>                j - other_num - aeq_num);
>>         for (j = 0; j < irq_num; j++) {
>> -        if (j < other_num)
>> +        if (j < other_num) {
>>               ret = request_irq(hr_dev->irq[j],
>>                         hns_roce_v2_msix_interrupt_abn,
>>                         0, hr_dev->irq_names[j], hr_dev);
>> -
>> -        else if (j < (other_num + comp_num))
>> +        } else if (j < (other_num + comp_num)) {
>> +            tasklet_setup(&eq_table->eq[j - other_num].tasklet,
>> +                      hns_roce_ceq_task);
>>               ret = request_irq(eq_table->eq[j - other_num].irq,
>>                         hns_roce_v2_msix_interrupt_eq,
>>                         0, hr_dev->irq_names[j + aeq_num],
>>                         &eq_table->eq[j - other_num]);
>> -        else
>> +        } else {
>>               ret = request_irq(eq_table->eq[j - other_num].irq,
>>                         hns_roce_v2_msix_interrupt_eq,
>>                         0, hr_dev->irq_names[j - comp_num],
>>                         &eq_table->eq[j - other_num]);
>> +        }
>> +
>>           if (ret) {
>>               dev_err(hr_dev->dev, "request irq error!\n");
>>               goto err_request_failed;
>> @@ -6606,12 +6615,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
>>       return 0;
>>     err_request_failed:
>> -    for (j -= 1; j >= 0; j--)
>> -        if (j < other_num)
>> +    for (j -= 1; j >= 0; j--) {
>> +        if (j < other_num) {
>>               free_irq(hr_dev->irq[j], hr_dev);
>> -        else
>> -            free_irq(eq_table->eq[j - other_num].irq,
>> -                 &eq_table->eq[j - other_num]);
>> +            continue;
>> +        }
>> +        free_irq(eq_table->eq[j - other_num].irq,
>> +             &eq_table->eq[j - other_num]);
>> +        if (j < other_num + comp_num)
>> +            tasklet_kill(&eq_table->eq[j - other_num].tasklet);
>> +    }
>>     err_kzalloc_failed:
>>       for (i -= 1; i >= 0; i--)
>> @@ -6632,8 +6645,11 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
>>       for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
>>           free_irq(hr_dev->irq[i], hr_dev);
>>   -    for (i = 0; i < eq_num; i++)
>> +    for (i = 0; i < eq_num; i++) {
>>           free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
>> +        if (i < hr_dev->caps.num_comp_vectors)
>> +            tasklet_kill(&hr_dev->eq_table.eq[i].tasklet);
>> +    }
>>         for (i = 0; i < irq_num; i++)
>>           kfree(hr_dev->irq_names[i]);
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ