[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <fa15134608944498d9f5bc7950b98cde@codeaurora.org>
Date: Mon, 21 Jan 2019 20:09:49 +0530
From: Balakrishna Godavarthi <bgodavar@...eaurora.org>
To: Marcel Holtmann <marcel@...tmann.org>
Cc: Johan Hedberg <johan.hedberg@...il.com>,
Matthias Kaehlcke <mka@...omium.org>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
Bluez mailing list <linux-bluetooth@...r.kernel.org>,
hemantg@...eaurora.org, linux-arm-msm@...r.kernel.org
Subject: Re: [RFC,v2] Bluetooth: hci_qca: Collect controller memory dump
during SSR
Hi Marcel,
On 2019-01-18 14:24, Marcel Holtmann wrote:
> Hi Balakrishna,
>
>>>>>> We will collect the ramdump of BT controller when hardware error
>>>>>> event
>>>>>> received before rebooting the HCI layer. Before restarting a
>>>>>> subsystem
>>>>>> or a process running on a subsystem, it is often required to
>>>>>> request
>>>>>> either a subsystem or a process to perform proper cache dump and
>>>>>> software failure reason into a memory buffer which application
>>>>>> processor can retrieve afterwards. SW developers can often provide
>>>>>> initial investigation by looking into that debugging information.
>>>>>> Signed-off-by: Balakrishna Godavarthi <bgodavar@...eaurora.org>
>>>>>> ---
>>>>>> changes v2:
>>>>>> * entirely an newer approach handling with an work queue.
>>>>>> ---
>>>>>> drivers/bluetooth/hci_qca.c | 289
>>>>>> ++++++++++++++++++++++++++++++++++--
>>>>>> 1 file changed, 278 insertions(+), 11 deletions(-)
>>>>>> diff --git a/drivers/bluetooth/hci_qca.c
>>>>>> b/drivers/bluetooth/hci_qca.c
>>>>>> index f036c8f98ea3..62b768bc32ec 100644
>>>>>> --- a/drivers/bluetooth/hci_qca.c
>>>>>> +++ b/drivers/bluetooth/hci_qca.c
>>>>>> @@ -32,6 +32,7 @@
>>>>>> #include <linux/clk.h>
>>>>>> #include <linux/debugfs.h>
>>>>>> #include <linux/delay.h>
>>>>>> +#include <linux/devcoredump.h>
>>>>>> #include <linux/device.h>
>>>>>> #include <linux/gpio/consumer.h>
>>>>>> #include <linux/mod_devicetable.h>
>>>>> don't we have some crashdump core facility that could be utilized
>>>>> here?
>>>> [Bala]: no i don't think so. that is reason calling devcoredump.h
>>> what is wrong with using devcoredump.h?
>>
>> [Bala]: using the devcoredump.h
>>
>>>>>> @@ -57,9 +58,10 @@
>>>>>> /* Controller states */
>>>>>> #define STATE_IN_BAND_SLEEP_ENABLED 1
>>>>>> -#define IBS_WAKE_RETRANS_TIMEOUT_MS 100
>>>>>> +#define IBS_WAKE_RETRANS_TIMEOUT_MS 100
>>>>>> #define IBS_TX_IDLE_TIMEOUT_MS 2000
>>>>>> -#define BAUDRATE_SETTLE_TIMEOUT_MS 300
>>>>>> +#define BAUDRATE_SETTLE_TIMEOUT_MS 300
>>>>>> +#define MEMDUMP_COLLECTION_TIMEOUT_MS 8000
>>>>>> /* susclk rate */
>>>>>> #define SUSCLK_RATE_32KHZ 32768
>>>>>> @@ -67,6 +69,13 @@
>>>>>> /* Controller debug log header */
>>>>>> #define QCA_DEBUG_HANDLE 0x2EDC
>>>>>> +/* Controller dump header */
>>>>>> +#define QCA_SSR_DUMP_HANDLE 0x0108
>>>>>> +#define QCA_DUMP_PACKET_SIZE 255
>>>>>> +#define QCA_LAST_SEQUENCE_NUM 0xFFFF
>>>>>> +#define QCA_CRASHBYTE_PACKET_LEN 1100
>>>>>> +#define QCA_MEMDUMP_BYTE 0xFB
>>>>>> +
>>>>>> /* HCI_IBS transmit side sleep protocol states */
>>>>>> enum tx_ibs_states {
>>>>>> HCI_IBS_TX_ASLEEP,
>>>>>> @@ -89,12 +98,41 @@ enum hci_ibs_clock_state_vote {
>>>>>> HCI_IBS_RX_VOTE_CLOCK_OFF,
>>>>>> };
>>>>>> +/* Controller memory dump states */
>>>>>> +enum qca_memdump_states {
>>>>>> + QCA_MEMDUMP_IDLE,
>>>>>> + QCA_MEMDUMP_COLLECTING,
>>>>>> + QCA_MEMDUMP_COLLECTED,
>>>>>> + QCA_MEMDUMP_TIMEOUT,
>>>>>> +};
>>>>>> +
>>>>>> +struct qca_memdump_data {
>>>>>> + char *memdump_buf_head;
>>>>>> + char *memdump_buf_tail;
>>>>>> + u32 current_seq_no;
>>>>>> + u32 received_dump;
>>>>>> +};
>>>>>> +
>>>>>> +struct qca_memdump_event_hdr {
>>>>>> + __u8 evt;
>>>>>> + __u8 plen;
>>>>>> + __u16 opcode;
>>>>>> + __u16 seq_no;
>>>>>> + __u8 reserved;
>>>>>> +} __packed;
>>>>>> +
>>>>>> +
>>>>>> +struct qca_dump_size {
>>>>>> + u32 dump_size;
>>>>>> +} __packed;
>>>>>> +
>>>>>> struct qca_data {
>>>>>> struct hci_uart *hu;
>>>>>> struct sk_buff *rx_skb;
>>>>>> struct sk_buff_head txq;
>>>>>> - struct sk_buff_head tx_wait_q; /* HCI_IBS wait queue */
>>>>>> - spinlock_t hci_ibs_lock; /* HCI_IBS state lock */
>>>>>> + struct sk_buff_head tx_wait_q; /* HCI_IBS wait queue */
>>>>>> + struct sk_buff_head rx_memdump_q; /* Memdump wait queue */
>>>>>> + spinlock_t hci_ibs_lock; /* HCI_IBS state lock */
>>>>>> u8 tx_ibs_state; /* HCI_IBS transmit side power state*/
>>>>>> u8 rx_ibs_state; /* HCI_IBS receive side power state */
>>>>>> bool tx_vote; /* Clock must be on for TX */
>>>>>> @@ -103,12 +141,17 @@ struct qca_data {
>>>>>> u32 tx_idle_delay;
>>>>>> struct timer_list wake_retrans_timer;
>>>>>> u32 wake_retrans;
>>>>>> + struct timer_list memdump_timer;
>>>>>> + u32 memdump_delay;
>>>>>> struct workqueue_struct *workqueue;
>>>>>> struct work_struct ws_awake_rx;
>>>>>> struct work_struct ws_awake_device;
>>>>>> struct work_struct ws_rx_vote_off;
>>>>>> struct work_struct ws_tx_vote_off;
>>>>>> + struct work_struct ctrl_memdump_evt;
>>>>>> + struct qca_memdump_data *qca_memdump;
>>>>>> unsigned long flags;
>>>>>> + enum qca_memdump_states memdump_state;
>>>>>> /* For debugging purpose */
>>>>>> u64 ibs_sent_wacks;
>>>>>> @@ -173,6 +216,7 @@ struct qca_serdev {
>>>>>> static int qca_power_setup(struct hci_uart *hu, bool on);
>>>>>> static void qca_power_shutdown(struct hci_uart *hu);
>>>>>> static int qca_power_off(struct hci_dev *hdev);
>>>>>> +static void qca_controller_memdump(struct work_struct *work);
>>>>>> static void __serial_clock_on(struct tty_struct *tty)
>>>>>> {
>>>>>> @@ -446,6 +490,21 @@ static void
>>>>>> hci_ibs_wake_retrans_timeout(struct timer_list *t)
>>>>>> hci_uart_tx_wakeup(hu);
>>>>>> }
>>>>>> +static void hci_memdump_timeout(struct timer_list *t)
>>>>>> +{
>>>>>> + struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
>>>>>> + struct hci_uart *hu = qca->hu;
>>>>>> + struct qca_memdump_data *qca_memdump = qca->qca_memdump;
>>>>>> + char *memdump_buf = qca_memdump->memdump_buf_tail;
>>>>>> +
>>>>>> + bt_dev_err(hu->hdev, "clearing allocated memory due to memdump
>>>>>> timeout");
>>>>>> + kfree(memdump_buf);
>>>>>> + kfree(qca_memdump);
>>>>>> + qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
>>>>>> + del_timer(&qca->memdump_timer);
>>>>>> + cancel_work_sync(&qca->ctrl_memdump_evt);
>>>>>> +}
>>>>>> +
>>>>>> /* Initialize protocol */
>>>>>> static int qca_open(struct hci_uart *hu)
>>>>>> {
>>>>>> @@ -461,6 +520,7 @@ static int qca_open(struct hci_uart *hu)
>>>>>> skb_queue_head_init(&qca->txq);
>>>>>> skb_queue_head_init(&qca->tx_wait_q);
>>>>>> + skb_queue_head_init(&qca->rx_memdump_q);
>>>>>> spin_lock_init(&qca->hci_ibs_lock);
>>>>>> qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
>>>>>> if (!qca->workqueue) {
>>>>>> @@ -473,6 +533,7 @@ static int qca_open(struct hci_uart *hu)
>>>>>> INIT_WORK(&qca->ws_awake_device, qca_wq_awake_device);
>>>>>> INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
>>>>>> INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
>>>>>> + INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
>>>>>> qca->hu = hu;
>>>>>> @@ -500,7 +561,7 @@ static int qca_open(struct hci_uart *hu)
>>>>>> qca->tx_votes_off = 0;
>>>>>> qca->rx_votes_on = 0;
>>>>>> qca->rx_votes_off = 0;
>>>>>> -
>>>>>> + qca->memdump_state = QCA_MEMDUMP_IDLE;
>>>>>> hu->priv = qca;
>>>>>> if (hu->serdev) {
>>>>>> @@ -528,6 +589,9 @@ static int qca_open(struct hci_uart *hu)
>>>>>> timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
>>>>>> qca->tx_idle_delay = IBS_TX_IDLE_TIMEOUT_MS;
>>>>>> + timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0);
>>>>>> + qca->memdump_delay = MEMDUMP_COLLECTION_TIMEOUT_MS;
>>>>>> +
>>>>>> BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
>>>>>> qca->tx_idle_delay, qca->wake_retrans);
>>>>>> @@ -605,8 +669,10 @@ static int qca_close(struct hci_uart *hu)
>>>>>> skb_queue_purge(&qca->tx_wait_q);
>>>>>> skb_queue_purge(&qca->txq);
>>>>>> + skb_queue_purge(&qca->rx_memdump_q);
>>>>>> del_timer(&qca->tx_idle_timer);
>>>>>> del_timer(&qca->wake_retrans_timer);
>>>>>> + del_timer(&qca->memdump_timer);
>>>>>> destroy_workqueue(qca->workqueue);
>>>>>> qca->hu = NULL;
>>>>>> @@ -866,6 +932,141 @@ static int qca_recv_acl_data(struct hci_dev
>>>>>> *hdev, struct sk_buff *skb)
>>>>>> return hci_recv_frame(hdev, skb);
>>>>>> }
>>>>>> +static void qca_controller_memdump(struct work_struct *work)
>>>>>> +{
>>>>>> + struct qca_data *qca = container_of(work, struct qca_data,
>>>>>> + ctrl_memdump_evt);
>>>>>> + struct hci_uart *hu = qca->hu;
>>>>>> + struct sk_buff *skb;
>>>>>> + struct qca_memdump_event_hdr *cmd_hdr;
>>>>>> + struct qca_memdump_data *qca_memdump = qca->qca_memdump;
>>>>>> + struct qca_dump_size *dump;
>>>>>> + char *memdump_buf;
>>>>>> + char nullBuff[QCA_DUMP_PACKET_SIZE] = { 0 };
>>>>>> + u16 opcode, seq_no;
>>>>>> + u32 dump_size;
>>>>>> +
>>>>>> + while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
>>>>>> +
>>>>>> + if (!qca_memdump) {
>>>>>> + qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
>>>>>> + GFP_ATOMIC);
>>>>>> + if (!qca_memdump)
>>>>>> + return;
>>>>>> +
>>>>>> + qca->qca_memdump = qca_memdump;
>>>>>> + }
>>>>>> +
>>>>>> + qca->memdump_state = QCA_MEMDUMP_COLLECTING;
>>>>>> + cmd_hdr = (void *) skb->data;
>>>>>> + opcode = __le16_to_cpu(cmd_hdr->opcode);
>>>>>> + seq_no = __le16_to_cpu(cmd_hdr->seq_no);
>>>>>> + skb_pull(skb, sizeof(struct qca_memdump_event_hdr));
>>>>>> +
>>>>>> + if (!seq_no) {
>>>>>> +
>>>>>> + /* This is the first frame of memdump packet from
>>>>>> + * the controller, Disable IBS to recevie dump
>>>>>> + * with out any interruption, ideally time required for
>>>>>> + * the controller to send the dump is 8 seconds. let us
>>>>>> + * start timer to handle this asynchronous activity.
>>>>>> + */
>>>>>> + clear_bit(STATE_IN_BAND_SLEEP_ENABLED, &qca->flags);
>>>>>> +
>>>>>> + dump = (void *) skb->data;
>>>>>> + dump_size = __le32_to_cpu(dump->dump_size);
>>>>>> + if (!(dump_size)) {
>>>>>> + bt_dev_err(hu->hdev, "QCA invalid memdump size");
>>>>>> + kfree_skb(skb);
>>>>>> + return;
>>>>>> + }
>>>>>> +
>>>>>> + bt_dev_info(hu->hdev, "QCA: collecting memdump started"
>>>>>> + "of size:%u", dump_size);
>>>>>> + mod_timer(&qca->memdump_timer, (jiffies +
>>>>>> + msecs_to_jiffies(qca->memdump_delay)));
>>>>>> +
>>>>>> + skb_pull(skb, sizeof(dump_size));
>>>>>> + memdump_buf = vmalloc(dump_size);
>>>>>> + qca_memdump->memdump_buf_head = memdump_buf;
>>>>>> + qca_memdump->memdump_buf_tail = memdump_buf;
>>>>>> + }
>>>>>> +
>>>>>> + memdump_buf = qca_memdump->memdump_buf_tail;
>>>>>> +
>>>>>> + /* If sequence no 0 is missed then there is no point in
>>>>>> + * accepting the other sequences.
>>>>>> + */
>>>>>> + if (!memdump_buf) {
>>>>>> + bt_dev_err(hu->hdev, "QCA: Discarding other packets");
>>>>>> + kfree(qca_memdump);
>>>>>> + kfree_skb(skb);
>>>>>> + qca->qca_memdump = NULL;
>>>>>> + return;
>>>>>> + }
>>>>>> +
>>>>>> + /* There could be chance of missing some packets from
>>>>>> + * the controller. In such cases let us store the dummy
>>>>>> + * packets in the buffer.
>>>>>> + */
>>>>>> + while ((seq_no > qca_memdump->current_seq_no + 1) &&
>>>>>> + seq_no != QCA_LAST_SEQUENCE_NUM) {
>>>>>> + bt_dev_err(hu->hdev, "QCA controller missed packet:%d",
>>>>>> + qca_memdump->current_seq_no);
>>>>>> + memcpy(memdump_buf, nullBuff, QCA_DUMP_PACKET_SIZE);
>>>>>> + memdump_buf = memdump_buf + QCA_DUMP_PACKET_SIZE;
>>>>>> + qca_memdump->received_dump += QCA_DUMP_PACKET_SIZE;
>>>>>> + qca_memdump->current_seq_no++;
>>>>>> + }
>>>>>> +
>>>>>> + memcpy(memdump_buf, (unsigned char *) skb->data, skb->len);
>>>>>> + memdump_buf = memdump_buf + skb->len;
>>>>>> + qca_memdump->memdump_buf_tail = memdump_buf;
>>>>>> + qca_memdump->current_seq_no = seq_no + 1;
>>>>>> + qca_memdump->received_dump += skb->len;
>>>>>> + qca->qca_memdump = qca_memdump;
>>>>>> + kfree_skb(skb);
>>>>>> + if (seq_no == QCA_LAST_SEQUENCE_NUM) {
>>>>>> + bt_dev_info(hu->hdev, "QCA writing crash dump of size %d
>>>>>> bytes",
>>>>>> + qca_memdump->received_dump);
>>>>>> + memdump_buf = qca_memdump->memdump_buf_head;
>>>>>> + dev_coredumpv(&hu->serdev->dev, memdump_buf,
>>>>>> + qca_memdump->received_dump, GFP_KERNEL);
>>>>>> + // Revisit for free(memdump) if needed.
>>>>>> + del_timer(&qca->memdump_timer);
>>>>>> + kfree(qca->qca_memdump);
>>>>>> + qca->qca_memdump = NULL;
>>>>>> + qca->memdump_state = QCA_MEMDUMP_COLLECTED;
>>>>>> + }
>>>>>> + }
>>>>>> +
>>>>>> +}
>>>>>> +
>>>>>> +int qca_controller_memdump_event(struct hci_dev *hdev, struct
>>>>>> sk_buff *skb)
>>>>>> +{
>>>>>> + struct hci_uart *hu = hci_get_drvdata(hdev);
>>>>>> + struct qca_data *qca = hu->priv;
>>>>>> +
>>>>>> + skb_queue_tail(&qca->rx_memdump_q, skb);
>>>>>> + queue_work(qca->workqueue, &qca->ctrl_memdump_evt);
>>>>>> +
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int qca_recv_event_data(struct hci_dev *hdev, struct
>>>>>> sk_buff *skb)
>>>>>> +{
>>>>>> + /* We receive chip memory dump as an event packet, With a
>>>>>> dedicated
>>>>>> + * handler followed by a hardware error event. When this event
>>>>>> is
>>>>>> + * received we store dump into a file before closing hci. This
>>>>>> + * dump will help in triaging the issues.
>>>>>> + */
>>>>>> + if ((skb->data[0] == HCI_VENDOR_PKT) &&
>>>>>> + (get_unaligned_be16((skb->data) + 2) ==
>>>>>> QCA_SSR_DUMP_HANDLE))
>>>>> The (skb->data) is pointless, just do skb->data.
>>>> [Bala]: will update.
>>>>>> + return qca_controller_memdump_event(hdev, skb);
>>>>>> +
>>>>>> + return hci_recv_frame(hdev, skb);
>>>>>> +}
>>>>>> +
>>>>>> #define QCA_IBS_SLEEP_IND_EVENT \
>>>>>> .type = HCI_IBS_SLEEP_IND, \
>>>>>> .hlen = 0, \
>>>>>> @@ -888,12 +1089,12 @@ static int qca_recv_acl_data(struct hci_dev
>>>>>> *hdev, struct sk_buff *skb)
>>>>>> .maxlen = HCI_MAX_IBS_SIZE
>>>>>> static const struct h4_recv_pkt qca_recv_pkts[] = {
>>>>>> - { H4_RECV_ACL, .recv = qca_recv_acl_data },
>>>>>> - { H4_RECV_SCO, .recv = hci_recv_frame },
>>>>>> - { H4_RECV_EVENT, .recv = hci_recv_frame },
>>>>>> - { QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind },
>>>>>> - { QCA_IBS_WAKE_ACK_EVENT, .recv = qca_ibs_wake_ack },
>>>>>> - { QCA_IBS_SLEEP_IND_EVENT, .recv = qca_ibs_sleep_ind },
>>>>>> + { H4_RECV_ACL, .recv = qca_recv_acl_data },
>>>>>> + { H4_RECV_SCO, .recv = hci_recv_frame },
>>>>>> + { H4_RECV_EVENT, .recv = qca_recv_event_data },
>>>>> call it qca_recv_event and don't realign the whole table.
>>>> [Bala]: will update.
>>>>>> + { QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind },
>>>>>> + { QCA_IBS_WAKE_ACK_EVENT, .recv = qca_ibs_wake_ack },
>>>>>> + { QCA_IBS_SLEEP_IND_EVENT, .recv = qca_ibs_sleep_ind },
>>>>>> };
>>>>>> static int qca_recv(struct hci_uart *hu, const void *data, int
>>>>>> count)
>>>>>> @@ -1114,6 +1315,68 @@ static int qca_set_speed(struct hci_uart
>>>>>> *hu, enum qca_speed_type speed_type)
>>>>>> return 0;
>>>>>> }
>>>>>> +static int qca_send_crashbuffer(struct hci_uart *hu)
>>>>>> +{
>>>>>> + struct qca_data *qca = hu->priv;
>>>>>> + struct sk_buff *skb;
>>>>>> + unsigned char buf[QCA_CRASHBYTE_PACKET_LEN];
>>>>>> +
>>>>>> + bt_dev_dbg(hu->hdev, "sending soc crash buffer to controller");
>>>>>> +
>>>>>> + /* We forcefully crash the controller, by sending 0xfb byte for
>>>>>> + * 1024 times. We also might have chance of losing data, To be
>>>>>> + * on safer side we send 1100 bytes to the SoC.
>>>>>> + */
>>>>>> + memset(buf, QCA_MEMDUMP_BYTE, QCA_CRASHBYTE_PACKET_LEN);
>>>>>> +
>>>>>> + skb = bt_skb_alloc(QCA_CRASHBYTE_PACKET_LEN, GFP_KERNEL);
>>>>>> + if (!skb) {
>>>>>> + bt_dev_err(hu->hdev, "Failed to allocate memory for skb
>>>>>> packet");
>>>>>> + return -ENOMEM;
>>>>>> + }
>>>>>> +
>>>>>> + skb_put_data(skb, buf, QCA_CRASHBYTE_PACKET_LEN);
>>>>>> + hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
>>>>>> +
>>>>>> + skb_queue_tail(&qca->txq, skb);
>>>>>> + hci_uart_tx_wakeup(hu);
>>>>>> +
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static void qca_hw_error(struct hci_dev *hdev, u8 code)
>>>>>> +{
>>>>>> + struct hci_uart *hu = hci_get_drvdata(hdev);
>>>>>> + struct qca_data *qca = hu->priv;
>>>>>> +
>>>>>> + bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
>>>>>> +
>>>>>> + if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
>>>>>> + /* If hardware error event received for other than QCA
>>>>>> + * soc memory dump event, then we need to crash the SOC
>>>>>> + * and wait here for 8 seconds to get the dump packets.
>>>>>> + * This will block main thread to be on hold until we
>>>>>> + * collect dump.
>>>>>> + */
>>>>>> + mod_timer(&qca->memdump_timer,
>>>>>> + (jiffies + msecs_to_jiffies(qca->memdump_delay)));
>>>>> Why a timer that then ends up in interrupt context?
>>>> [Bala]: some times we only receive hardware error from the chip, so
>>>> we are sending an special buffer to request the
>>>> chip for memory dump. we are using an timer to check whether
>>>> the data memdump is collected. if the timer
>>>> over flows we are cleaning up all the assigned memory and
>>>> unblock the main thread,
>>>> in short this timer is a watch dog timer for the memory dump
>>>> collection.
>>> I saw that you used a bunch of timers actually. My question is why
>>> are
>>> these not delayed_wq instead? I wonder if we really want to keep
>>> using
>>> timers and handling interrupt context locking if we could move
>>> everything to wq instead.
>>
>> [Bala]: Let me brief you the requirement.
>> This patch is used to collect the memory dump received from the
>> chip and store it into a file.
>> We receive chip memory dump in two cases
>> 1. When chip goes into the bad state it sends the dump followed
>> by and hardware error event.
>> 2. Some time when we send some garbage or un recognized command
>> to chip.
>> it will respond with a hardware error event. So we send an
>> 0xfb bytes to the chip requesting it to send the memory dump.
>>
>> In both cases, chip will go into the bad state once it sends the dump.
>> so we need to turn OFF and ON chip and perform the init procedure.
>> So keeping the above cases in the view, we register an handle for
>> hardware error event. So that in both the cases hw error routine will
>> execute.
>>
>> As the dump as some specific headers we will queue the received data
>> into work queue.
>> We are blocking the main thread until the total dump is stored into
>> the file.
>> Here is the answer for the question, yes we check the status of the
>> work queue but for handling case 2 we require timers.
>> But let us assume the wq is taking more time than expected. Then we
>> also need a timer.
>> we are using timer so that we will not block the main thread
>> indefinitely.
>> They are some cases where when we request a memory dump, after some
>> packet sent by the chip.
>> it may go into a bad state where it will not pump the remaining dump,
>> so we are using a timer to unblock the main thread.
>> so during timeout, we are restarting the flags to unblock the main
>> thread.
>>
>>
>>>>>> + qca_send_crashbuffer(hu);
>>>>>> +
>>>>>> + while (qca->memdump_state != QCA_MEMDUMP_COLLECTED &&
>>>>>> + qca->memdump_state != QCA_MEMDUMP_TIMEOUT)
>>>>>> + ;
>>>>> What is this? Busy loop? I rather not do that.
>>>> [Bala]: this busy required to collected the dump from the chip
>>>> before we close an reopen the hci.
>>>> >> + } else if (qca->memdump_state == QCA_MEMDUMP_COLLECTING)
>>>> {
>>> Why not use some wakeup handling here. I am really against any kind
>>> of
>>> busy loops in a driver.
>> [Bala]: this busy loop is require. pls refer my comments to the above
>> query.
>> this will help in mainly in case 2, where we will block the
>> main thread until we have dump collected.
>>
>> let us assume that we received an hardware error event, hw
>> error routine we execute which sends an
>> special byte to the chip requesting for memory dump. it starts
>> sending the dump, but we move forward and turn off the
>> chip so we will miss the dump. so here the busy loop is to wait
>> until we have dump.
>> but the we are not waiting indefinitely we have timer to
>> unblock them.
>
> I do not see how any of this is correct. Can you not use some wakeup
> helper like everybody else does. I also do not see how these two
> threads are correctly protected. It looks really hackish to me.
>
> Regards
>
> Marcel
Ok sure, will check and update accordingly in the incremental patch.
--
Regards
Balakrishna.
Powered by blists - more mailing lists