lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <87k1uie1jm.fsf@notabene.neil.brown.name>
Date:   Mon, 12 Mar 2018 08:37:49 +1100
From:   NeilBrown <neilb@...e.com>
To:     "Dilger\, Andreas" <andreas.dilger@...el.com>
Cc:     "Drokin\, Oleg" <oleg.drokin@...el.com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        James Simmons <jsimmons@...radead.org>,
        "Linux Kernel Mailing List" <linux-kernel@...r.kernel.org>,
        Lustre Development List <lustre-devel@...ts.lustre.org>
Subject: Re: [PATCH 11/17] staging: lustre: ptlrpc: use workqueue for pinger

On Thu, Mar 08 2018, Dilger, Andreas wrote:

> On Mar 1, 2018, at 16:31, NeilBrown <neilb@...e.com> wrote:
>> 
>> lustre has a "Pinger" kthread which periodically pings peers
>> to ensure all hosts are functioning.
>> 
>> This can more easily be done using a work queue.
>> 
>> As maintaining contact with other peers is import for
>> keeping the filesystem running, and as the filesystem might
>> be involved in freeing memory, it is safest to have a
>> separate WQ_MEM_RECLAIM workqueue.
>> 
>> The SVC_EVENT functionality to wake up the thread can be
>> replaced with mod_delayed_work().
>> 
>> Also use round_jiffies_up_relative() rather than setting a
>> minimum of 1 second delay.  The PING_INTERVAL is measured in
>> seconds so this meets the need is allow the workqueue to
>> keep wakeups synchronized.
>> 
>> Signed-off-by: NeilBrown <neilb@...e.com>
>
> Looks reasonable.  Fortunately, pinging the server does not need
> to be very accurate since it is only done occasionally when the
> client is otherwise idle, so it shouldn't matter if the workqueue
> operation is delayed by a few seconds.
>
> Reviewed-by: Andreas Dilger <andreas.dilger@...el.com>

Thanks a lot for the thorough review!
The above implies that we don't need WQ_MEM_RECLAIM.  I didn't dig in to
exactly when and why pings happened so I thought having WQ_MEM_RECLAIM
we safest.  If pings only happen when the client is otherwise idle, the
it isn't needed.  I'll remove it.

Thanks,
NeilBrown


>
>> ---
>> drivers/staging/lustre/lustre/ptlrpc/pinger.c |   81 +++++++------------------
>> 1 file changed, 24 insertions(+), 57 deletions(-)
>> 
>> diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
>> index b5f3cfee8e75..0775b7a048bb 100644
>> --- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c
>> +++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
>> @@ -217,21 +217,18 @@ static void ptlrpc_pinger_process_import(struct obd_import *imp,
>> 	}
>> }
>> 
>> -static int ptlrpc_pinger_main(void *arg)
>> -{
>> -	struct ptlrpc_thread *thread = arg;
>> -
>> -	/* Record that the thread is running */
>> -	thread_set_flags(thread, SVC_RUNNING);
>> -	wake_up(&thread->t_ctl_waitq);
>> +static struct workqueue_struct *pinger_wq;
>> +static void ptlrpc_pinger_main(struct work_struct *ws);
>> +static DECLARE_DELAYED_WORK(ping_work, ptlrpc_pinger_main);
>> 
>> -	/* And now, loop forever, pinging as needed. */
>> -	while (1) {
>> -		unsigned long this_ping = cfs_time_current();
>> -		long time_to_next_wake;
>> -		struct timeout_item *item;
>> -		struct obd_import *imp;
>> +static void ptlrpc_pinger_main(struct work_struct *ws)
>> +{
>> +	unsigned long this_ping = cfs_time_current();
>> +	long time_to_next_wake;
>> +	struct timeout_item *item;
>> +	struct obd_import *imp;
>> 
>> +	do {
>> 		mutex_lock(&pinger_mutex);
>> 		list_for_each_entry(item, &timeout_list, ti_chain) {
>> 			item->ti_cb(item, item->ti_cb_data);
>> @@ -260,50 +257,24 @@ static int ptlrpc_pinger_main(void *arg)
>> 		       time_to_next_wake,
>> 		       cfs_time_add(this_ping,
>> 				    PING_INTERVAL * HZ));
>> -		if (time_to_next_wake > 0) {
>> -			wait_event_idle_timeout(thread->t_ctl_waitq,
>> -						thread_is_stopping(thread) ||
>> -						thread_is_event(thread),
>> -						max_t(long, time_to_next_wake, HZ));
>> -			if (thread_test_and_clear_flags(thread, SVC_STOPPING))
>> -				break;
>> -			/* woken after adding import to reset timer */
>> -			thread_test_and_clear_flags(thread, SVC_EVENT);
>> -		}
>> -	}
>> +	} while (time_to_next_wake <= 0);
>> 
>> -	thread_set_flags(thread, SVC_STOPPED);
>> -	wake_up(&thread->t_ctl_waitq);
>> -
>> -	CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid());
>> -	return 0;
>> +	queue_delayed_work(pinger_wq, &ping_work,
>> +			   round_jiffies_up_relative(time_to_next_wake));
>> }
>> 
>> -static struct ptlrpc_thread pinger_thread;
>> -
>> int ptlrpc_start_pinger(void)
>> {
>> -	struct task_struct *task;
>> -	int rc;
>> -
>> -	if (!thread_is_init(&pinger_thread) &&
>> -	    !thread_is_stopped(&pinger_thread))
>> +	if (pinger_wq)
>> 		return -EALREADY;
>> 
>> -	init_waitqueue_head(&pinger_thread.t_ctl_waitq);
>> -
>> -	strcpy(pinger_thread.t_name, "ll_ping");
>> -
>> -	task = kthread_run(ptlrpc_pinger_main, &pinger_thread,
>> -			   pinger_thread.t_name);
>> -	if (IS_ERR(task)) {
>> -		rc = PTR_ERR(task);
>> -		CERROR("cannot start pinger thread: rc = %d\n", rc);
>> -		return rc;
>> +	pinger_wq = alloc_workqueue("ptlrpc_pinger", WQ_MEM_RECLAIM, 1);
>> +	if (!pinger_wq) {
>> +		CERROR("cannot start pinger workqueue\n");
>> +		return -ENOMEM;
>> 	}
>> -	wait_event_idle(pinger_thread.t_ctl_waitq,
>> -			thread_is_running(&pinger_thread));
>> 
>> +	queue_delayed_work(pinger_wq, &ping_work, 0);
>> 	return 0;
>> }
>> 
>> @@ -313,16 +284,13 @@ int ptlrpc_stop_pinger(void)
>> {
>> 	int rc = 0;
>> 
>> -	if (thread_is_init(&pinger_thread) ||
>> -	    thread_is_stopped(&pinger_thread))
>> +	if (!pinger_wq)
>> 		return -EALREADY;
>> 
>> 	ptlrpc_pinger_remove_timeouts();
>> -	thread_set_flags(&pinger_thread, SVC_STOPPING);
>> -	wake_up(&pinger_thread.t_ctl_waitq);
>> -
>> -	wait_event_idle(pinger_thread.t_ctl_waitq,
>> -			thread_is_stopped(&pinger_thread));
>> +	cancel_delayed_work_sync(&ping_work);
>> +	destroy_workqueue(pinger_wq);
>> +	pinger_wq = NULL;
>> 
>> 	return rc;
>> }
>> @@ -505,6 +473,5 @@ static int ptlrpc_pinger_remove_timeouts(void)
>> 
>> void ptlrpc_pinger_wake_up(void)
>> {
>> -	thread_add_flags(&pinger_thread, SVC_EVENT);
>> -	wake_up(&pinger_thread.t_ctl_waitq);
>> +	mod_delayed_work(pinger_wq, &ping_work, 0);
>> }
>> 
>> 
>
> Cheers, Andreas
> --
> Andreas Dilger
> Lustre Principal Architect
> Intel Corporation

Download attachment "signature.asc" of type "application/pgp-signature" (833 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ