Date:	Fri, 14 Mar 2008 11:21:16 -0700
From:	Jeremy Fitzhardinge <jeremy@...p.org>
To:	Jens Axboe <jens.axboe@...cle.com>
CC:	linux-kernel@...r.kernel.org, npiggin@...e.de, dgc@....com
Subject: Re: [PATCH 1/7] x86-64: introduce fast variant of smp_call_function_single()

Jens Axboe wrote:
> From: Nick Piggin <npiggin@...e.de>
>   

Why is this necessary?  How is smp_call_function_single slow?
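
For context, here's a minimal sketch of how the single-CPU case looks before
this patch, pieced together from the removed hunks below (the
sketch_call_one_cpu() wrapper is invented for illustration; call_lock,
__smp_call_function_mask() and the shared call_data slot are the existing
names in smp_64.c):

/*
 * Sketch of the pre-patch flow, not a verbatim excerpt: the single-CPU
 * case funnels through the mask path, so every caller takes the one
 * global call_lock, reuses the single shared call_data descriptor, and
 * spins until each target has at least started the function.
 */
#include <linux/smp.h>
#include <linux/spinlock.h>

static int sketch_call_one_cpu(int cpu, void (*func)(void *), void *info,
			       int wait)
{
	int ret;

	spin_lock(&call_lock);		/* serializes all cross-CPU callers */
	ret = __smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
	spin_unlock(&call_lock);	/* only now may the next caller start */
	return ret;
}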

    J

> Signed-off-by: Jens Axboe <jens.axboe@...cle.com>
> ---
>  arch/x86/kernel/entry_64.S                |    3 +
>  arch/x86/kernel/i8259_64.c                |    1 +
>  arch/x86/kernel/smp_64.c                  |  303 +++++++++++++++++++++--------
>  include/asm-x86/hw_irq_64.h               |    4 +-
>  include/asm-x86/mach-default/entry_arch.h |    1 +
>  include/linux/smp.h                       |    2 +-
>  6 files changed, 232 insertions(+), 82 deletions(-)
>
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index c20c9e7..22caf56 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -713,6 +713,9 @@ END(invalidate_interrupt\num)
>  ENTRY(call_function_interrupt)
>  	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
>  END(call_function_interrupt)
> +ENTRY(call_function_single_interrupt)
> +	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
> +END(call_function_single_interrupt)
>  ENTRY(irq_move_cleanup_interrupt)
>  	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
>  END(irq_move_cleanup_interrupt)
> diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
> index fa57a15..2b0b6d2 100644
> --- a/arch/x86/kernel/i8259_64.c
> +++ b/arch/x86/kernel/i8259_64.c
> @@ -493,6 +493,7 @@ void __init native_init_IRQ(void)
>  
>  	/* IPI for generic function call */
>  	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
> +	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
>  
>  	/* Low priority IPI to cleanup after moving an irq */
>  	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
> diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
> index 2fd74b0..1196a12 100644
> --- a/arch/x86/kernel/smp_64.c
> +++ b/arch/x86/kernel/smp_64.c
> @@ -18,6 +18,7 @@
>  #include <linux/kernel_stat.h>
>  #include <linux/mc146818rtc.h>
>  #include <linux/interrupt.h>
> +#include <linux/rcupdate.h>
>  
>  #include <asm/mtrr.h>
>  #include <asm/pgalloc.h>
> @@ -295,21 +296,29 @@ void smp_send_reschedule(int cpu)
>  	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
>  }
>  
> +#define CALL_WAIT		0x01
> +#define CALL_FALLBACK		0x02
>  /*
>   * Structure and data for smp_call_function(). This is designed to minimise
>   * static memory requirements. It also looks cleaner.
>   */
>  static DEFINE_SPINLOCK(call_lock);
>  
> -struct call_data_struct {
> +struct call_data {
> +	spinlock_t lock;
> +	struct list_head list;
>  	void (*func) (void *info);
>  	void *info;
> -	atomic_t started;
> -	atomic_t finished;
> -	int wait;
> +	unsigned int flags;
> +	unsigned int refs;
> +	cpumask_t cpumask;
> +	struct rcu_head rcu_head;
>  };
>  
> -static struct call_data_struct * call_data;
> +static LIST_HEAD(call_queue);
> +
> +static unsigned long call_fallback_used;
> +static struct call_data call_data_fallback;
>  
>  void lock_ipi_call_lock(void)
>  {
> @@ -321,55 +330,47 @@ void unlock_ipi_call_lock(void)
>  	spin_unlock_irq(&call_lock);
>  }
>  
> -/*
> - * this function sends a 'generic call function' IPI to all other CPU
> - * of the system defined in the mask.
> - */
> -static int __smp_call_function_mask(cpumask_t mask,
> -				    void (*func)(void *), void *info,
> -				    int wait)
> -{
> -	struct call_data_struct data;
> -	cpumask_t allbutself;
> -	int cpus;
>  
> -	allbutself = cpu_online_map;
> -	cpu_clear(smp_processor_id(), allbutself);
> -
> -	cpus_and(mask, mask, allbutself);
> -	cpus = cpus_weight(mask);
> -
> -	if (!cpus)
> -		return 0;
> -
> -	data.func = func;
> -	data.info = info;
> -	atomic_set(&data.started, 0);
> -	data.wait = wait;
> -	if (wait)
> -		atomic_set(&data.finished, 0);
> +struct call_single_data {
> +	struct list_head list;
> +	void (*func) (void *info);
> +	void *info;
> +	unsigned int flags;
> +};
>  
> -	call_data = &data;
> -	wmb();
> +struct call_single_queue {
> +	spinlock_t lock;
> +	struct list_head list;
> +};
> +static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
>  
> -	/* Send a message to other CPUs */
> -	if (cpus_equal(mask, allbutself))
> -		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
> -	else
> -		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
> +static unsigned long call_single_fallback_used;
> +static struct call_single_data call_single_data_fallback;
>  
> -	/* Wait for response */
> -	while (atomic_read(&data.started) != cpus)
> -		cpu_relax();
> +int __cpuinit init_smp_call(void)
> +{
> +	int i;
>  
> -	if (!wait)
> -		return 0;
> +	for_each_cpu_mask(i, cpu_possible_map) {
> +		spin_lock_init(&per_cpu(call_single_queue, i).lock);
> +		INIT_LIST_HEAD(&per_cpu(call_single_queue, i).list);
> +	}
> +	return 0;
> +}
> +core_initcall(init_smp_call);
>  
> -	while (atomic_read(&data.finished) != cpus)
> -		cpu_relax();
> +static void rcu_free_call_data(struct rcu_head *head)
> +{
> +	struct call_data *data;
> +	data = container_of(head, struct call_data, rcu_head);
> +	kfree(data);
> +}
>  
> -	return 0;
> +static void free_call_data(struct call_data *data)
> +{
> +	call_rcu(&data->rcu_head, rcu_free_call_data);
>  }
> +
>  /**
>   * smp_call_function_mask(): Run a function on a set of other CPUs.
>   * @mask: The set of cpus to run on.  Must not include the current cpu.
> @@ -389,15 +390,69 @@ int smp_call_function_mask(cpumask_t mask,
>  			   void (*func)(void *), void *info,
>  			   int wait)
>  {
> -	int ret;
> +	struct call_data *data;
> +	cpumask_t allbutself;
> +	unsigned int flags;
> +	int cpus;
>  
>  	/* Can deadlock when called with interrupts disabled */
>  	WARN_ON(irqs_disabled());
> +	WARN_ON(preemptible());
> +
> +	allbutself = cpu_online_map;
> +	cpu_clear(smp_processor_id(), allbutself);
> +
> +	cpus_and(mask, mask, allbutself);
> +	cpus = cpus_weight(mask);
> +
> +	if (!cpus)
> +		return 0;
>  
> -	spin_lock(&call_lock);
> -	ret = __smp_call_function_mask(mask, func, info, wait);
> +	flags = wait ? CALL_WAIT : 0;
> +	data = kmalloc(sizeof(struct call_data), GFP_ATOMIC);
> +	if (unlikely(!data)) {
> +		while (test_and_set_bit_lock(0, &call_fallback_used))
> +			cpu_relax();
> +		data = &call_data_fallback;
> +		flags |= CALL_FALLBACK;
> +		/* XXX: can IPI all to "synchronize" RCU? */
> +	}
> +
> +	spin_lock_init(&data->lock);
> +	data->func = func;
> +	data->info = info;
> +	data->flags = flags;
> +	data->refs = cpus;
> +	data->cpumask = mask;
> +
> +	local_irq_disable();
> +	while (!spin_trylock(&call_lock)) {
> +		local_irq_enable();
> +		cpu_relax();
> +		local_irq_disable();
> +	}
> +	/* could do ipi = list_empty(&dst->list) || !cpumask_ipi_pending() */
> +	list_add_tail_rcu(&data->list, &call_queue);
>  	spin_unlock(&call_lock);
> -	return ret;
> +	local_irq_enable();
> +
> +	/* Send a message to other CPUs */
> +	if (cpus_equal(mask, allbutself))
> +		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
> +	else
> +		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
> +
> +	if (wait) {
> +		/* Wait for response */
> +		while (data->flags)
> +			cpu_relax();
> +		if (likely(!(flags & CALL_FALLBACK)))
> +			free_call_data(data);
> +		else
> +			clear_bit_unlock(0, &call_fallback_used);
> +	}
> +
> +	return 0;
>  }
>  EXPORT_SYMBOL(smp_call_function_mask);
>  
> @@ -414,11 +469,11 @@ EXPORT_SYMBOL(smp_call_function_mask);
>   * or is or has executed.
>   */
>  
> -int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
> +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
>  			      int nonatomic, int wait)
>  {
>  	/* prevent preemption and reschedule on another processor */
> -	int ret, me = get_cpu();
> +	int me = get_cpu();
>  
>  	/* Can deadlock when called with interrupts disabled */
>  	WARN_ON(irqs_disabled());
> @@ -427,14 +482,54 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
>  		local_irq_disable();
>  		func(info);
>  		local_irq_enable();
> -		put_cpu();
> -		return 0;
> -	}
> +	} else {
> +		struct call_single_data d;
> +		struct call_single_data *data;
> +		struct call_single_queue *dst;
> +		cpumask_t mask = cpumask_of_cpu(cpu);
> +		unsigned int flags = wait ? CALL_WAIT : 0;
> +		int ipi;
> +
> +		if (!wait) {
> +			data = kmalloc(sizeof(struct call_single_data), GFP_ATOMIC);
> +			if (unlikely(!data)) {
> +				while (test_and_set_bit_lock(0, &call_single_fallback_used))
> +					cpu_relax();
> +				data = &call_single_data_fallback;
> +				flags |= CALL_FALLBACK;
> +			}
> +		} else {
> +			data = &d;
> +		}
> +
> +		data->func = func;
> +		data->info = info;
> +		data->flags = flags;
> +		dst = &per_cpu(call_single_queue, cpu);
> +
> +		local_irq_disable();
> +		while (!spin_trylock(&dst->lock)) {
> +			local_irq_enable();
> +			cpu_relax();
> +			local_irq_disable();
> +		}
> +		ipi = list_empty(&dst->list);
> +		list_add_tail(&data->list, &dst->list);
> +		spin_unlock(&dst->lock);
> +		local_irq_enable();
>  
> -	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
> +		if (ipi)
> +			send_IPI_mask(mask, CALL_FUNCTION_SINGLE_VECTOR);
> +
> +		if (wait) {
> +			/* Wait for response */
> +			while (data->flags)
> +				cpu_relax();
> +		}
> +	}
>  
>  	put_cpu();
> -	return ret;
> +	return 0;
>  }
>  EXPORT_SYMBOL(smp_call_function_single);
>  
> @@ -474,18 +569,13 @@ static void stop_this_cpu(void *dummy)
>  
>  void smp_send_stop(void)
>  {
> -	int nolock;
>  	unsigned long flags;
>  
>  	if (reboot_force)
>  		return;
>  
> -	/* Don't deadlock on the call lock in panic */
> -	nolock = !spin_trylock(&call_lock);
>  	local_irq_save(flags);
> -	__smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
> -	if (!nolock)
> -		spin_unlock(&call_lock);
> +	smp_call_function(stop_this_cpu, NULL, 0, 0);
>  	disable_local_APIC();
>  	local_irq_restore(flags);
>  }
> @@ -503,28 +593,83 @@ asmlinkage void smp_reschedule_interrupt(void)
>  
>  asmlinkage void smp_call_function_interrupt(void)
>  {
> -	void (*func) (void *info) = call_data->func;
> -	void *info = call_data->info;
> -	int wait = call_data->wait;
> +	struct list_head *pos, *tmp;
> +	int cpu = smp_processor_id();
>  
>  	ack_APIC_irq();
> -	/*
> -	 * Notify initiating CPU that I've grabbed the data and am
> -	 * about to execute the function
> -	 */
> -	mb();
> -	atomic_inc(&call_data->started);
> -	/*
> -	 * At this point the info structure may be out of scope unless wait==1
> -	 */
>  	exit_idle();
>  	irq_enter();
> -	(*func)(info);
> +
> +	list_for_each_safe_rcu(pos, tmp, &call_queue) {
> +		struct call_data *data;
> +		int refs;
> +
> +		data = list_entry(pos, struct call_data, list);
> +		if (!cpu_isset(cpu, data->cpumask))
> +			continue;
> +
> +		data->func(data->info);
> +		spin_lock(&data->lock);
> +		WARN_ON(!cpu_isset(cpu, data->cpumask));
> +		cpu_clear(cpu, data->cpumask);
> +		WARN_ON(data->refs == 0);
> +		data->refs--;
> +		refs = data->refs;
> +		spin_unlock(&data->lock);
> +
> +		if (refs == 0) {
> +			WARN_ON(cpus_weight(data->cpumask));
> +			spin_lock(&call_lock);
> +			list_del_rcu(&data->list);
> +			spin_unlock(&call_lock);
> +			if (data->flags & CALL_WAIT) {
> +				smp_wmb();
> +				data->flags = 0;
> +			} else {
> +				if (likely(!(data->flags & CALL_FALLBACK)))
> +					free_call_data(data);
> +				else
> +					clear_bit_unlock(0, &call_fallback_used);
> +			}
> +		}
> +	}
> +
>  	add_pda(irq_call_count, 1);
>  	irq_exit();
> -	if (wait) {
> -		mb();
> -		atomic_inc(&call_data->finished);
> +}
> +
> +asmlinkage void smp_call_function_single_interrupt(void)
> +{
> +	struct call_single_queue *q;
> +	LIST_HEAD(list);
> +
> +	ack_APIC_irq();
> +	exit_idle();
> +	irq_enter();
> +
> +	q = &__get_cpu_var(call_single_queue);
> +	spin_lock(&q->lock);
> +	list_replace_init(&q->list, &list);
> +	spin_unlock(&q->lock);
> +
> +	while (!list_empty(&list)) {
> +		struct call_single_data *data;
> +
> +		data = list_entry(list.next, struct call_single_data, list);
> +		list_del(&data->list);
> +
> +		data->func(data->info);
> +		if (data->flags & CALL_WAIT) {
> +			smp_wmb();
> +			data->flags = 0;
> +		} else {
> +			if (likely(!(data->flags & CALL_FALLBACK)))
> +				kfree(data);
> +			else
> +				clear_bit_unlock(0, &call_single_fallback_used);
> +		}
>  	}
> +	add_pda(irq_call_count, 1);
> +	irq_exit();
>  }
>  
> diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
> index 312a58d..06ac80c 100644
> --- a/include/asm-x86/hw_irq_64.h
> +++ b/include/asm-x86/hw_irq_64.h
> @@ -68,8 +68,7 @@
>  #define ERROR_APIC_VECTOR	0xfe
>  #define RESCHEDULE_VECTOR	0xfd
>  #define CALL_FUNCTION_VECTOR	0xfc
> -/* fb free - please don't readd KDB here because it's useless
> -   (hint - think what a NMI bit does to a vector) */
> +#define CALL_FUNCTION_SINGLE_VECTOR	0xfb
>  #define THERMAL_APIC_VECTOR	0xfa
>  #define THRESHOLD_APIC_VECTOR   0xf9
>  /* f8 free */
> @@ -102,6 +101,7 @@ void spurious_interrupt(void);
>  void error_interrupt(void);
>  void reschedule_interrupt(void);
>  void call_function_interrupt(void);
> +void call_function_single_interrupt(void);
>  void irq_move_cleanup_interrupt(void);
>  void invalidate_interrupt0(void);
>  void invalidate_interrupt1(void);
> diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
> index bc86146..9283b60 100644
> --- a/include/asm-x86/mach-default/entry_arch.h
> +++ b/include/asm-x86/mach-default/entry_arch.h
> @@ -13,6 +13,7 @@
>  BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
>  BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
>  BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
> +BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
>  #endif
>  
>  /*
> diff --git a/include/linux/smp.h b/include/linux/smp.h
> index 55232cc..c938d26 100644
> --- a/include/linux/smp.h
> +++ b/include/linux/smp.h
> @@ -53,7 +53,6 @@ extern void smp_cpus_done(unsigned int max_cpus);
>   * Call a function on all other processors
>   */
>  int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
> -
>  int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
>  				int retry, int wait);
>  
> @@ -92,6 +91,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
>  }
>  #define smp_call_function(func, info, retry, wait) \
>  			(up_smp_call_function(func, info))
> +
>  #define on_each_cpu(func,info,retry,wait)	\
>  	({					\
>  		local_irq_disable();		\
>   
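
For anyone skimming the archive, a hypothetical caller of the patched
interface would look roughly like this; the five-argument signature matches
the include/linux/smp.h declaration quoted above, while query_cpu(),
read_remote_cpu_id() and the variable names are made up for illustration:

#include <linux/smp.h>

/*
 * Runs on target_cpu: directly if that is the calling CPU, otherwise from
 * the new CALL_FUNCTION_SINGLE_VECTOR IPI handler, i.e. in IRQ context.
 */
static void read_remote_cpu_id(void *info)
{
	*(int *)info = smp_processor_id();
}

static int query_cpu(int target_cpu)
{
	int remote_id = -1;

	/* retry = 0, wait = 1: spin until the target has run the callback. */
	smp_call_function_single(target_cpu, read_remote_cpu_id, &remote_id, 0, 1);
	return remote_id;
}

With wait=1 the caller spins until the handler clears CALL_WAIT, so the
callback runs in interrupt context and must not sleep.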

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
