Some of the file operations in /proc/sal require to run code on the requested cpu. This is achieved by temporarily setting the affinity of the calling user space thread to the requested CPU and reset it to the original affinity afterwards. That's racy vs. CPU hotplug and concurrent affinity settings for that thread resulting in code executing on the wrong CPU and overwriting the new affinity setting. Replace it by using work_on_cpu_safe() which guarantees to run the code on the requested CPU or to fail in case the CPU is offline. Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: Fenghua Yu Cc: linux-ia64@vger.kernel.org --- arch/ia64/kernel/salinfo.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -179,14 +179,14 @@ struct salinfo_platform_oemdata_parms { const u8 *efi_guid; u8 **oemdata; u64 *oemdata_size; - int ret; }; -static void +static long salinfo_platform_oemdata_cpu(void *context) { struct salinfo_platform_oemdata_parms *parms = context; - parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); + + return salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); } static void @@ -380,16 +380,7 @@ salinfo_log_release(struct inode *inode, return 0; } -static void -call_on_cpu(int cpu, void (*fn)(void *), void *arg) -{ - cpumask_t save_cpus_allowed = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - (*fn)(arg); - set_cpus_allowed_ptr(current, &save_cpus_allowed); -} - -static void +static long salinfo_log_read_cpu(void *context) { struct salinfo_data *data = context; @@ -399,6 +390,7 @@ salinfo_log_read_cpu(void *context) /* Clear corrected errors as they are read from SAL */ if (rh->severity == sal_log_severity_corrected) ia64_sal_clear_state_info(data->type); + return 0; } static void @@ -430,7 +422,7 @@ salinfo_log_new_read(int cpu, struct sal spin_unlock_irqrestore(&data_saved_lock, flags); if (!data->saved_num) - call_on_cpu(cpu, salinfo_log_read_cpu, data); + work_on_cpu_safe(cpu, salinfo_log_read_cpu, data); if (!data->log_size) { data->state = STATE_NO_DATA; cpumask_clear_cpu(cpu, &data->cpu_event); @@ -459,11 +451,13 @@ salinfo_log_read(struct file *file, char return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); } -static void +static long salinfo_log_clear_cpu(void *context) { struct salinfo_data *data = context; + ia64_sal_clear_state_info(data->type); + return 0; } static int @@ -486,7 +480,7 @@ salinfo_log_clear(struct salinfo_data *d rh = (sal_log_record_header_t *)(data->log_buffer); /* Corrected errors have already been cleared from SAL */ if (rh->severity != sal_log_severity_corrected) - call_on_cpu(cpu, salinfo_log_clear_cpu, data); + work_on_cpu_safe(cpu, salinfo_log_clear_cpu, data); /* clearing a record may make a new record visible */ salinfo_log_new_read(cpu, data); if (data->state == STATE_LOG_RECORD) { @@ -531,9 +525,8 @@ salinfo_log_write(struct file *file, con .oemdata = &data->oemdata, .oemdata_size = &data->oemdata_size }; - call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms); - if (parms.ret) - count = parms.ret; + count = work_on_cpu_safe(cpu, salinfo_platform_oemdata_cpu, + &parms); } else data->oemdata_size = 0; } else