[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <160578808695214@kroah.com>
Date: Thu, 19 Nov 2020 13:14:47 +0100
From: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
To: linux-kernel@...r.kernel.org, akpm@...ux-foundation.org,
torvalds@...ux-foundation.org, stable@...r.kernel.org
Cc: lwn@....net, jslaby@...e.cz,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>
Subject: Re: Linux 4.14.207
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0a59fcf934f4..e0ce14f028d8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5022,6 +5022,14 @@
Disables the PV optimizations forcing the HVM guest to
run as generic HVM guest with no PV drivers.
+ xen.event_eoi_delay= [XEN]
+ How long to delay EOI handling in case of event
+ storms (jiffies). Default is 10.
+
+ xen.event_loop_timeout= [XEN]
+ After which time (jiffies) the event handling loop
+ should start to delay EOI handling. Default is 2.
+
xirc2ps_cs= [NET,PCMCIA]
Format:
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
diff --git a/Makefile b/Makefile
index 2d5ec8b7bcf5..c4bb19c1e4c7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 14
-SUBLEVEL = 206
+SUBLEVEL = 207
EXTRAVERSION =
NAME = Petit Gorille
diff --git a/arch/Kconfig b/arch/Kconfig
index 77b3e21c4844..95567f683275 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -339,6 +339,13 @@ config HAVE_RCU_TABLE_FREE
config HAVE_RCU_TABLE_INVALIDATE
bool
+config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+ bool
+ help
+ Temporary select until all architectures can be converted to have
+ irqs disabled over activate_mm. Architectures that do IPI based TLB
+ shootdowns should enable this.
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index 59655459da59..74cef57bf806 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -54,20 +54,20 @@ int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
/* optinsn template addresses */
-extern __visible kprobe_opcode_t optprobe_template_entry;
-extern __visible kprobe_opcode_t optprobe_template_val;
-extern __visible kprobe_opcode_t optprobe_template_call;
-extern __visible kprobe_opcode_t optprobe_template_end;
-extern __visible kprobe_opcode_t optprobe_template_sub_sp;
-extern __visible kprobe_opcode_t optprobe_template_add_sp;
-extern __visible kprobe_opcode_t optprobe_template_restore_begin;
-extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn;
-extern __visible kprobe_opcode_t optprobe_template_restore_end;
+extern __visible kprobe_opcode_t optprobe_template_entry[];
+extern __visible kprobe_opcode_t optprobe_template_val[];
+extern __visible kprobe_opcode_t optprobe_template_call[];
+extern __visible kprobe_opcode_t optprobe_template_end[];
+extern __visible kprobe_opcode_t optprobe_template_sub_sp[];
+extern __visible kprobe_opcode_t optprobe_template_add_sp[];
+extern __visible kprobe_opcode_t optprobe_template_restore_begin[];
+extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[];
+extern __visible kprobe_opcode_t optprobe_template_restore_end[];
#define MAX_OPTIMIZED_LENGTH 4
#define MAX_OPTINSN_SIZE \
- ((unsigned long)&optprobe_template_end - \
- (unsigned long)&optprobe_template_entry)
+ ((unsigned long)optprobe_template_end - \
+ (unsigned long)optprobe_template_entry)
#define RELATIVEJUMP_SIZE 4
struct arch_optimized_insn {
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index 0dc23fc227ed..cf08cb726767 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -98,21 +98,21 @@ asm (
"optprobe_template_end:\n");
#define TMPL_VAL_IDX \
- ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry)
#define TMPL_CALL_IDX \
- ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry)
#define TMPL_END_IDX \
- ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry)
#define TMPL_ADD_SP \
- ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry)
#define TMPL_SUB_SP \
- ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_BEGIN \
- ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_ORIGN_INSN \
- ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_END \
- ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry)
/*
* ARM can always optimize an instruction when using ARM ISA, except
@@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or
}
/* Copy arch-dep-instance from template. */
- memcpy(code, (unsigned long *)&optprobe_template_entry,
+ memcpy(code, (unsigned long *)optprobe_template_entry,
TMPL_END_IDX * sizeof(kprobe_opcode_t));
/* Adjust buffer according to instruction. */
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 81fd41d5a0d9..0661227d935c 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1190,7 +1190,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
if (!filter->range || !filter->size)
return -EOPNOTSUPP;
- if (!filter->inode) {
+ if (!filter->path.dentry) {
if (!valid_kernel_ip(filter->offset))
return -EINVAL;
@@ -1217,7 +1217,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
return;
list_for_each_entry(filter, &head->list, entry) {
- if (filter->inode && !offs[range]) {
+ if (filter->path.dentry && !offs[range]) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 245184152892..1dbc64bbd866 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1240,6 +1240,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
}
+static bool is_spec_ib_user_controlled(void)
+{
+ return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP;
+}
+
static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
@@ -1247,17 +1255,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
- /*
- * Indirect branch speculation is always disabled in strict
- * mode. It can neither be enabled if it was force-disabled
- * by a previous prctl call.
+ /*
+ * With strict mode for both IBPB and STIBP, the instruction
+ * code paths avoid checking this task flag and instead,
+ * unconditionally run the instruction. However, STIBP and IBPB
+ * are independent and either can be set to conditionally
+ * enabled regardless of the mode of the other.
+ *
+ * If either is set to conditional, allow the task flag to be
+ * updated, unless it was force-disabled by a previous prctl
+ * call. Currently, this is possible on an AMD CPU which has the
+ * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the
+ * kernel is booted with 'spectre_v2_user=seccomp', then
+ * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and
+ * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED.
*/
- if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ if (!is_spec_ib_user_controlled() ||
task_spec_ib_force_disable(task))
return -EPERM;
+
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
break;
@@ -1270,10 +1287,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
- if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+
+ if (!is_spec_ib_user_controlled())
return 0;
+
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
@@ -1336,20 +1353,17 @@ static int ib_prctl_get(struct task_struct *task)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return PR_SPEC_ENABLE;
- else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
- return PR_SPEC_DISABLE;
- else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
- spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) {
+ else if (is_spec_ib_user_controlled()) {
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- } else
+ } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ return PR_SPEC_DISABLE;
+ else
return PR_SPEC_NOT_AFFECTED;
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index cdf62fb94fb1..70ef826af7f8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -268,7 +268,7 @@ static void nbd_size_clear(struct nbd_device *nbd)
}
}
-static void nbd_size_update(struct nbd_device *nbd)
+static void nbd_size_update(struct nbd_device *nbd, bool start)
{
struct nbd_config *config = nbd->config;
struct block_device *bdev = bdget_disk(nbd->disk, 0);
@@ -279,7 +279,8 @@ static void nbd_size_update(struct nbd_device *nbd)
if (bdev) {
if (bdev->bd_disk) {
bd_set_size(bdev, config->bytesize);
- set_blocksize(bdev, config->blksize);
+ if (start)
+ set_blocksize(bdev, config->blksize);
} else
bdev->bd_invalidated = 1;
bdput(bdev);
@@ -294,7 +295,7 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
config->blksize = blocksize;
config->bytesize = blocksize * nr_blocks;
if (nbd->task_recv != NULL)
- nbd_size_update(nbd);
+ nbd_size_update(nbd, false);
}
static void nbd_complete_rq(struct request *req)
@@ -1231,7 +1232,7 @@ static int nbd_start_device(struct nbd_device *nbd)
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
- nbd_size_update(nbd);
+ nbd_size_update(nbd, true);
return error;
}
@@ -1431,6 +1432,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
bdev->bd_openers == 0)
nbd_disconnect_and_put(nbd);
+ bdput(bdev);
nbd_config_put(nbd);
nbd_put(nbd);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index c1d1b94f71b5..04ae2474e334 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -183,7 +183,7 @@ static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
-static int do_block_io_op(struct xen_blkif_ring *ring);
+static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req);
@@ -608,6 +608,8 @@ int xen_blkif_schedule(void *arg)
struct xen_vbd *vbd = &blkif->vbd;
unsigned long timeout;
int ret;
+ bool do_eoi;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
set_freezable();
while (!kthread_should_stop()) {
@@ -632,16 +634,23 @@ int xen_blkif_schedule(void *arg)
if (timeout == 0)
goto purge_gnt_list;
+ do_eoi = ring->waiting_reqs;
+
ring->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
- ret = do_block_io_op(ring);
+ ret = do_block_io_op(ring, &eoi_flags);
if (ret > 0)
ring->waiting_reqs = 1;
if (ret == -EACCES)
wait_event_interruptible(ring->shutdown_wq,
kthread_should_stop());
+ if (do_eoi && !ring->waiting_reqs) {
+ xen_irq_lateeoi(ring->irq, eoi_flags);
+ eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
+ }
+
purge_gnt_list:
if (blkif->vbd.feature_gnt_persistent &&
time_after(jiffies, ring->next_lru)) {
@@ -1114,7 +1123,7 @@ static void end_block_io_op(struct bio *bio)
* and transmute it to the block API to hand it over to the proper block disk.
*/
static int
-__do_block_io_op(struct xen_blkif_ring *ring)
+__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
union blkif_back_rings *blk_rings = &ring->blk_rings;
struct blkif_request req;
@@ -1137,6 +1146,9 @@ __do_block_io_op(struct xen_blkif_ring *ring)
if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
break;
+ /* We've seen a request, so clear spurious eoi flag. */
+ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
if (kthread_should_stop()) {
more_to_do = 1;
break;
@@ -1195,13 +1207,13 @@ __do_block_io_op(struct xen_blkif_ring *ring)
}
static int
-do_block_io_op(struct xen_blkif_ring *ring)
+do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
union blkif_back_rings *blk_rings = &ring->blk_rings;
int more_to_do;
do {
- more_to_do = __do_block_io_op(ring);
+ more_to_do = __do_block_io_op(ring, eoi_flags);
if (more_to_do)
break;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index e9fa4a1fc791..d19adf1db1f1 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -236,9 +236,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
BUG();
}
- err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
- xen_blkif_be_int, 0,
- "blkif-backend", ring);
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid,
+ evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
if (err < 0) {
xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
ring->blk_rings.common.sring = NULL;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b202f66fc383..868d2620b7ac 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1246,7 +1246,6 @@ void add_interrupt_randomness(int irq, int irq_flags)
fast_mix(fast_pool);
add_interrupt_bench(cycles);
- this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]);
if (unlikely(crng_init == 0)) {
if ((fast_pool->count >= 64) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 11beef7c595f..d35e5d8e8a05 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1098,22 +1098,19 @@ static int cik_sdma_soft_reset(void *handle)
{
u32 srbm_soft_reset = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(mmSRBM_STATUS2);
+ u32 tmp;
- if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
- /* sdma0 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
- tmp |= SDMA0_F32_CNTL__HALT_MASK;
- WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
- }
- if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
- /* sdma1 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
- tmp |= SDMA0_F32_CNTL__HALT_MASK;
- WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
- }
+ /* sdma0 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
+
+ /* sdma1 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
if (srbm_soft_reset) {
tmp = RREG32(mmSRBM_SOFT_RESET);
diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c
index 78eb10902809..076b6da44f46 100644
--- a/drivers/gpu/drm/gma500/psb_irq.c
+++ b/drivers/gpu/drm/gma500/psb_irq.c
@@ -350,6 +350,7 @@ int psb_irq_postinstall(struct drm_device *dev)
{
struct drm_psb_private *dev_priv = dev->dev_private;
unsigned long irqflags;
+ unsigned int i;
spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags);
@@ -362,20 +363,12 @@ int psb_irq_postinstall(struct drm_device *dev)
PSB_WVDC32(dev_priv->vdc_irq_mask, PSB_INT_ENABLE_R);
PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM);
- if (dev->vblank[0].enabled)
- psb_enable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE);
- else
- psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE);
-
- if (dev->vblank[1].enabled)
- psb_enable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE);
- else
- psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE);
-
- if (dev->vblank[2].enabled)
- psb_enable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE);
- else
- psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE);
+ for (i = 0; i < dev->num_crtcs; ++i) {
+ if (dev->vblank[i].enabled)
+ psb_enable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE);
+ else
+ psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE);
+ }
if (dev_priv->ops->hotplug_enable)
dev_priv->ops->hotplug_enable(dev, true);
@@ -388,6 +381,7 @@ void psb_irq_uninstall(struct drm_device *dev)
{
struct drm_psb_private *dev_priv = dev->dev_private;
unsigned long irqflags;
+ unsigned int i;
spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags);
@@ -396,14 +390,10 @@ void psb_irq_uninstall(struct drm_device *dev)
PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM);
- if (dev->vblank[0].enabled)
- psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE);
-
- if (dev->vblank[1].enabled)
- psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE);
-
- if (dev->vblank[2].enabled)
- psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE);
+ for (i = 0; i < dev->num_crtcs; ++i) {
+ if (dev->vblank[i].enabled)
+ psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE);
+ }
dev_priv->vdc_irq_mask &= _PSB_IRQ_SGX_FLAG |
_PSB_IRQ_MSVDX_FLAG |
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 2d93c8f454bc..423754cc6c30 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1230,7 +1230,7 @@ static void balloon_up(struct work_struct *dummy)
/* Refuse to balloon below the floor. */
if (avail_pages < num_pages || avail_pages - num_pages < floor) {
- pr_warn("Balloon request will be partially fulfilled. %s\n",
+ pr_info("Balloon request will be partially fulfilled. %s\n",
avail_pages < num_pages ? "Not enough memory." :
"Balloon floor reached.");
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 74c8638aac2b..ac3cac052af9 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -404,7 +404,11 @@ extern bool amd_iommu_np_cache;
/* Only true if all IOMMUs support device IOTLBs */
extern bool amd_iommu_iotlb_sup;
-#define MAX_IRQS_PER_TABLE 256
+/*
+ * AMD IOMMU hardware only support 512 IRTEs despite
+ * the architectural limitation of 2048 entries.
+ */
+#define MAX_IRQS_PER_TABLE 512
#define IRQ_TABLE_ALIGNMENT 128
struct irq_remap_table {
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index 5371df4d8af3..be4fe2ea0afd 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -138,11 +138,11 @@ static inline u8 mei_cl_me_id(const struct mei_cl *cl)
*
* @cl: host client
*
- * Return: mtu
+ * Return: mtu or 0 if client is not connected
*/
static inline size_t mei_cl_mtu(const struct mei_cl *cl)
{
- return cl->me_cl->props.max_msg_length;
+ return cl->me_cl ? cl->me_cl->props.max_msg_length : 0;
}
/**
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 05ad5ed145a3..e79965a390aa 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -492,9 +492,13 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
*/
struct sk_buff *skb = priv->echo_skb[idx];
struct canfd_frame *cf = (struct canfd_frame *)skb->data;
- u8 len = cf->len;
- *len_ptr = len;
+ /* get the real payload length for netdev statistics */
+ if (cf->can_id & CAN_RTR_FLAG)
+ *len_ptr = 0;
+ else
+ *len_ptr = cf->len;
+
priv->echo_skb[idx] = NULL;
return skb;
@@ -519,7 +523,11 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
if (!skb)
return 0;
- netif_rx(skb);
+ skb_get(skb);
+ if (netif_rx(skb) == NET_RX_SUCCESS)
+ dev_consume_skb_any(skb);
+ else
+ dev_kfree_skb_any(skb);
return len;
}
diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c
index ed8561d4a90f..a38dc6d9c978 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -256,8 +256,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
cf_len = get_can_dlc(pucan_msg_get_dlc(msg));
/* if this frame is an echo, */
- if ((rx_msg_flags & PUCAN_MSG_LOOPED_BACK) &&
- !(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE)) {
+ if (rx_msg_flags & PUCAN_MSG_LOOPED_BACK) {
unsigned long flags;
spin_lock_irqsave(&priv->echo_lock, flags);
@@ -271,7 +270,13 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
netif_wake_queue(priv->ndev);
spin_unlock_irqrestore(&priv->echo_lock, flags);
- return 0;
+
+ /* if this frame is only an echo, stop here. Otherwise,
+ * continue to push this application self-received frame into
+ * its own rx queue.
+ */
+ if (!(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE))
+ return 0;
}
/* otherwise, it should be pushed into rx fifo */
diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c
index 54ffd1e91a69..a04680183066 100644
--- a/drivers/net/can/rx-offload.c
+++ b/drivers/net/can/rx-offload.c
@@ -281,7 +281,7 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
if (skb_queue_len(&offload->skb_queue) >
offload->skb_queue_len_max) {
- kfree_skb(skb);
+ dev_kfree_skb_any(skb);
return -ENOBUFS;
}
@@ -326,7 +326,7 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload,
{
if (skb_queue_len(&offload->skb_queue) >
offload->skb_queue_len_max) {
- kfree_skb(skb);
+ dev_kfree_skb_any(skb);
return -ENOBUFS;
}
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 85d92f129af2..9d78ba777614 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -154,14 +154,55 @@ void peak_usb_get_ts_tv(struct peak_time_ref *time_ref, u32 ts,
/* protect from getting timeval before setting now */
if (time_ref->tv_host.tv_sec > 0) {
u64 delta_us;
+ s64 delta_ts = 0;
+
+ /* General case: dev_ts_1 < dev_ts_2 < ts, with:
+ *
+ * - dev_ts_1 = previous sync timestamp
+ * - dev_ts_2 = last sync timestamp
+ * - ts = event timestamp
+ * - ts_period = known sync period (theoretical)
+ * ~ dev_ts2 - dev_ts1
+ * *but*:
+ *
+ * - time counters wrap (see adapter->ts_used_bits)
+ * - sometimes, dev_ts_1 < ts < dev_ts2
+ *
+ * "normal" case (sync time counters increase):
+ * must take into account case when ts wraps (tsw)
+ *
+ * < ts_period > < >
+ * | | |
+ * ---+--------+----+-------0-+--+-->
+ * ts_dev_1 | ts_dev_2 |
+ * ts tsw
+ */
+ if (time_ref->ts_dev_1 < time_ref->ts_dev_2) {
+ /* case when event time (tsw) wraps */
+ if (ts < time_ref->ts_dev_1)
+ delta_ts = 1 << time_ref->adapter->ts_used_bits;
+
+ /* Otherwise, sync time counter (ts_dev_2) has wrapped:
+ * handle case when event time (tsn) hasn't.
+ *
+ * < ts_period > < >
+ * | | |
+ * ---+--------+--0-+---------+--+-->
+ * ts_dev_1 | ts_dev_2 |
+ * tsn ts
+ */
+ } else if (time_ref->ts_dev_1 < ts) {
+ delta_ts = -(1 << time_ref->adapter->ts_used_bits);
+ }
- delta_us = ts - time_ref->ts_dev_2;
- if (ts < time_ref->ts_dev_2)
- delta_us &= (1 << time_ref->adapter->ts_used_bits) - 1;
+ /* add delay between last sync and event timestamps */
+ delta_ts += (signed int)(ts - time_ref->ts_dev_2);
- delta_us += time_ref->ts_total;
+ /* add time from beginning to last sync */
+ delta_ts += time_ref->ts_total;
- delta_us *= time_ref->adapter->us_per_ts_scale;
+ /* convert ticks number into microseconds */
+ delta_us = delta_ts * time_ref->adapter->us_per_ts_scale;
delta_us >>= time_ref->adapter->us_per_ts_shift;
*tv = time_ref->tv_host_0;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 773fc15ac3ab..0d762bdac4f8 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -476,12 +476,18 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if,
struct pucan_msg *rx_msg)
{
struct pucan_rx_msg *rm = (struct pucan_rx_msg *)rx_msg;
- struct peak_usb_device *dev = usb_if->dev[pucan_msg_get_channel(rm)];
- struct net_device *netdev = dev->netdev;
+ struct peak_usb_device *dev;
+ struct net_device *netdev;
struct canfd_frame *cfd;
struct sk_buff *skb;
const u16 rx_msg_flags = le16_to_cpu(rm->flags);
+ if (pucan_msg_get_channel(rm) >= ARRAY_SIZE(usb_if->dev))
+ return -ENOMEM;
+
+ dev = usb_if->dev[pucan_msg_get_channel(rm)];
+ netdev = dev->netdev;
+
if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN) {
/* CANFD frame case */
skb = alloc_canfd_skb(netdev, &cfd);
@@ -528,15 +534,21 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if,
struct pucan_msg *rx_msg)
{
struct pucan_status_msg *sm = (struct pucan_status_msg *)rx_msg;
- struct peak_usb_device *dev = usb_if->dev[pucan_stmsg_get_channel(sm)];
- struct pcan_usb_fd_device *pdev =
- container_of(dev, struct pcan_usb_fd_device, dev);
+ struct pcan_usb_fd_device *pdev;
enum can_state new_state = CAN_STATE_ERROR_ACTIVE;
enum can_state rx_state, tx_state;
- struct net_device *netdev = dev->netdev;
+ struct peak_usb_device *dev;
+ struct net_device *netdev;
struct can_frame *cf;
struct sk_buff *skb;
+ if (pucan_stmsg_get_channel(sm) >= ARRAY_SIZE(usb_if->dev))
+ return -ENOMEM;
+
+ dev = usb_if->dev[pucan_stmsg_get_channel(sm)];
+ pdev = container_of(dev, struct pcan_usb_fd_device, dev);
+ netdev = dev->netdev;
+
/* nothing should be sent while in BUS_OFF state */
if (dev->can.state == CAN_STATE_BUS_OFF)
return 0;
@@ -589,9 +601,14 @@ static int pcan_usb_fd_decode_error(struct pcan_usb_fd_if *usb_if,
struct pucan_msg *rx_msg)
{
struct pucan_error_msg *er = (struct pucan_error_msg *)rx_msg;
- struct peak_usb_device *dev = usb_if->dev[pucan_ermsg_get_channel(er)];
- struct pcan_usb_fd_device *pdev =
- container_of(dev, struct pcan_usb_fd_device, dev);
+ struct pcan_usb_fd_device *pdev;
+ struct peak_usb_device *dev;
+
+ if (pucan_ermsg_get_channel(er) >= ARRAY_SIZE(usb_if->dev))
+ return -EINVAL;
+
+ dev = usb_if->dev[pucan_ermsg_get_channel(er)];
+ pdev = container_of(dev, struct pcan_usb_fd_device, dev);
/* keep a trace of tx and rx error counters for later use */
pdev->bec.txerr = er->tx_err_cnt;
@@ -605,11 +622,17 @@ static int pcan_usb_fd_decode_overrun(struct pcan_usb_fd_if *usb_if,
struct pucan_msg *rx_msg)
{
struct pcan_ufd_ovr_msg *ov = (struct pcan_ufd_ovr_msg *)rx_msg;
- struct peak_usb_device *dev = usb_if->dev[pufd_omsg_get_channel(ov)];
- struct net_device *netdev = dev->netdev;
+ struct peak_usb_device *dev;
+ struct net_device *netdev;
struct can_frame *cf;
struct sk_buff *skb;
+ if (pufd_omsg_get_channel(ov) >= ARRAY_SIZE(usb_if->dev))
+ return -EINVAL;
+
+ dev = usb_if->dev[pufd_omsg_get_channel(ov)];
+ netdev = dev->netdev;
+
/* allocate an skb to store the error frame */
skb = alloc_can_err_skb(netdev, &cf);
if (!skb)
@@ -727,6 +750,9 @@ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev,
u16 tx_msg_size, tx_msg_flags;
u8 can_dlc;
+ if (cfd->len > CANFD_MAX_DLEN)
+ return -EINVAL;
+
tx_msg_size = ALIGN(sizeof(struct pucan_tx_msg) + cfd->len, 4);
tx_msg->size = cpu_to_le16(tx_msg_size);
tx_msg->type = cpu_to_le16(PUCAN_MSG_CAN_TX);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 72961082e398..530b8da11960 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7143,7 +7143,8 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
opts[1] |= transport_offset << TCPHO_SHIFT;
} else {
if (unlikely(rtl_test_hw_pad_bug(tp, skb)))
- return !eth_skb_pad(skb);
+ /* eth_skb_pad would free the skb on error */
+ return !__skb_put_padto(skb, ETH_ZLEN, false);
}
return true;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 811fe0bde8a3..6e58ee9f143e 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -334,8 +334,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
-static int vrf_finish_direct(struct net *net, struct sock *sk,
- struct sk_buff *skb)
+static void vrf_finish_direct(struct sk_buff *skb)
{
struct net_device *vrf_dev = skb->dev;
@@ -354,7 +353,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk,
skb_pull(skb, ETH_HLEN);
}
- return 1;
+ /* reset skb device */
+ nf_reset(skb);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -433,15 +433,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
return skb;
}
+static int vrf_output6_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ vrf_finish_direct(skb);
+
+ return vrf_ip6_local_out(net, sk, skb);
+}
+
static int vrf_output6_direct(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
+ int err = 1;
+
skb->protocol = htons(ETH_P_IPV6);
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb->dev,
- vrf_finish_direct,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ if (!(IPCB(skb)->flags & IPSKB_REROUTED))
+ err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, skb->dev, vrf_output6_direct_finish);
+
+ if (likely(err == 1))
+ vrf_finish_direct(skb);
+
+ return err;
+}
+
+static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = vrf_output6_direct(net, sk, skb);
+ if (likely(err == 1))
+ err = vrf_ip6_local_out(net, sk, skb);
+
+ return err;
}
static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
@@ -454,18 +480,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev;
err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
- skb, NULL, vrf_dev, vrf_output6_direct);
+ skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
if (likely(err == 1))
err = vrf_output6_direct(net, sk, skb);
- /* reset skb device */
if (likely(err == 1))
- nf_reset(skb);
- else
- skb = NULL;
+ return skb;
- return skb;
+ return NULL;
}
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
@@ -649,15 +672,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
return skb;
}
+static int vrf_output_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ vrf_finish_direct(skb);
+
+ return vrf_ip_local_out(net, sk, skb);
+}
+
static int vrf_output_direct(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
+ int err = 1;
+
skb->protocol = htons(ETH_P_IP);
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb->dev,
- vrf_finish_direct,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ if (!(IPCB(skb)->flags & IPSKB_REROUTED))
+ err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, skb->dev, vrf_output_direct_finish);
+
+ if (likely(err == 1))
+ vrf_finish_direct(skb);
+
+ return err;
+}
+
+static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = vrf_output_direct(net, sk, skb);
+ if (likely(err == 1))
+ err = vrf_ip_local_out(net, sk, skb);
+
+ return err;
}
static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
@@ -670,18 +719,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev;
err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
- skb, NULL, vrf_dev, vrf_output_direct);
+ skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
if (likely(err == 1))
err = vrf_output_direct(net, sk, skb);
- /* reset skb device */
if (likely(err == 1))
- nf_reset(skb);
- else
- skb = NULL;
+ return skb;
- return skb;
+ return NULL;
}
static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 6ea16260ec76..1a1f2d1069fb 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -902,6 +902,7 @@ static ssize_t cosa_write(struct file *file,
chan->tx_status = 1;
spin_unlock_irqrestore(&cosa->lock, flags);
up(&chan->wsem);
+ kfree(kbuf);
return -ERESTARTSYS;
}
}
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index 11d06021b5e4..6782c3d0c333 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -973,7 +973,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
struct ath_htc_rx_status *rxstatus;
struct ath_rx_status rx_stats;
bool decrypt_error = false;
- __be16 rs_datalen;
+ u16 rs_datalen;
bool is_phyerr;
if (skb->len < HTC_RX_FRAME_HEADER_SIZE) {
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5b1d2e8402d9..347c796afd4e 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */
char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
struct xenvif *vif; /* Parent VIF */
+ /*
+ * TX/RX common EOI handling.
+ * When feature-split-event-channels = 0, interrupt handler sets
+ * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set
+ * by the RX and TX interrupt handlers.
+ * RX and TX handler threads will issue an EOI when either
+ * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or
+ * NETBK_TX_EOI) are set and they will reset those bits.
+ */
+ atomic_t eoi_pending;
+#define NETBK_RX_EOI 0x01
+#define NETBK_TX_EOI 0x02
+#define NETBK_COMMON_EOI 0x04
+
/* Use NAPI for guest TX */
struct napi_struct napi;
/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
@@ -356,6 +370,7 @@ int xenvif_dealloc_kthread(void *data);
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
+bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
void xenvif_rx_action(struct xenvif_queue *queue);
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index b5fa910b47b7..007600b7b868 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vif)
!vif->disabled;
}
+static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue)
+{
+ bool rc;
+
+ rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
+ if (rc)
+ napi_schedule(&queue->napi);
+ return rc;
+}
+
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
+ int old;
- if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
- napi_schedule(&queue->napi);
+ old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending);
+ WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n");
+
+ if (!xenvif_handle_tx_interrupt(queue)) {
+ atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
@@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
return work_done;
}
+static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue)
+{
+ bool rc;
+
+ rc = xenvif_have_rx_work(queue, false);
+ if (rc)
+ xenvif_kick_thread(queue);
+ return rc;
+}
+
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
+ int old;
- xenvif_kick_thread(queue);
+ old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending);
+ WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n");
+
+ if (!xenvif_handle_rx_interrupt(queue)) {
+ atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
- xenvif_tx_interrupt(irq, dev_id);
- xenvif_rx_interrupt(irq, dev_id);
+ struct xenvif_queue *queue = dev_id;
+ int old;
+
+ old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending);
+ WARN(old, "Interrupt while EOI pending\n");
+
+ /* Use bitwise or as we need to call both functions. */
+ if ((!xenvif_handle_tx_interrupt(queue) |
+ !xenvif_handle_rx_interrupt(queue))) {
+ atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
@@ -595,7 +638,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
shared = (struct xen_netif_ctrl_sring *)addr;
BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE);
- err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn);
if (err < 0)
goto err_unmap;
@@ -653,7 +696,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
queue->name, queue);
if (err < 0)
@@ -664,7 +707,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
/* feature-split-event-channels == 1 */
snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
"%s-tx", queue->name);
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
queue->tx_irq_name, queue);
if (err < 0)
@@ -674,7 +717,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
"%s-rx", queue->name);
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
queue->rx_irq_name, queue);
if (err < 0)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index a871aa6418d0..b8100298017b 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -162,6 +162,10 @@ void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
if (more_to_do)
napi_schedule(&queue->napi);
+ else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI,
+ &queue->eoi_pending) &
+ (NETBK_TX_EOI | NETBK_COMMON_EOI))
+ xen_irq_lateeoi(queue->tx_irq, 0);
}
static void tx_add_credit(struct xenvif_queue *queue)
@@ -1615,9 +1619,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif)
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
{
struct xenvif *vif = data;
+ unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;
- while (xenvif_ctrl_work_todo(vif))
+ while (xenvif_ctrl_work_todo(vif)) {
xenvif_ctrl_action(vif);
+ eoi_flag = 0;
+ }
+
+ xen_irq_lateeoi(irq, eoi_flag);
return IRQ_HANDLED;
}
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index b1cf7c6f407a..f152246c7dfb 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -490,13 +490,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
return queue->stalled && prod - cons >= 1;
}
-static bool xenvif_have_rx_work(struct xenvif_queue *queue)
+bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
{
return xenvif_rx_ring_slots_available(queue) ||
(queue->vif->stall_timeout &&
(xenvif_rx_queue_stalled(queue) ||
xenvif_rx_queue_ready(queue))) ||
- kthread_should_stop() ||
+ (test_kthread && kthread_should_stop()) ||
queue->vif->disabled;
}
@@ -527,15 +527,20 @@ static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
DEFINE_WAIT(wait);
- if (xenvif_have_rx_work(queue))
+ if (xenvif_have_rx_work(queue, true))
return;
for (;;) {
long ret;
prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
- if (xenvif_have_rx_work(queue))
+ if (xenvif_have_rx_work(queue, true))
break;
+ if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI,
+ &queue->eoi_pending) &
+ (NETBK_RX_EOI | NETBK_COMMON_EOI))
+ xen_irq_lateeoi(queue->rx_irq, 0);
+
ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
if (!ret)
break;
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 456339c19aed..94f4ff22044f 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -899,11 +899,13 @@ EXPORT_SYMBOL_GPL(of_dma_get_range);
*/
bool of_dma_is_coherent(struct device_node *np)
{
- struct device_node *node = of_node_get(np);
+ struct device_node *node;
if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT))
return true;
+ node = of_node_get(np);
+
while (node) {
if (of_property_read_bool(node, "dma-coherent")) {
of_node_put(node);
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
index 7f13ce8450a3..5249033ed413 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
@@ -458,13 +458,14 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function,
static bool aspeed_expr_is_gpio(const struct aspeed_sig_expr *expr)
{
/*
- * The signal type is GPIO if the signal name has "GPIO" as a prefix.
+ * The signal type is GPIO if the signal name has "GPI" as a prefix.
* strncmp (rather than strcmp) is used to implement the prefix
* requirement.
*
- * expr->signal might look like "GPIOT3" in the GPIO case.
+ * expr->signal might look like "GPIOB1" in the GPIO case.
+ * expr->signal might look like "GPIT0" in the GPI case.
*/
- return strncmp(expr->signal, "GPIO", 4) == 0;
+ return strncmp(expr->signal, "GPI", 3) == 0;
}
static bool aspeed_gpio_in_exprs(const struct aspeed_sig_expr **exprs)
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 71df0f70b61f..45b062b0d418 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -602,6 +602,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned pin,
value |= PADCFG1_TERM_UP;
+ /* Set default strength value in case none is given */
+ if (arg == 1)
+ arg = 5000;
+
switch (arg) {
case 20000:
value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT;
@@ -624,6 +628,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned pin,
case PIN_CONFIG_BIAS_PULL_DOWN:
value &= ~(PADCFG1_TERM_UP | PADCFG1_TERM_MASK);
+ /* Set default strength value in case none is given */
+ if (arg == 1)
+ arg = 5000;
+
switch (arg) {
case 20000:
value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT;
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 7385cd81498c..d4cd24ad077c 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -144,7 +144,7 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
} else if (debounce < 250000) {
- time = debounce / 15600;
+ time = debounce / 15625;
pin_reg |= time & DB_TMR_OUT_MASK;
pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF);
pin_reg |= BIT(DB_TMR_LARGE_OFF);
@@ -154,14 +154,14 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
pin_reg |= BIT(DB_TMR_LARGE_OFF);
} else {
- pin_reg &= ~DB_CNTRl_MASK;
+ pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF);
ret = -EINVAL;
}
} else {
pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF);
pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
pin_reg &= ~DB_TMR_OUT_MASK;
- pin_reg &= ~DB_CNTRl_MASK;
+ pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF);
}
writel(pin_reg, gpio_dev->base + offset * 4);
raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a3c265177855..978e145b5c8f 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3217,6 +3217,8 @@ static int _regulator_get_voltage(struct regulator_dev *rdev)
ret = rdev->desc->fixed_uV;
} else if (rdev->supply) {
ret = _regulator_get_voltage(rdev->supply->rdev);
+ } else if (rdev->supply_name) {
+ return -EPROBE_DEFER;
} else {
return -EINVAL;
}
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 135376ee2cbf..ba68454109ba 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -653,8 +653,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
rcu_read_lock();
list_for_each_entry_rcu(h,
&tmp_pg->dh_list, node) {
- /* h->sdev should always be valid */
- BUG_ON(!h->sdev);
+ if (!h->sdev)
+ continue;
h->sdev->access_state = desc[0];
}
rcu_read_unlock();
@@ -700,7 +700,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
pg->expiry = 0;
rcu_read_lock();
list_for_each_entry_rcu(h, &pg->dh_list, node) {
- BUG_ON(!h->sdev);
+ if (!h->sdev)
+ continue;
h->sdev->access_state =
(pg->state & SCSI_ACCESS_STATE_MASK);
if (pg->pref)
@@ -1138,7 +1139,6 @@ static void alua_bus_detach(struct scsi_device *sdev)
spin_lock(&h->pg_lock);
pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
rcu_assign_pointer(h->pg, NULL);
- h->sdev = NULL;
spin_unlock(&h->pg_lock);
if (pg) {
spin_lock_irq(&pg->lock);
@@ -1147,6 +1147,7 @@ static void alua_bus_detach(struct scsi_device *sdev)
kref_put(&pg->kref, release_port_group);
}
sdev->handler_data = NULL;
+ synchronize_rcu();
kfree(h);
}
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 3b892918d821..9ad9910cc085 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -8549,7 +8549,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* hook into SCSI subsystem */
rc = hpsa_scsi_add_host(h);
if (rc)
- goto clean7; /* perf, sg, cmd, irq, shost, pci, lu, aer/h */
+ goto clean8; /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */
/* Monitor the controller for firmware lockups */
h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
@@ -8564,6 +8564,8 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
HPSA_EVENT_MONITOR_INTERVAL);
return 0;
+clean8: /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */
+ kfree(h->lastlogicals);
clean7: /* perf, sg, cmd, irq, shost, pci, lu, aer/h */
hpsa_free_performant_mode(h);
h->access.set_intr_mask(h, HPSA_INTR_OFF);
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index af44e6e6b3bf..8b38e639e880 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -315,12 +315,23 @@ static int ring_request_msix(struct tb_ring *ring, bool no_suspend)
ring->vector = ret;
- ring->irq = pci_irq_vector(ring->nhi->pdev, ring->vector);
- if (ring->irq < 0)
- return ring->irq;
+ ret = pci_irq_vector(ring->nhi->pdev, ring->vector);
+ if (ret < 0)
+ goto err_ida_remove;
+
+ ring->irq = ret;
irqflags = no_suspend ? IRQF_NO_SUSPEND : 0;
- return request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring);
+ ret = request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring);
+ if (ret)
+ goto err_ida_remove;
+
+ return 0;
+
+err_ida_remove:
+ ida_simple_remove(&nhi->msix_ida, ring->vector);
+
+ return ret;
}
static void ring_release_msix(struct tb_ring *ring)
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 4fc94b5e15ef..be33a5ce3d8e 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -414,10 +414,10 @@ static int uio_get_minor(struct uio_device *idev)
return retval;
}
-static void uio_free_minor(struct uio_device *idev)
+static void uio_free_minor(unsigned long minor)
{
mutex_lock(&minor_lock);
- idr_remove(&uio_idr, idev->minor);
+ idr_remove(&uio_idr, minor);
mutex_unlock(&minor_lock);
}
@@ -989,7 +989,7 @@ int __uio_register_device(struct module *owner,
err_uio_dev_add_attributes:
device_del(&idev->dev);
err_device_create:
- uio_free_minor(idev);
+ uio_free_minor(idev->minor);
put_device(&idev->dev);
return ret;
}
@@ -1003,11 +1003,13 @@ EXPORT_SYMBOL_GPL(__uio_register_device);
void uio_unregister_device(struct uio_info *info)
{
struct uio_device *idev;
+ unsigned long minor;
if (!info || !info->uio_dev)
return;
idev = info->uio_dev;
+ minor = idev->minor;
mutex_lock(&idev->info_lock);
uio_dev_del_attributes(idev);
@@ -1020,7 +1022,7 @@ void uio_unregister_device(struct uio_info *info)
device_unregister(&idev->dev);
- uio_free_minor(idev);
+ uio_free_minor(minor);
return;
}
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index d07033d99c81..38f85b0e32ab 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1751,6 +1751,15 @@ static const struct usb_device_id acm_ids[] = {
{ USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */
.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
},
+ { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */
+ .driver_info = DISABLE_ECHO, /* Don't echo banner */
+ },
+ { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */
+ .driver_info = DISABLE_ECHO, /* Don't echo banner */
+ },
+ { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */
+ .driver_info = DISABLE_ECHO, /* Don't echo banner */
+ },
{ USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@...il.com */
.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
},
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c
index 8433c22900dc..0ed0d0365132 100644
--- a/drivers/usb/gadget/udc/goku_udc.c
+++ b/drivers/usb/gadget/udc/goku_udc.c
@@ -1772,6 +1772,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err;
}
+ pci_set_drvdata(pdev, dev);
spin_lock_init(&dev->lock);
dev->pdev = pdev;
dev->gadget.ops = &goku_ops;
@@ -1805,7 +1806,6 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
dev->regs = (struct goku_udc_regs __iomem *) base;
- pci_set_drvdata(pdev, dev);
INFO(dev, "%s\n", driver_desc);
INFO(dev, "version: " DRIVER_VERSION " %s\n", dmastr());
INFO(dev, "irq %d, pci mem %p\n", pdev->irq, base);
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index 8edef51c92e5..f026624898e7 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -91,6 +91,8 @@ static void evtchn_2l_unmask(unsigned port)
BUG_ON(!irqs_disabled());
+ smp_wmb(); /* All writes before unmask must be visible. */
+
if (unlikely((cpu != cpu_from_evtchn(port))))
do_hypercall = 1;
else {
@@ -159,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
*/
-static void evtchn_2l_handle_events(unsigned cpu)
+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
{
int irq;
xen_ulong_t pending_words;
@@ -240,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
/* Process port. */
port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
- irq = get_evtchn_to_irq(port);
-
- if (irq != -1)
- generic_handle_irq(irq);
+ handle_irq_for_port(port, ctrl);
bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index fdeeef2b9947..aca845675279 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -32,6 +32,10 @@
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
#ifdef CONFIG_X86
#include <asm/desc.h>
@@ -61,6 +65,15 @@
#include "events_internal.h"
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static uint __read_mostly event_loop_timeout = 2;
+module_param(event_loop_timeout, uint, 0644);
+
+static uint __read_mostly event_eoi_delay = 10;
+module_param(event_eoi_delay, uint, 0644);
+
const struct evtchn_ops *evtchn_ops;
/*
@@ -69,6 +82,24 @@ const struct evtchn_ops *evtchn_ops;
*/
static DEFINE_MUTEX(irq_mapping_update_lock);
+/*
+ * Lock protecting event handling loop against removing event channels.
+ * Adding of event channels is no issue as the associated IRQ becomes active
+ * only after everything is setup (before request_[threaded_]irq() the handler
+ * can't be entered for an event, as the event channel will be unmasked only
+ * then).
+ */
+static DEFINE_RWLOCK(evtchn_rwlock);
+
+/*
+ * Lock hierarchy:
+ *
+ * irq_mapping_update_lock
+ * evtchn_rwlock
+ * IRQ-desc lock
+ * percpu eoi_list_lock
+ */
+
static LIST_HEAD(xen_irq_list_head);
/* IRQ <-> VIRQ mapping. */
@@ -93,17 +124,20 @@ static bool (*pirq_needs_eoi)(unsigned irq);
static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
static struct irq_chip xen_dynamic_chip;
+static struct irq_chip xen_lateeoi_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
+static DEFINE_PER_CPU(unsigned int, irq_epoch);
+
static void clear_evtchn_to_irq_row(unsigned row)
{
unsigned col;
for (col = 0; col < EVTCHN_PER_ROW; col++)
- evtchn_to_irq[row][col] = -1;
+ WRITE_ONCE(evtchn_to_irq[row][col], -1);
}
static void clear_evtchn_to_irq_all(void)
@@ -140,7 +174,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
clear_evtchn_to_irq_row(row);
}
- evtchn_to_irq[row][col] = irq;
+ WRITE_ONCE(evtchn_to_irq[row][col], irq);
return 0;
}
@@ -150,7 +184,7 @@ int get_evtchn_to_irq(unsigned evtchn)
return -1;
if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
return -1;
- return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
+ return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
}
/* Get info for IRQ */
@@ -259,10 +293,14 @@ static void xen_irq_info_cleanup(struct irq_info *info)
*/
unsigned int evtchn_from_irq(unsigned irq)
{
- if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)))
+ const struct irq_info *info = NULL;
+
+ if (likely(irq < nr_irqs))
+ info = info_for_irq(irq);
+ if (!info)
return 0;
- return info_for_irq(irq)->evtchn;
+ return info->evtchn;
}
unsigned irq_from_evtchn(unsigned int evtchn)
@@ -373,9 +411,157 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
+struct lateeoi_work {
+ struct delayed_work delayed;
+ spinlock_t eoi_list_lock;
+ struct list_head eoi_list;
+};
+
+static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
+
+static void lateeoi_list_del(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+ list_del_init(&info->eoi_list);
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void lateeoi_list_add(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ struct irq_info *elem;
+ u64 now = get_jiffies_64();
+ unsigned long delay;
+ unsigned long flags;
+
+ if (now < info->eoi_time)
+ delay = info->eoi_time - now;
+ else
+ delay = 1;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+
+ if (list_empty(&eoi->eoi_list)) {
+ list_add(&info->eoi_list, &eoi->eoi_list);
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, delay);
+ } else {
+ list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
+ if (elem->eoi_time <= info->eoi_time)
+ break;
+ }
+ list_add(&info->eoi_list, &elem->eoi_list);
+ }
+
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
+{
+ evtchn_port_t evtchn;
+ unsigned int cpu;
+ unsigned int delay = 0;
+
+ evtchn = info->evtchn;
+ if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
+ return;
+
+ if (spurious) {
+ if ((1 << info->spurious_cnt) < (HZ << 2))
+ info->spurious_cnt++;
+ if (info->spurious_cnt > 1) {
+ delay = 1 << (info->spurious_cnt - 2);
+ if (delay > HZ)
+ delay = HZ;
+ if (!info->eoi_time)
+ info->eoi_cpu = smp_processor_id();
+ info->eoi_time = get_jiffies_64() + delay;
+ }
+ } else {
+ info->spurious_cnt = 0;
+ }
+
+ cpu = info->eoi_cpu;
+ if (info->eoi_time &&
+ (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
+ lateeoi_list_add(info);
+ return;
+ }
+
+ info->eoi_time = 0;
+ unmask_evtchn(evtchn);
+}
+
+static void xen_irq_lateeoi_worker(struct work_struct *work)
+{
+ struct lateeoi_work *eoi;
+ struct irq_info *info;
+ u64 now = get_jiffies_64();
+ unsigned long flags;
+
+ eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ while (true) {
+ spin_lock(&eoi->eoi_list_lock);
+
+ info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+ eoi_list);
+
+ if (info == NULL || now < info->eoi_time) {
+ spin_unlock(&eoi->eoi_list_lock);
+ break;
+ }
+
+ list_del_init(&info->eoi_list);
+
+ spin_unlock(&eoi->eoi_list_lock);
+
+ info->eoi_time = 0;
+
+ xen_irq_lateeoi_locked(info, false);
+ }
+
+ if (info)
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, info->eoi_time - now);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+
+static void xen_cpu_init_eoi(unsigned int cpu)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
+
+ INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
+ spin_lock_init(&eoi->eoi_list_lock);
+ INIT_LIST_HEAD(&eoi->eoi_list);
+}
+
+void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+{
+ struct irq_info *info;
+ unsigned long flags;
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ info = info_for_irq(irq);
+
+ if (info)
+ xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
+
#ifdef CONFIG_SMP
/* By default all event channels notify CPU#0. */
cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
@@ -390,6 +576,7 @@ static void xen_irq_init(unsigned irq)
set_info_for_irq(irq, info);
+ INIT_LIST_HEAD(&info->eoi_list);
list_add_tail(&info->list, &xen_irq_list_head);
}
@@ -438,16 +625,24 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
static void xen_free_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
+ unsigned long flags;
if (WARN_ON(!info))
return;
+ write_lock_irqsave(&evtchn_rwlock, flags);
+
+ if (!list_empty(&info->eoi_list))
+ lateeoi_list_del(info);
+
list_del(&info->list);
set_info_for_irq(irq, NULL);
WARN_ON(info->refcnt > 0);
+ write_unlock_irqrestore(&evtchn_rwlock, flags);
+
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
@@ -839,7 +1034,7 @@ int xen_pirq_from_irq(unsigned irq)
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
-int bind_evtchn_to_irq(unsigned int evtchn)
+static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
{
int irq;
int ret;
@@ -856,7 +1051,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
if (irq < 0)
goto out;
- irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+ irq_set_chip_and_handler_name(irq, chip,
handle_edge_irq, "event");
ret = xen_irq_info_evtchn_setup(irq, evtchn);
@@ -877,8 +1072,19 @@ int bind_evtchn_to_irq(unsigned int evtchn)
return irq;
}
+
+int bind_evtchn_to_irq(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
+
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
@@ -920,8 +1126,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq;
}
-int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- unsigned int remote_port)
+static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ struct irq_chip *chip)
{
struct evtchn_bind_interdomain bind_interdomain;
int err;
@@ -932,10 +1139,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
&bind_interdomain);
- return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+ return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
+ chip);
+}
+
+int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+ evtchn_port_t remote_port)
+{
+ return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ &xen_dynamic_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
+int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port)
+{
+ return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
+
static int find_virq(unsigned int virq, unsigned int cpu)
{
struct evtchn_status status;
@@ -1031,14 +1254,15 @@ static void unbind_from_irq(unsigned int irq)
mutex_unlock(&irq_mapping_update_lock);
}
-int bind_evtchn_to_irqhandler(unsigned int evtchn,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname, void *dev_id)
+static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id,
+ struct irq_chip *chip)
{
int irq, retval;
- irq = bind_evtchn_to_irq(evtchn);
+ irq = bind_evtchn_to_irq_chip(evtchn, chip);
if (irq < 0)
return irq;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1049,18 +1273,38 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
return irq;
}
+
+int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+ devname, dev_id,
+ &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
-int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
- unsigned int remote_port,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname,
- void *dev_id)
+int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+ devname, dev_id,
+ &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
+
+static int bind_interdomain_evtchn_to_irqhandler_chip(
+ unsigned int remote_domain, evtchn_port_t remote_port,
+ irq_handler_t handler, unsigned long irqflags,
+ const char *devname, void *dev_id, struct irq_chip *chip)
{
int irq, retval;
- irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+ irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ chip);
if (irq < 0)
return irq;
@@ -1072,8 +1316,33 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
return irq;
}
+
+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
+ remote_port, handler, irqflags, devname,
+ dev_id, &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
+ remote_port, handler, irqflags, devname,
+ dev_id, &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
+
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
@@ -1186,7 +1455,7 @@ int evtchn_get(unsigned int evtchn)
goto done;
err = -EINVAL;
- if (info->refcnt <= 0)
+ if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
goto done;
info->refcnt++;
@@ -1225,6 +1494,54 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
notify_remote_via_irq(irq);
}
+struct evtchn_loop_ctrl {
+ ktime_t timeout;
+ unsigned count;
+ bool defer_eoi;
+};
+
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+{
+ int irq;
+ struct irq_info *info;
+
+ irq = get_evtchn_to_irq(port);
+ if (irq == -1)
+ return;
+
+ /*
+ * Check for timeout every 256 events.
+ * We are setting the timeout value only after the first 256
+ * events in order to not hurt the common case of few loop
+ * iterations. The 256 is basically an arbitrary value.
+ *
+ * In case we are hitting the timeout we need to defer all further
+ * EOIs in order to ensure to leave the event handling loop rather
+ * sooner than later.
+ */
+ if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
+ ktime_t kt = ktime_get();
+
+ if (!ctrl->timeout) {
+ kt = ktime_add_ms(kt,
+ jiffies_to_msecs(event_loop_timeout));
+ ctrl->timeout = kt;
+ } else if (kt > ctrl->timeout) {
+ ctrl->defer_eoi = true;
+ }
+ }
+
+ info = info_for_irq(irq);
+
+ if (ctrl->defer_eoi) {
+ info->eoi_cpu = smp_processor_id();
+ info->irq_epoch = __this_cpu_read(irq_epoch);
+ info->eoi_time = get_jiffies_64() + event_eoi_delay;
+ }
+
+ generic_handle_irq(irq);
+}
+
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
static void __xen_evtchn_do_upcall(void)
@@ -1232,6 +1549,9 @@ static void __xen_evtchn_do_upcall(void)
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
int cpu = get_cpu();
unsigned count;
+ struct evtchn_loop_ctrl ctrl = { 0 };
+
+ read_lock(&evtchn_rwlock);
do {
vcpu_info->evtchn_upcall_pending = 0;
@@ -1239,7 +1559,7 @@ static void __xen_evtchn_do_upcall(void)
if (__this_cpu_inc_return(xed_nesting_count) - 1)
goto out;
- xen_evtchn_handle_events(cpu);
+ xen_evtchn_handle_events(cpu, &ctrl);
BUG_ON(!irqs_disabled());
@@ -1248,6 +1568,14 @@ static void __xen_evtchn_do_upcall(void)
} while (count != 1 || vcpu_info->evtchn_upcall_pending);
out:
+ read_unlock(&evtchn_rwlock);
+
+ /*
+ * Increment irq_epoch only now to defer EOIs only for
+ * xen_irq_lateeoi() invocations occurring from inside the loop
+ * above.
+ */
+ __this_cpu_inc(irq_epoch);
put_cpu();
}
@@ -1614,6 +1942,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
.irq_retrigger = retrigger_dynirq,
};
+static struct irq_chip xen_lateeoi_chip __read_mostly = {
+ /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
+ .name = "xen-dyn-lateeoi",
+
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,
+
+ .irq_ack = mask_ack_dynirq,
+ .irq_mask_ack = mask_ack_dynirq,
+
+ .irq_set_affinity = set_affinity_irq,
+ .irq_retrigger = retrigger_dynirq,
+};
+
static struct irq_chip xen_pirq_chip __read_mostly = {
.name = "xen-pirq",
@@ -1680,12 +2023,31 @@ void xen_callback_vector(void)
void xen_callback_vector(void) {}
#endif
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
-
static bool fifo_events = true;
module_param(fifo_events, bool, 0);
+static int xen_evtchn_cpu_prepare(unsigned int cpu)
+{
+ int ret = 0;
+
+ xen_cpu_init_eoi(cpu);
+
+ if (evtchn_ops->percpu_init)
+ ret = evtchn_ops->percpu_init(cpu);
+
+ return ret;
+}
+
+static int xen_evtchn_cpu_dead(unsigned int cpu)
+{
+ int ret = 0;
+
+ if (evtchn_ops->percpu_deinit)
+ ret = evtchn_ops->percpu_deinit(cpu);
+
+ return ret;
+}
+
void __init xen_init_IRQ(void)
{
int ret = -EINVAL;
@@ -1696,6 +2058,12 @@ void __init xen_init_IRQ(void)
if (ret < 0)
xen_evtchn_2l_init();
+ xen_cpu_init_eoi(smp_processor_id());
+
+ cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
+ "xen/evtchn:prepare",
+ xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
+
evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
sizeof(*evtchn_to_irq), GFP_KERNEL);
BUG_ON(!evtchn_to_irq);
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index 76b318e88382..33462521bfd0 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(unsigned port)
return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}
/*
- * Clear MASKED, spinning if BUSY is set.
+ * Clear MASKED if not PENDING, spinning if BUSY is set.
+ * Return true if mask was cleared.
*/
-static void clear_masked(volatile event_word_t *word)
+static bool clear_masked_cond(volatile event_word_t *word)
{
event_word_t new, old, w;
w = *word;
do {
+ if (w & (1 << EVTCHN_FIFO_PENDING))
+ return false;
+
old = w & ~(1 << EVTCHN_FIFO_BUSY);
new = old & ~(1 << EVTCHN_FIFO_MASKED);
w = sync_cmpxchg(word, old, new);
} while (w != old);
+
+ return true;
}
static void evtchn_fifo_unmask(unsigned port)
@@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(unsigned port)
BUG_ON(!irqs_disabled());
- clear_masked(word);
- if (evtchn_fifo_is_pending(port)) {
+ if (!clear_masked_cond(word)) {
struct evtchn_unmask unmask = { .port = port };
(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
}
@@ -270,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word)
return w & EVTCHN_FIFO_LINK_MASK;
}
-static void handle_irq_for_port(unsigned port)
-{
- int irq;
-
- irq = get_evtchn_to_irq(port);
- if (irq != -1)
- generic_handle_irq(irq);
-}
-
-static void consume_one_event(unsigned cpu,
+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
struct evtchn_fifo_control_block *control_block,
- unsigned priority, unsigned long *ready,
- bool drop)
+ unsigned priority, unsigned long *ready)
{
struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
uint32_t head;
@@ -315,16 +310,17 @@ static void consume_one_event(unsigned cpu,
clear_bit(priority, ready);
if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
- if (unlikely(drop))
+ if (unlikely(!ctrl))
pr_warn("Dropping pending event for port %u\n", port);
else
- handle_irq_for_port(port);
+ handle_irq_for_port(port, ctrl);
}
q->head[priority] = head;
}
-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
+static void __evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
struct evtchn_fifo_control_block *control_block;
unsigned long ready;
@@ -336,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
while (ready) {
q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
- consume_one_event(cpu, control_block, q, &ready, drop);
+ consume_one_event(cpu, ctrl, control_block, q, &ready);
ready |= xchg(&control_block->ready, 0);
}
}
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- __evtchn_fifo_handle_events(cpu, false);
+ __evtchn_fifo_handle_events(cpu, ctrl);
}
static void evtchn_fifo_resume(void)
@@ -380,21 +377,6 @@ static void evtchn_fifo_resume(void)
event_array_pages = 0;
}
-static const struct evtchn_ops evtchn_ops_fifo = {
- .max_channels = evtchn_fifo_max_channels,
- .nr_channels = evtchn_fifo_nr_channels,
- .setup = evtchn_fifo_setup,
- .bind_to_cpu = evtchn_fifo_bind_to_cpu,
- .clear_pending = evtchn_fifo_clear_pending,
- .set_pending = evtchn_fifo_set_pending,
- .is_pending = evtchn_fifo_is_pending,
- .test_and_set_mask = evtchn_fifo_test_and_set_mask,
- .mask = evtchn_fifo_mask,
- .unmask = evtchn_fifo_unmask,
- .handle_events = evtchn_fifo_handle_events,
- .resume = evtchn_fifo_resume,
-};
-
static int evtchn_fifo_alloc_control_block(unsigned cpu)
{
void *control_block = NULL;
@@ -417,19 +399,36 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu)
return ret;
}
-static int xen_evtchn_cpu_prepare(unsigned int cpu)
+static int evtchn_fifo_percpu_init(unsigned int cpu)
{
if (!per_cpu(cpu_control_block, cpu))
return evtchn_fifo_alloc_control_block(cpu);
return 0;
}
-static int xen_evtchn_cpu_dead(unsigned int cpu)
+static int evtchn_fifo_percpu_deinit(unsigned int cpu)
{
- __evtchn_fifo_handle_events(cpu, true);
+ __evtchn_fifo_handle_events(cpu, NULL);
return 0;
}
+static const struct evtchn_ops evtchn_ops_fifo = {
+ .max_channels = evtchn_fifo_max_channels,
+ .nr_channels = evtchn_fifo_nr_channels,
+ .setup = evtchn_fifo_setup,
+ .bind_to_cpu = evtchn_fifo_bind_to_cpu,
+ .clear_pending = evtchn_fifo_clear_pending,
+ .set_pending = evtchn_fifo_set_pending,
+ .is_pending = evtchn_fifo_is_pending,
+ .test_and_set_mask = evtchn_fifo_test_and_set_mask,
+ .mask = evtchn_fifo_mask,
+ .unmask = evtchn_fifo_unmask,
+ .handle_events = evtchn_fifo_handle_events,
+ .resume = evtchn_fifo_resume,
+ .percpu_init = evtchn_fifo_percpu_init,
+ .percpu_deinit = evtchn_fifo_percpu_deinit,
+};
+
int __init xen_evtchn_fifo_init(void)
{
int cpu = smp_processor_id();
@@ -443,9 +442,5 @@ int __init xen_evtchn_fifo_init(void)
evtchn_ops = &evtchn_ops_fifo;
- cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
- "xen/evtchn:prepare",
- xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
-
return ret;
}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
index 50c2050a1e32..b9b4f5919893 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -32,11 +32,16 @@ enum xen_irq_type {
*/
struct irq_info {
struct list_head list;
- int refcnt;
+ struct list_head eoi_list;
+ short refcnt;
+ short spurious_cnt;
enum xen_irq_type type; /* type */
unsigned irq;
unsigned int evtchn; /* event channel */
unsigned short cpu; /* cpu bound */
+ unsigned short eoi_cpu; /* EOI must happen on this cpu */
+ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
+ u64 eoi_time; /* Time in jiffies when to EOI. */
union {
unsigned short virq;
@@ -55,6 +60,8 @@ struct irq_info {
#define PIRQ_SHAREABLE (1 << 1)
#define PIRQ_MSI_GROUP (1 << 2)
+struct evtchn_loop_ctrl;
+
struct evtchn_ops {
unsigned (*max_channels)(void);
unsigned (*nr_channels)(void);
@@ -69,14 +76,18 @@ struct evtchn_ops {
void (*mask)(unsigned port);
void (*unmask)(unsigned port);
- void (*handle_events)(unsigned cpu);
+ void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
void (*resume)(void);
+
+ int (*percpu_init)(unsigned int cpu);
+ int (*percpu_deinit)(unsigned int cpu);
};
extern const struct evtchn_ops *evtchn_ops;
extern int **evtchn_to_irq;
int get_evtchn_to_irq(unsigned int evtchn);
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
struct irq_info *info_for_irq(unsigned irq);
unsigned cpu_from_irq(unsigned irq);
@@ -134,9 +145,10 @@ static inline void unmask_evtchn(unsigned port)
return evtchn_ops->unmask(port);
}
-static inline void xen_evtchn_handle_events(unsigned cpu)
+static inline void xen_evtchn_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- return evtchn_ops->handle_events(cpu);
+ return evtchn_ops->handle_events(cpu, ctrl);
}
static inline void xen_evtchn_resume(void)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 055123f48039..0770f427beb2 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -166,7 +166,6 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
"Interrupt for port %d, but apparently not enabled; per-user %p\n",
evtchn->port, u);
- disable_irq_nosync(irq);
evtchn->enabled = false;
spin_lock(&u->ring_prod_lock);
@@ -292,7 +291,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
evtchn = find_evtchn(u, port);
if (evtchn && !evtchn->enabled) {
evtchn->enabled = true;
- enable_irq(irq_from_evtchn(port));
+ xen_irq_lateeoi(irq_from_evtchn(port), 0);
}
}
@@ -392,8 +391,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
if (rc < 0)
goto err;
- rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
- u->name, evtchn);
+ rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
+ u->name, evtchn);
if (rc < 0)
goto err;
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 62a0c4111dc4..b13d03aba791 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -75,6 +75,7 @@ struct sock_mapping {
atomic_t write;
atomic_t io;
atomic_t release;
+ atomic_t eoi;
void (*saved_data_ready)(struct sock *sk);
struct pvcalls_ioworker ioworker;
};
@@ -96,7 +97,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev,
struct pvcalls_fedata *fedata,
struct sock_mapping *map);
-static void pvcalls_conn_back_read(void *opaque)
+static bool pvcalls_conn_back_read(void *opaque)
{
struct sock_mapping *map = (struct sock_mapping *)opaque;
struct msghdr msg;
@@ -116,17 +117,17 @@ static void pvcalls_conn_back_read(void *opaque)
virt_mb();
if (error)
- return;
+ return false;
size = pvcalls_queued(prod, cons, array_size);
if (size >= array_size)
- return;
+ return false;
spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
atomic_set(&map->read, 0);
spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
flags);
- return;
+ return true;
}
spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
wanted = array_size - size;
@@ -154,7 +155,7 @@ static void pvcalls_conn_back_read(void *opaque)
ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
WARN_ON(ret > wanted);
if (ret == -EAGAIN) /* shouldn't happen */
- return;
+ return true;
if (!ret)
ret = -ENOTCONN;
spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
@@ -173,10 +174,10 @@ static void pvcalls_conn_back_read(void *opaque)
virt_wmb();
notify_remote_via_irq(map->irq);
- return;
+ return true;
}
-static void pvcalls_conn_back_write(struct sock_mapping *map)
+static bool pvcalls_conn_back_write(struct sock_mapping *map)
{
struct pvcalls_data_intf *intf = map->ring;
struct pvcalls_data *data = &map->data;
@@ -193,7 +194,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
array_size = XEN_FLEX_RING_SIZE(map->ring_order);
size = pvcalls_queued(prod, cons, array_size);
if (size == 0)
- return;
+ return false;
memset(&msg, 0, sizeof(msg));
msg.msg_flags |= MSG_DONTWAIT;
@@ -215,12 +216,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
atomic_set(&map->write, 0);
ret = inet_sendmsg(map->sock, &msg, size);
- if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
+ if (ret == -EAGAIN) {
atomic_inc(&map->write);
atomic_inc(&map->io);
+ return true;
}
- if (ret == -EAGAIN)
- return;
/* write the data, then update the indexes */
virt_wmb();
@@ -233,9 +233,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
}
/* update the indexes, then notify the other end */
virt_wmb();
- if (prod != cons + ret)
+ if (prod != cons + ret) {
atomic_inc(&map->write);
+ atomic_inc(&map->io);
+ }
notify_remote_via_irq(map->irq);
+
+ return true;
}
static void pvcalls_back_ioworker(struct work_struct *work)
@@ -244,6 +248,7 @@ static void pvcalls_back_ioworker(struct work_struct *work)
struct pvcalls_ioworker, register_work);
struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
ioworker);
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
while (atomic_read(&map->io) > 0) {
if (atomic_read(&map->release) > 0) {
@@ -251,10 +256,18 @@ static void pvcalls_back_ioworker(struct work_struct *work)
return;
}
- if (atomic_read(&map->read) > 0)
- pvcalls_conn_back_read(map);
- if (atomic_read(&map->write) > 0)
- pvcalls_conn_back_write(map);
+ if (atomic_read(&map->read) > 0 &&
+ pvcalls_conn_back_read(map))
+ eoi_flags = 0;
+ if (atomic_read(&map->write) > 0 &&
+ pvcalls_conn_back_write(map))
+ eoi_flags = 0;
+
+ if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) {
+ atomic_set(&map->eoi, 0);
+ xen_irq_lateeoi(map->irq, eoi_flags);
+ eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+ }
atomic_dec(&map->io);
}
@@ -351,12 +364,9 @@ static struct sock_mapping *pvcalls_new_active_socket(
goto out;
map->bytes = page;
- ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
- evtchn,
- pvcalls_back_conn_event,
- 0,
- "pvcalls-backend",
- map);
+ ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+ fedata->dev->otherend_id, evtchn,
+ pvcalls_back_conn_event, 0, "pvcalls-backend", map);
if (ret < 0)
goto out;
map->irq = ret;
@@ -890,15 +900,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{
struct xenbus_device *dev = dev_id;
struct pvcalls_fedata *fedata = NULL;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
- if (dev == NULL)
- return IRQ_HANDLED;
+ if (dev) {
+ fedata = dev_get_drvdata(&dev->dev);
+ if (fedata) {
+ pvcalls_back_work(fedata);
+ eoi_flags = 0;
+ }
+ }
- fedata = dev_get_drvdata(&dev->dev);
- if (fedata == NULL)
- return IRQ_HANDLED;
+ xen_irq_lateeoi(irq, eoi_flags);
- pvcalls_back_work(fedata);
return IRQ_HANDLED;
}
@@ -908,12 +921,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
struct pvcalls_ioworker *iow;
if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
- map->sock->sk->sk_user_data != map)
+ map->sock->sk->sk_user_data != map) {
+ xen_irq_lateeoi(irq, 0);
return IRQ_HANDLED;
+ }
iow = &map->ioworker;
atomic_inc(&map->write);
+ atomic_inc(&map->eoi);
atomic_inc(&map->io);
queue_work(iow->wq, &iow->register_work);
@@ -948,7 +964,7 @@ static int backend_connect(struct xenbus_device *dev)
goto error;
}
- err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn);
if (err < 0)
goto error;
fedata->irq = err;
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 8c250f4a3a97..0842b8e26107 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -733,10 +733,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
wmb();
notify_remote_via_irq(pdev->evtchn_irq);
+ /* Enable IRQ to signal "request done". */
+ xen_pcibk_lateeoi(pdev, 0);
+
ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
!(test_bit(_XEN_PCIB_active, (unsigned long *)
&sh_info->flags)), 300*HZ);
+ /* Enable IRQ for pcifront request if not already active. */
+ if (!test_bit(_PDEVF_op_active, &pdev->flags))
+ xen_pcibk_lateeoi(pdev, 0);
+
if (!ret) {
if (test_bit(_XEN_PCIB_active,
(unsigned long *)&sh_info->flags)) {
@@ -750,13 +757,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
}
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
- if (test_bit(_XEN_PCIF_active,
- (unsigned long *)&sh_info->flags)) {
- dev_dbg(&psdev->dev->dev,
- "schedule pci_conf service in " DRV_NAME "\n");
- xen_pcibk_test_and_schedule_op(psdev->pdev);
- }
-
res = (pci_ers_result_t)aer_op->err;
return res;
}
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index 263c059bff90..235cdfe13494 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/atomic.h>
+#include <xen/events.h>
#include <xen/interface/io/pciif.h>
#define DRV_NAME "xen-pciback"
@@ -27,6 +28,8 @@ struct pci_dev_entry {
#define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending))
+#define _EOI_pending (2)
+#define EOI_pending (1<<(_EOI_pending))
struct xen_pcibk_device {
void *pci_dev_data;
@@ -182,12 +185,17 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data);
+static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
+ unsigned int eoi_flag)
+{
+ if (test_and_clear_bit(_EOI_pending, &pdev->flags))
+ xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
+}
+
int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void);
extern int verbose_request;
-
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
#endif
/* Handles shared IRQs that can to device domain and control domain. */
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 7af047008ea2..0273c52e1b11 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -297,26 +297,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
return 0;
}
#endif
+
+static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev)
+{
+ return test_bit(_XEN_PCIF_active,
+ (unsigned long *)&pdev->sh_info->flags) &&
+ !test_and_set_bit(_PDEVF_op_active, &pdev->flags);
+}
+
/*
* Now the same evtchn is used for both pcifront conf_read_write request
* as well as pcie aer front end ack. We use a new work_queue to schedule
* xen_pcibk conf_read_write service for avoiding confict with aer_core
* do_recovery job which also use the system default work_queue
*/
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
+static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
+ bool eoi = true;
+
/* Check that frontend is requesting an operation and that we are not
* already processing a request */
- if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
- && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
+ if (xen_pcibk_test_op_pending(pdev)) {
schedule_work(&pdev->op_work);
+ eoi = false;
}
/*_XEN_PCIB_active should have been cleared by pcifront. And also make
sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&xen_pcibk_aer_wait_queue);
+ eoi = false;
}
+
+ /* EOI if there was nothing to do. */
+ if (eoi)
+ xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS);
}
/* Performing the configuration space reads/writes must not be done in atomic
@@ -324,10 +339,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
* use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct xen_pcibk_device as a parameter */
-void xen_pcibk_do_op(struct work_struct *data)
+static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev)
{
- struct xen_pcibk_device *pdev =
- container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->op;
@@ -400,16 +413,31 @@ void xen_pcibk_do_op(struct work_struct *data)
smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags);
smp_mb__after_atomic(); /* /before/ final check for work */
+}
- /* Check to see if the driver domain tried to start another request in
- * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
- */
- xen_pcibk_test_and_schedule_op(pdev);
+void xen_pcibk_do_op(struct work_struct *data)
+{
+ struct xen_pcibk_device *pdev =
+ container_of(data, struct xen_pcibk_device, op_work);
+
+ do {
+ xen_pcibk_do_one_op(pdev);
+ } while (xen_pcibk_test_op_pending(pdev));
+
+ xen_pcibk_lateeoi(pdev, 0);
}
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{
struct xen_pcibk_device *pdev = dev_id;
+ bool eoi;
+
+ /* IRQs might come in before pdev->evtchn_irq is written. */
+ if (unlikely(pdev->evtchn_irq != irq))
+ pdev->evtchn_irq = irq;
+
+ eoi = test_and_set_bit(_EOI_pending, &pdev->flags);
+ WARN(eoi, "IRQ while EOI pending\n");
xen_pcibk_test_and_schedule_op(pdev);
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 581c4e1a8b82..3bbed47da3fa 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
pdev->sh_info = vaddr;
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev);
if (err < 0) {
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index e2f3e8b0fba9..fd32c3459b66 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -91,7 +91,6 @@ struct vscsibk_info {
unsigned int irq;
struct vscsiif_back_ring ring;
- int ring_error;
spinlock_t ring_lock;
atomic_t nr_unreplied_reqs;
@@ -721,7 +720,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info,
return pending_req;
}
-static int scsiback_do_cmd_fn(struct vscsibk_info *info)
+static int scsiback_do_cmd_fn(struct vscsibk_info *info,
+ unsigned int *eoi_flags)
{
struct vscsiif_back_ring *ring = &info->ring;
struct vscsiif_request ring_req;
@@ -738,11 +738,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
rc = ring->rsp_prod_pvt;
pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
info->domid, rp, rc, rp - rc);
- info->ring_error = 1;
- return 0;
+ return -EINVAL;
}
while ((rc != rp)) {
+ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
break;
@@ -801,13 +802,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
{
struct vscsibk_info *info = dev_id;
+ int rc;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
- if (info->ring_error)
- return IRQ_HANDLED;
-
- while (scsiback_do_cmd_fn(info))
+ while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0)
cond_resched();
+ /* In case of a ring error we keep the event channel masked. */
+ if (!rc)
+ xen_irq_lateeoi(irq, eoi_flags);
+
return IRQ_HANDLED;
}
@@ -828,7 +832,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
sring = (struct vscsiif_sring *)area;
BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
- err = bind_interdomain_evtchn_to_irq(info->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn);
if (err < 0)
goto unmap_page;
@@ -1251,7 +1255,6 @@ static int scsiback_probe(struct xenbus_device *dev,
info->domid = dev->otherend_id;
spin_lock_init(&info->ring_lock);
- info->ring_error = 0;
atomic_set(&info->nr_unreplied_reqs, 0);
init_waitqueue_head(&info->waiting_to_free);
info->dev = dev;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0ba338cffa93..fd56c22c12a0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3869,6 +3869,10 @@ int btree_write_cache_pages(struct address_space *mapping,
if (!ret) {
free_extent_buffer(eb);
continue;
+ } else if (ret < 0) {
+ done = 1;
+ free_extent_buffer(eb);
+ break;
}
ret = write_one_eb(eb, fs_info, wbc, &epd);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 56123ce3b9f0..d3f76e3efd44 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3863,6 +3863,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
ret = -EINTR;
goto out;
}
+
+ cond_resched();
}
ret = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f3cb042a28d5..3d92feff249a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2432,9 +2432,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
tmp = btrfs_super_num_devices(fs_info->super_copy);
btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
- /* add sysfs device entry */
- btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
-
/*
* we've got more storage, clear any full flags on the space
* infos
@@ -2442,6 +2439,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
btrfs_clear_space_info_full(fs_info);
mutex_unlock(&fs_info->chunk_mutex);
+
+ /* Add sysfs device entry */
+ btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
+
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (seeding_dev) {
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index a2b2355e7f01..9986817532b1 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -501,7 +501,13 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
else if (map_chars == SFM_MAP_UNI_RSVD) {
bool end_of_string;
- if (i == srclen - 1)
+ /**
+ * Remap spaces and periods found at the end of every
+ * component of the path. The special cases of '.' and
+ * '..' do not need to be dealt with explicitly because
+ * they are addressed in namei.c:link_path_walk().
+ **/
+ if ((i == srclen - 1) || (source[i+1] == '\\'))
end_of_string = true;
else
end_of_string = false;
diff --git a/fs/exec.c b/fs/exec.c
index 92fd21f0b0ff..e8d1e6705977 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1024,10 +1024,23 @@ static int exec_mmap(struct mm_struct *mm)
}
}
task_lock(tsk);
+
+ local_irq_disable();
active_mm = tsk->active_mm;
- tsk->mm = mm;
tsk->active_mm = mm;
+ tsk->mm = mm;
+ /*
+ * This prevents preemption while active_mm is being loaded and
+ * it and mm are being updated, which could cause problems for
+ * lazy tlb mm refcounting when these are updated by context
+ * switches. Not all architectures can handle irqs off over
+ * activate_mm yet.
+ */
+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
activate_mm(active_mm, mm);
+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 6064bcb8572b..89a4ac83cbcd 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1895,6 +1895,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
+ ext4_write_unlock_xattr(inode, &no_expand);
*has_inline = 0;
ext4_journal_stop(handle);
return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ec96466f2909..eaa8bcd59b6a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1679,8 +1679,8 @@ static const struct mount_opts {
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+ {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
{Opt_offgrpjquota, 0, MOPT_Q},
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7cb0672294df..c4eb6a5fcea9 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -720,9 +720,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
}
gfs2_free_clones(rgd);
+ return_all_reservations(rgd);
kfree(rgd->rd_bits);
rgd->rd_bits = NULL;
- return_all_reservations(rgd);
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
}
@@ -1361,6 +1361,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ return -EROFS;
+
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c3f3f1ae4e1b..639e2c86758a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -924,6 +924,7 @@ static void gfs2_put_super(struct super_block *sb)
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
+ truncate_inode_pages_final(&sdp->sd_aspace);
gfs2_delete_debugfs_file(sdp);
/* Unmount the locking protocol */
gfs2_lm_unmount(sdp);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8216550faf45..f9deabfa303e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1733,6 +1733,7 @@ static void ocfs2_inode_init_once(void *data)
oi->ip_blkno = 0ULL;
oi->ip_clusters = 0;
+ oi->ip_next_orphan = NULL;
ocfs2_resv_init_once(&oi->ip_la_data_resv);
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 55c88a732690..1f16c2da2472 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1319,7 +1319,7 @@ xfs_rmap_convert_shared(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
+ error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
&PREV, &i);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 9d9c9192584c..cd689d21d3af 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -262,8 +262,8 @@ xfs_rmapbt_key_diff(
else if (y > x)
return -1;
- x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
- y = rec->rm_offset;
+ x = be64_to_cpu(kp->rm_offset);
+ y = xfs_rmap_irec_offset_pack(rec);
if (x > y)
return 1;
else if (y > x)
@@ -294,8 +294,8 @@ xfs_rmapbt_diff_two_keys(
else if (y > x)
return -1;
- x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
- y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
+ x = be64_to_cpu(kp1->rm_offset);
+ y = be64_to_cpu(kp2->rm_offset);
if (x > y)
return 1;
else if (y > x)
@@ -400,8 +400,8 @@ xfs_rmapbt_keys_inorder(
return 1;
else if (a > b)
return 0;
- a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
- b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
+ a = be64_to_cpu(k1->rmap.rm_offset);
+ b = be64_to_cpu(k2->rmap.rm_offset);
if (a <= b)
return 1;
return 0;
@@ -430,8 +430,8 @@ xfs_rmapbt_recs_inorder(
return 1;
else if (a > b)
return 0;
- a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
- b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
+ a = be64_to_cpu(r1->rmap.rm_offset);
+ b = be64_to_cpu(r2->rmap.rm_offset);
if (a <= b)
return 1;
return 0;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 4e6f2c8574f7..42c670a31351 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -879,6 +879,16 @@ xfs_setattr_size(
if (newsize > oldsize) {
error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
} else {
+ /*
+ * iomap won't detect a dirty page over an unwritten block (or a
+ * cow block over a hole) and subsequently skips zeroing the
+ * newly post-EOF portion of the page. Flush the new EOF to
+ * convert the block before the pagecache truncate.
+ */
+ error = filemap_write_and_wait_range(inode->i_mapping, newsize,
+ newsize);
+ if (error)
+ return error;
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_iomap_ops);
}
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index aa6c5c193f45..8538916d255e 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -140,7 +140,7 @@ xfs_fs_map_blocks(
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (WARN_ON_ONCE(error))
- return error;
+ goto out_unlock;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index b3379a97245c..a34694e675c9 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -61,21 +61,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
*/
static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
{
- if (skb_shared(skb)) {
- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+ struct sk_buff *nskb;
- if (likely(nskb)) {
- can_skb_set_owner(nskb, skb->sk);
- consume_skb(skb);
- return nskb;
- } else {
- kfree_skb(skb);
- return NULL;
- }
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ kfree_skb(skb);
+ return NULL;
}
- /* we can assume to have an unshared skb with proper owner */
- return skb;
+ can_skb_set_owner(nskb, skb->sk);
+ consume_skb(skb);
+ return nskb;
}
#endif /* !_CAN_SKB_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 956d76744c91..41a3307a971c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -466,7 +466,7 @@ struct pmu {
*/
struct perf_addr_filter {
struct list_head entry;
- struct inode *inode;
+ struct path path;
unsigned long offset;
unsigned long size;
unsigned int range : 1,
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index aa16e6468f91..cc1e71334e53 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nbytes);
void prandom_seed(u32 seed);
void prandom_reseed_late(void);
+#if BITS_PER_LONG == 64
+/*
+ * The core SipHash round function. Each line can be executed in
+ * parallel given enough CPU resources.
+ */
+#define PRND_SIPROUND(v0, v1, v2, v3) ( \
+ v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \
+ v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \
+ v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \
+ v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \
+)
+
+#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261)
+#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573)
+
+#elif BITS_PER_LONG == 32
+/*
+ * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
+ * This is weaker, but 32-bit machines are not used for high-traffic
+ * applications, so there is less output for an attacker to analyze.
+ */
+#define PRND_SIPROUND(v0, v1, v2, v3) ( \
+ v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \
+ v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \
+ v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \
+ v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \
+)
+#define PRND_K0 0x6c796765
+#define PRND_K1 0x74656462
+
+#else
+#error Unsupported BITS_PER_LONG
+#endif
+
struct rnd_state {
__u32 s1, s2, s3, s4;
};
-DECLARE_PER_CPU(struct rnd_state, net_rand_state);
-
u32 prandom_u32_state(struct rnd_state *state);
void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
diff --git a/include/linux/time64.h b/include/linux/time64.h
index ad33260618f7..99ab4a686c30 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -189,6 +189,10 @@ static inline bool timespec64_valid_strict(const struct timespec64 *ts)
*/
static inline s64 timespec64_to_ns(const struct timespec64 *ts)
{
+ /* Prevent multiplication overflow */
+ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+ return KTIME_MAX;
+
return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}
diff --git a/include/xen/events.h b/include/xen/events.h
index 1650d39decae..d8255ed2052c 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -14,11 +14,16 @@
unsigned xen_evtchn_nr_channels(void);
-int bind_evtchn_to_irq(unsigned int evtchn);
-int bind_evtchn_to_irqhandler(unsigned int evtchn,
+int bind_evtchn_to_irq(evtchn_port_t evtchn);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
+int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
irq_handler_t handler,
unsigned long irqflags, const char *devname,
void *dev_id);
+int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags, const char *devname,
+ void *dev_id);
int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
@@ -31,13 +36,21 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
const char *devname,
void *dev_id);
int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- unsigned int remote_port);
+ evtchn_port_t remote_port);
+int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port);
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
- unsigned int remote_port,
+ evtchn_port_t remote_port,
irq_handler_t handler,
unsigned long irqflags,
const char *devname,
void *dev_id);
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
/*
* Common unbind function for all event sources. Takes IRQ to unbind from.
@@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
*/
void unbind_from_irqhandler(unsigned int irq, void *dev_id);
+/*
+ * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi
+ * functions above.
+ */
+void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags);
+/* Signal an event was spurious, i.e. there was no action resulting from it. */
+#define XEN_EOI_FLAG_SPURIOUS 0x00000001
+
#define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX
#define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT
#define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5807fad2c405..75cf23232a92 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5207,11 +5207,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
-
struct ring_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
+ bool detach_rest = false;
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event, vma->vm_mm);
@@ -5242,7 +5242,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
}
- atomic_dec(&rb->mmap_count);
+ if (atomic_dec_and_test(&rb->mmap_count))
+ detach_rest = true;
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
goto out_put;
@@ -5251,7 +5252,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */
- if (atomic_read(&rb->mmap_count))
+ if (!detach_rest)
goto out_put;
/*
@@ -6450,7 +6451,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- if (filter->inode) {
+ if (filter->path.dentry) {
event->addr_filters_offs[count] = 0;
restart++;
}
@@ -7124,7 +7125,11 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter,
struct file *file, unsigned long offset,
unsigned long size)
{
- if (filter->inode != file_inode(file))
+ /* d_inode(NULL) won't be equal to any mapped user-space file */
+ if (!filter->path.dentry)
+ return false;
+
+ if (d_inode(filter->path.dentry) != file_inode(file))
return false;
if (filter->offset > offset + size)
@@ -8345,8 +8350,7 @@ static void free_filters_list(struct list_head *filters)
struct perf_addr_filter *filter, *iter;
list_for_each_entry_safe(filter, iter, filters, entry) {
- if (filter->inode)
- iput(filter->inode);
+ path_put(&filter->path);
list_del(&filter->entry);
kfree(filter);
}
@@ -8443,7 +8447,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
* Adjust base offset if the filter is associated to a binary
* that needs to be mapped:
*/
- if (filter->inode)
+ if (filter->path.dentry)
event->addr_filters_offs[count] =
perf_addr_filter_apply(filter, mm);
@@ -8516,7 +8520,6 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
{
struct perf_addr_filter *filter = NULL;
char *start, *orig, *filename = NULL;
- struct path path;
substring_t args[MAX_OPT_ARGS];
int state = IF_STATE_ACTION, token;
unsigned int kernel = 0;
@@ -8579,6 +8582,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
int fpos = filter->range ? 2 : 1;
+ kfree(filename);
filename = match_strdup(&args[fpos]);
if (!filename) {
ret = -ENOMEM;
@@ -8617,22 +8621,18 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
*/
ret = -EOPNOTSUPP;
if (!event->ctx->task)
- goto fail_free_name;
+ goto fail;
/* look up the path and grab its inode */
- ret = kern_path(filename, LOOKUP_FOLLOW, &path);
+ ret = kern_path(filename, LOOKUP_FOLLOW,
+ &filter->path);
if (ret)
- goto fail_free_name;
-
- filter->inode = igrab(d_inode(path.dentry));
- path_put(&path);
- kfree(filename);
- filename = NULL;
+ goto fail;
ret = -EINVAL;
- if (!filter->inode ||
- !S_ISREG(filter->inode->i_mode))
- /* free_filters_list() will iput() */
+ if (!filter->path.dentry ||
+ !S_ISREG(d_inode(filter->path.dentry)
+ ->i_mode))
goto fail;
event->addr_filters.nr_file_filters++;
@@ -8647,13 +8647,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (state != IF_STATE_ACTION)
goto fail;
+ kfree(filename);
kfree(orig);
return 0;
-fail_free_name:
- kfree(filename);
fail:
+ kfree(filename);
free_filters_list(filters);
kfree(orig);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09b1537ae06c..41317d04eeae 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -213,7 +213,7 @@ static inline int get_recursion_context(int *recursion)
rctx = 3;
else if (in_irq())
rctx = 2;
- else if (in_softirq())
+ else if (in_serving_softirq())
rctx = 1;
else
rctx = 0;
diff --git a/kernel/exit.c b/kernel/exit.c
index 7a7984d7a4d8..9e70577b818a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -516,7 +516,10 @@ static void exit_mm(void)
up_read(&mm->mmap_sem);
self.task = current;
- self.next = xchg(&core_state->dumper.next, &self);
+ if (self.task->flags & PF_SIGNALED)
+ self.next = xchg(&core_state->dumper.next, &self);
+ else
+ self.task = NULL;
/*
* Implies mb(), the result of xchg() must be visible
* to core_state->dumper.
diff --git a/kernel/futex.c b/kernel/futex.c
index 774de77bc502..322b9a840da6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -862,8 +862,9 @@ static void put_pi_state(struct futex_pi_state *pi_state)
*/
if (pi_state->owner) {
struct task_struct *owner;
+ unsigned long flags;
- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
owner = pi_state->owner;
if (owner) {
raw_spin_lock(&owner->pi_lock);
@@ -871,7 +872,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
raw_spin_unlock(&owner->pi_lock);
}
rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
if (current->pi_state_cache) {
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index a117adf7084b..779d858da2b3 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -80,6 +80,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
# Generic IRQ IPI support
config GENERIC_IRQ_IPI
bool
+ select IRQ_DOMAIN_HIERARCHY
# Generic MSI interrupt support
config GENERIC_MSI_IRQ
diff --git a/kernel/reboot.c b/kernel/reboot.c
index bd30a973fe94..2946ed1d99d4 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -512,22 +512,22 @@ static int __init reboot_setup(char *str)
break;
case 's':
- {
- int rc;
-
- if (isdigit(*(str+1))) {
- rc = kstrtoint(str+1, 0, &reboot_cpu);
- if (rc)
- return rc;
- } else if (str[1] == 'm' && str[2] == 'p' &&
- isdigit(*(str+3))) {
- rc = kstrtoint(str+3, 0, &reboot_cpu);
- if (rc)
- return rc;
- } else
+ if (isdigit(*(str+1)))
+ reboot_cpu = simple_strtoul(str+1, NULL, 0);
+ else if (str[1] == 'm' && str[2] == 'p' &&
+ isdigit(*(str+3)))
+ reboot_cpu = simple_strtoul(str+3, NULL, 0);
+ else
reboot_mode = REBOOT_SOFT;
+ if (reboot_cpu >= num_possible_cpus()) {
+ pr_err("Ignoring the CPU number in reboot= option. "
+ "CPU %d exceeds possible cpu number %d\n",
+ reboot_cpu, num_possible_cpus());
+ reboot_cpu = 0;
+ break;
+ }
break;
- }
+
case 'g':
reboot_mode = REBOOT_GPIO;
break;
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index f26acef5d7b4..7f718565507b 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -153,10 +153,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
- /*
- * Use the to_ktime conversion because that clamps the maximum
- * value to KTIME_MAX and avoid multiplication overflows.
- */
nval = ktime_to_ns(timeval_to_ktime(value->it_value));
ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval));
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d4bc272e03ee..99f885d2904a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1596,13 +1596,6 @@ void update_process_times(int user_tick)
scheduler_tick();
if (IS_ENABLED(CONFIG_POSIX_TIMERS))
run_posix_cpu_timers(p);
-
- /* The current CPU might make use of net randoms without receiving IRQs
- * to renew them often enough. Let's update the net_rand_state from a
- * non-constant value that's not affine to the number of calls to make
- * sure it's updated when there's some activity (we don't care in idle).
- */
- this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
}
/**
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b9b71e7fb697..8082328eb01a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -416,14 +416,16 @@ struct rb_event_info {
/*
* Used for which event context the event is in.
- * NMI = 0
- * IRQ = 1
- * SOFTIRQ = 2
- * NORMAL = 3
+ * TRANSITION = 0
+ * NMI = 1
+ * IRQ = 2
+ * SOFTIRQ = 3
+ * NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
+ RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -2553,10 +2555,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * bit 1 = NMI context
+ * bit 2 = IRQ context
+ * bit 3 = SoftIRQ context
+ * bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -2579,6 +2581,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSTION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
*/
static __always_inline int
@@ -2597,8 +2623,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
} else
bit = RB_CTX_NORMAL;
- if (unlikely(val & (1 << bit)))
- return 1;
+ if (unlikely(val & (1 << bit))) {
+ /*
+ * It is possible that this was called by transitioning
+ * between interrupt context, and preempt_count() has not
+ * been updated yet. In this case, use the TRANSITION bit.
+ */
+ bit = RB_CTX_TRANSITION;
+ if (val & (1 << bit))
+ return 1;
+ }
val |= (1 << bit);
cpu_buffer->current_context = val;
diff --git a/lib/random32.c b/lib/random32.c
index eb54663e9e94..f5e967f4adfa 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -40,16 +40,6 @@
#include <linux/sched.h>
#include <asm/unaligned.h>
-#ifdef CONFIG_RANDOM32_SELFTEST
-static void __init prandom_state_selftest(void);
-#else
-static inline void prandom_state_selftest(void)
-{
-}
-#endif
-
-DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy;
-
/**
* prandom_u32_state - seeded pseudo-random number generator.
* @state: pointer to state structure holding seeded state.
@@ -69,25 +59,6 @@ u32 prandom_u32_state(struct rnd_state *state)
}
EXPORT_SYMBOL(prandom_u32_state);
-/**
- * prandom_u32 - pseudo random number generator
- *
- * A 32 bit pseudo-random number is generated using a fast
- * algorithm suitable for simulation. This algorithm is NOT
- * considered safe for cryptographic use.
- */
-u32 prandom_u32(void)
-{
- struct rnd_state *state = &get_cpu_var(net_rand_state);
- u32 res;
-
- res = prandom_u32_state(state);
- put_cpu_var(net_rand_state);
-
- return res;
-}
-EXPORT_SYMBOL(prandom_u32);
-
/**
* prandom_bytes_state - get the requested number of pseudo-random bytes
*
@@ -119,20 +90,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes)
}
EXPORT_SYMBOL(prandom_bytes_state);
-/**
- * prandom_bytes - get the requested number of pseudo-random bytes
- * @buf: where to copy the pseudo-random bytes to
- * @bytes: the requested number of bytes
- */
-void prandom_bytes(void *buf, size_t bytes)
-{
- struct rnd_state *state = &get_cpu_var(net_rand_state);
-
- prandom_bytes_state(state, buf, bytes);
- put_cpu_var(net_rand_state);
-}
-EXPORT_SYMBOL(prandom_bytes);
-
static void prandom_warmup(struct rnd_state *state)
{
/* Calling RNG ten times to satisfy recurrence condition */
@@ -148,96 +105,6 @@ static void prandom_warmup(struct rnd_state *state)
prandom_u32_state(state);
}
-static u32 __extract_hwseed(void)
-{
- unsigned int val = 0;
-
- (void)(arch_get_random_seed_int(&val) ||
- arch_get_random_int(&val));
-
- return val;
-}
-
-static void prandom_seed_early(struct rnd_state *state, u32 seed,
- bool mix_with_hwseed)
-{
-#define LCG(x) ((x) * 69069U) /* super-duper LCG */
-#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
- state->s1 = __seed(HWSEED() ^ LCG(seed), 2U);
- state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U);
- state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U);
- state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
-}
-
-/**
- * prandom_seed - add entropy to pseudo random number generator
- * @seed: seed value
- *
- * Add some additional seeding to the prandom pool.
- */
-void prandom_seed(u32 entropy)
-{
- int i;
- /*
- * No locking on the CPUs, but then somewhat random results are, well,
- * expected.
- */
- for_each_possible_cpu(i) {
- struct rnd_state *state = &per_cpu(net_rand_state, i);
-
- state->s1 = __seed(state->s1 ^ entropy, 2U);
- prandom_warmup(state);
- }
-}
-EXPORT_SYMBOL(prandom_seed);
-
-/*
- * Generate some initially weak seeding values to allow
- * to start the prandom_u32() engine.
- */
-static int __init prandom_init(void)
-{
- int i;
-
- prandom_state_selftest();
-
- for_each_possible_cpu(i) {
- struct rnd_state *state = &per_cpu(net_rand_state, i);
- u32 weak_seed = (i + jiffies) ^ random_get_entropy();
-
- prandom_seed_early(state, weak_seed, true);
- prandom_warmup(state);
- }
-
- return 0;
-}
-core_initcall(prandom_init);
-
-static void __prandom_timer(unsigned long dontcare);
-
-static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0);
-
-static void __prandom_timer(unsigned long dontcare)
-{
- u32 entropy;
- unsigned long expires;
-
- get_random_bytes(&entropy, sizeof(entropy));
- prandom_seed(entropy);
-
- /* reseed every ~60 seconds, in [40 .. 80) interval with slack */
- expires = 40 + prandom_u32_max(40);
- seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
-
- add_timer(&seed_timer);
-}
-
-static void __init __prandom_start_seed_timer(void)
-{
- seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
- add_timer(&seed_timer);
-}
-
void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
{
int i;
@@ -257,51 +124,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
}
EXPORT_SYMBOL(prandom_seed_full_state);
-/*
- * Generate better values after random number generator
- * is fully initialized.
- */
-static void __prandom_reseed(bool late)
-{
- unsigned long flags;
- static bool latch = false;
- static DEFINE_SPINLOCK(lock);
-
- /* Asking for random bytes might result in bytes getting
- * moved into the nonblocking pool and thus marking it
- * as initialized. In this case we would double back into
- * this function and attempt to do a late reseed.
- * Ignore the pointless attempt to reseed again if we're
- * already waiting for bytes when the nonblocking pool
- * got initialized.
- */
-
- /* only allow initial seeding (late == false) once */
- if (!spin_trylock_irqsave(&lock, flags))
- return;
-
- if (latch && !late)
- goto out;
-
- latch = true;
- prandom_seed_full_state(&net_rand_state);
-out:
- spin_unlock_irqrestore(&lock, flags);
-}
-
-void prandom_reseed_late(void)
-{
- __prandom_reseed(true);
-}
-
-static int __init prandom_reseed(void)
-{
- __prandom_reseed(false);
- __prandom_start_seed_timer();
- return 0;
-}
-late_initcall(prandom_reseed);
-
#ifdef CONFIG_RANDOM32_SELFTEST
static struct prandom_test1 {
u32 seed;
@@ -421,7 +243,28 @@ static struct prandom_test2 {
{ 407983964U, 921U, 728767059U },
};
-static void __init prandom_state_selftest(void)
+static u32 __extract_hwseed(void)
+{
+ unsigned int val = 0;
+
+ (void)(arch_get_random_seed_int(&val) ||
+ arch_get_random_int(&val));
+
+ return val;
+}
+
+static void prandom_seed_early(struct rnd_state *state, u32 seed,
+ bool mix_with_hwseed)
+{
+#define LCG(x) ((x) * 69069U) /* super-duper LCG */
+#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
+ state->s1 = __seed(HWSEED() ^ LCG(seed), 2U);
+ state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U);
+ state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U);
+ state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
+}
+
+static int __init prandom_state_selftest(void)
{
int i, j, errors = 0, runs = 0;
bool error = false;
@@ -461,5 +304,266 @@ static void __init prandom_state_selftest(void)
pr_warn("prandom: %d/%d self tests failed\n", errors, runs);
else
pr_info("prandom: %d self tests passed\n", runs);
+ return 0;
}
+core_initcall(prandom_state_selftest);
#endif
+
+/*
+ * The prandom_u32() implementation is now completely separate from the
+ * prandom_state() functions, which are retained (for now) for compatibility.
+ *
+ * Because of (ab)use in the networking code for choosing random TCP/UDP port
+ * numbers, which open DoS possibilities if guessable, we want something
+ * stronger than a standard PRNG. But the performance requirements of
+ * the network code do not allow robust crypto for this application.
+ *
+ * So this is a homebrew Junior Spaceman implementation, based on the
+ * lowest-latency trustworthy crypto primitive available, SipHash.
+ * (The authors of SipHash have not been consulted about this abuse of
+ * their work.)
+ *
+ * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to
+ * one word of output. This abbreviated version uses 2 rounds per word
+ * of output.
+ */
+
+struct siprand_state {
+ unsigned long v0;
+ unsigned long v1;
+ unsigned long v2;
+ unsigned long v3;
+};
+
+static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
+
+/*
+ * This is the core CPRNG function. As "pseudorandom", this is not used
+ * for truly valuable things, just intended to be a PITA to guess.
+ * For maximum speed, we do just two SipHash rounds per word. This is
+ * the same rate as 4 rounds per 64 bits that SipHash normally uses,
+ * so hopefully it's reasonably secure.
+ *
+ * There are two changes from the official SipHash finalization:
+ * - We omit some constants XORed with v2 in the SipHash spec as irrelevant;
+ * they are there only to make the output rounds distinct from the input
+ * rounds, and this application has no input rounds.
+ * - Rather than returning v0^v1^v2^v3, return v1+v3.
+ * If you look at the SipHash round, the last operation on v3 is
+ * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time.
+ * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but
+ * it still cancels out half of the bits in v2 for no benefit.)
+ * Second, since the last combining operation was xor, continue the
+ * pattern of alternating xor/add for a tiny bit of extra non-linearity.
+ */
+static inline u32 siprand_u32(struct siprand_state *s)
+{
+ unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3;
+
+ PRND_SIPROUND(v0, v1, v2, v3);
+ PRND_SIPROUND(v0, v1, v2, v3);
+ s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3;
+ return v1 + v3;
+}
+
+
+/**
+ * prandom_u32 - pseudo random number generator
+ *
+ * A 32 bit pseudo-random number is generated using a fast
+ * algorithm suitable for simulation. This algorithm is NOT
+ * considered safe for cryptographic use.
+ */
+u32 prandom_u32(void)
+{
+ struct siprand_state *state = get_cpu_ptr(&net_rand_state);
+ u32 res = siprand_u32(state);
+
+ put_cpu_ptr(&net_rand_state);
+ return res;
+}
+EXPORT_SYMBOL(prandom_u32);
+
+/**
+ * prandom_bytes - get the requested number of pseudo-random bytes
+ * @buf: where to copy the pseudo-random bytes to
+ * @bytes: the requested number of bytes
+ */
+void prandom_bytes(void *buf, size_t bytes)
+{
+ struct siprand_state *state = get_cpu_ptr(&net_rand_state);
+ u8 *ptr = buf;
+
+ while (bytes >= sizeof(u32)) {
+ put_unaligned(siprand_u32(state), (u32 *)ptr);
+ ptr += sizeof(u32);
+ bytes -= sizeof(u32);
+ }
+
+ if (bytes > 0) {
+ u32 rem = siprand_u32(state);
+
+ do {
+ *ptr++ = (u8)rem;
+ rem >>= BITS_PER_BYTE;
+ } while (--bytes > 0);
+ }
+ put_cpu_ptr(&net_rand_state);
+}
+EXPORT_SYMBOL(prandom_bytes);
+
+/**
+ * prandom_seed - add entropy to pseudo random number generator
+ * @entropy: entropy value
+ *
+ * Add some additional seed material to the prandom pool.
+ * The "entropy" is actually our IP address (the only caller is
+ * the network code), not for unpredictability, but to ensure that
+ * different machines are initialized differently.
+ */
+void prandom_seed(u32 entropy)
+{
+ int i;
+
+ add_device_randomness(&entropy, sizeof(entropy));
+
+ for_each_possible_cpu(i) {
+ struct siprand_state *state = per_cpu_ptr(&net_rand_state, i);
+ unsigned long v0 = state->v0, v1 = state->v1;
+ unsigned long v2 = state->v2, v3 = state->v3;
+
+ do {
+ v3 ^= entropy;
+ PRND_SIPROUND(v0, v1, v2, v3);
+ PRND_SIPROUND(v0, v1, v2, v3);
+ v0 ^= entropy;
+ } while (unlikely(!v0 || !v1 || !v2 || !v3));
+
+ WRITE_ONCE(state->v0, v0);
+ WRITE_ONCE(state->v1, v1);
+ WRITE_ONCE(state->v2, v2);
+ WRITE_ONCE(state->v3, v3);
+ }
+}
+EXPORT_SYMBOL(prandom_seed);
+
+/*
+ * Generate some initially weak seeding values to allow
+ * the prandom_u32() engine to be started.
+ */
+static int __init prandom_init_early(void)
+{
+ int i;
+ unsigned long v0, v1, v2, v3;
+
+ if (!arch_get_random_long(&v0))
+ v0 = jiffies;
+ if (!arch_get_random_long(&v1))
+ v1 = random_get_entropy();
+ v2 = v0 ^ PRND_K0;
+ v3 = v1 ^ PRND_K1;
+
+ for_each_possible_cpu(i) {
+ struct siprand_state *state;
+
+ v3 ^= i;
+ PRND_SIPROUND(v0, v1, v2, v3);
+ PRND_SIPROUND(v0, v1, v2, v3);
+ v0 ^= i;
+
+ state = per_cpu_ptr(&net_rand_state, i);
+ state->v0 = v0; state->v1 = v1;
+ state->v2 = v2; state->v3 = v3;
+ }
+
+ return 0;
+}
+core_initcall(prandom_init_early);
+
+
+/* Stronger reseeding when available, and periodically thereafter. */
+static void prandom_reseed(unsigned long dontcare);
+
+static DEFINE_TIMER(seed_timer, prandom_reseed, 0, 0);
+
+static void prandom_reseed(unsigned long dontcare)
+{
+ unsigned long expires;
+ int i;
+
+ /*
+ * Reinitialize each CPU's PRNG with 128 bits of key.
+ * No locking on the CPUs, but then somewhat random results are,
+ * well, expected.
+ */
+ for_each_possible_cpu(i) {
+ struct siprand_state *state;
+ unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0;
+ unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1;
+#if BITS_PER_LONG == 32
+ int j;
+
+ /*
+ * On 32-bit machines, hash in two extra words to
+ * approximate 128-bit key length. Not that the hash
+ * has that much security, but this prevents a trivial
+ * 64-bit brute force.
+ */
+ for (j = 0; j < 2; j++) {
+ unsigned long m = get_random_long();
+
+ v3 ^= m;
+ PRND_SIPROUND(v0, v1, v2, v3);
+ PRND_SIPROUND(v0, v1, v2, v3);
+ v0 ^= m;
+ }
+#endif
+ /*
+ * Probably impossible in practice, but there is a
+ * theoretical risk that a race between this reseeding
+ * and the target CPU writing its state back could
+ * create the all-zero SipHash fixed point.
+ *
+ * To ensure that never happens, ensure the state
+ * we write contains no zero words.
+ */
+ state = per_cpu_ptr(&net_rand_state, i);
+ WRITE_ONCE(state->v0, v0 ? v0 : -1ul);
+ WRITE_ONCE(state->v1, v1 ? v1 : -1ul);
+ WRITE_ONCE(state->v2, v2 ? v2 : -1ul);
+ WRITE_ONCE(state->v3, v3 ? v3 : -1ul);
+ }
+
+ /* reseed every ~60 seconds, in [40 .. 80) interval with slack */
+ expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ));
+ mod_timer(&seed_timer, expires);
+}
+
+/*
+ * The random ready callback can be called from almost any interrupt.
+ * To avoid worrying about whether it's safe to delay that interrupt
+ * long enough to seed all CPUs, just schedule an immediate timer event.
+ */
+static void prandom_timer_start(struct random_ready_callback *unused)
+{
+ mod_timer(&seed_timer, jiffies);
+}
+
+/*
+ * Start periodic full reseeding as soon as strong
+ * random numbers are available.
+ */
+static int __init prandom_init_late(void)
+{
+ static struct random_ready_callback random_ready = {
+ .func = prandom_timer_start
+ };
+ int ret = add_random_ready_callback(&random_ready);
+
+ if (ret == -EALREADY) {
+ prandom_timer_start(&random_ready);
+ ret = 0;
+ }
+ return ret;
+}
+late_initcall(prandom_init_late);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index b4c768de3344..e73617b11af1 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -254,6 +254,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
+ no_iotlb_memory = false;
if (verbose)
swiotlb_print_info();
@@ -285,9 +286,11 @@ swiotlb_init(int verbose)
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
return;
- if (io_tlb_start)
+ if (io_tlb_start) {
memblock_free_early(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+ io_tlb_start = 0;
+ }
pr_warn("Cannot allocate buffer");
no_iotlb_memory = true;
}
@@ -390,6 +393,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
+ no_iotlb_memory = false;
swiotlb_print_info();
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d857e4770cc8..4e30d23943d5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -496,7 +496,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
int ret;
- pte_t *pte;
+ pte_t *pte, *mapped_pte;
spinlock_t *ptl;
ptl = pmd_trans_huge_lock(pmd, vma);
@@ -511,7 +511,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
if (pmd_trans_unstable(pmd))
return 0;
retry:
- pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
if (!pte_present(*pte))
continue;
@@ -549,7 +549,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
} else
break;
}
- pte_unmap_unlock(pte - 1, ptl);
+ pte_unmap_unlock(mapped_pte, ptl);
cond_resched();
return addr != end ? -EIO : 0;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2f871424925e..bf8df824bae3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -296,7 +296,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct rtable *rt;
__u8 rcv_wscale;
struct flowi4 fl4;
@@ -389,8 +389,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index fb3f917db57a..85b18319e2d8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1073,7 +1073,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
dev->mtu = tdev->mtu - t_hlen;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
@@ -1363,7 +1362,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->priv_destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
dev->min_mtu = IPV6_MIN_MTU;
dev->max_mtu = IP6_MAX_MTU - t_hlen;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 0fe8ffcda1d7..d1bfdb9c2f2d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -141,7 +141,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct dst_entry *dst;
__u8 rcv_wscale;
u32 tsoff = 0;
@@ -244,7 +244,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
+
+ tcp_select_initial_window(full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 7a9cbc9502d9..91235769c1b7 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1552,7 +1552,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
break;
}
- if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
+ if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) &&
+ sk->sk_state == IUCV_CONNECTED) {
if (iucv->transport == AF_IUCV_TRANS_IUCV) {
txmsg.class = 0;
txmsg.tag = 0;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1b1f2d6cb3f4..0ab710576673 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1851,19 +1851,24 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
/* device xmit handlers */
+enum ieee80211_encrypt {
+ ENCRYPT_NO,
+ ENCRYPT_MGMT,
+ ENCRYPT_DATA,
+};
+
static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
- int head_need, bool may_encrypt)
+ int head_need,
+ enum ieee80211_encrypt encrypt)
{
struct ieee80211_local *local = sdata->local;
- struct ieee80211_hdr *hdr;
bool enc_tailroom;
int tail_need = 0;
- hdr = (struct ieee80211_hdr *) skb->data;
- enc_tailroom = may_encrypt &&
- (sdata->crypto_tx_tailroom_needed_cnt ||
- ieee80211_is_mgmt(hdr->frame_control));
+ enc_tailroom = encrypt == ENCRYPT_MGMT ||
+ (encrypt == ENCRYPT_DATA &&
+ sdata->crypto_tx_tailroom_needed_cnt);
if (enc_tailroom) {
tail_need = IEEE80211_ENCRYPT_TAILROOM;
@@ -1896,21 +1901,27 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
int headroom;
- bool may_encrypt;
+ enum ieee80211_encrypt encrypt;
- may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT);
+ if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)
+ encrypt = ENCRYPT_NO;
+ else if (ieee80211_is_mgmt(hdr->frame_control))
+ encrypt = ENCRYPT_MGMT;
+ else
+ encrypt = ENCRYPT_DATA;
headroom = local->tx_headroom;
- if (may_encrypt)
+ if (encrypt != ENCRYPT_NO)
headroom += sdata->encrypt_headroom;
headroom -= skb_headroom(skb);
headroom = max_t(int, 0, headroom);
- if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) {
+ if (ieee80211_skb_resize(sdata, skb, headroom, encrypt)) {
ieee80211_free_txskb(&local->hw, skb);
return;
}
+ /* reload after potential resize */
hdr = (struct ieee80211_hdr *) skb->data;
info->control.vif = &sdata->vif;
@@ -2692,7 +2703,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
head_need += sdata->encrypt_headroom;
head_need += local->tx_headroom;
head_need = max_t(int, 0, head_need);
- if (ieee80211_skb_resize(sdata, skb, head_need, true)) {
+ if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) {
ieee80211_free_txskb(&local->hw, skb);
skb = NULL;
return ERR_PTR(-ENOMEM);
@@ -3352,7 +3363,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
if (unlikely(ieee80211_skb_resize(sdata, skb,
max_t(int, extra_head + hw_headroom -
skb_headroom(skb), 0),
- false))) {
+ ENCRYPT_NO))) {
kfree_skb(skb);
return true;
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 9eb9d34cef7b..db8cc505caf7 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2846,7 +2846,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
power_rule = ®_rule->power_rule;
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
- snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
+ snprintf(bw, sizeof(bw), "%d KHz, %u KHz AUTO",
freq_range->max_bandwidth_khz,
reg_get_max_bandwidth(rd, reg_rule));
else
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f4fa33b84cde..790b79647004 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -823,7 +823,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTED;
rc = 0;
out_put_neigh:
- if (rc) {
+ if (rc && x25->neighbour) {
read_lock_bh(&x25_list_lock);
x25_neigh_put(x25->neighbour);
x25->neighbour = NULL;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 05c275a712f1..5164dfe0aa09 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1783,6 +1783,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
int err = -ENOENT;
__be32 minspi = htonl(low);
__be32 maxspi = htonl(high);
+ __be32 newspi = 0;
u32 mark = x->mark.v & x->mark.m;
spin_lock_bh(&x->lock);
@@ -1801,21 +1802,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
xfrm_state_put(x0);
goto unlock;
}
- x->id.spi = minspi;
+ newspi = minspi;
} else {
u32 spi = 0;
for (h = 0; h < high-low+1; h++) {
spi = low + prandom_u32()%(high-low+1);
x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
if (x0 == NULL) {
- x->id.spi = htonl(spi);
+ newspi = htonl(spi);
break;
}
xfrm_state_put(x0);
}
}
- if (x->id.spi) {
+ if (newspi) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c
index e3614ee5f1c0..36e61f622b5a 100644
--- a/security/selinux/ibpkey.c
+++ b/security/selinux/ibpkey.c
@@ -160,8 +160,10 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid)
* is valid, it just won't be added to the cache.
*/
new = kzalloc(sizeof(*new), GFP_ATOMIC);
- if (!new)
+ if (!new) {
+ ret = -ENOMEM;
goto out;
+ }
new->psec.subnet_prefix = subnet_prefix;
new->psec.pkey = pkey_num;
diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
index 84f3b8168716..b679d5f37e4d 100644
--- a/sound/hda/ext/hdac_ext_controller.c
+++ b/sound/hda/ext/hdac_ext_controller.c
@@ -155,6 +155,8 @@ struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_ext_bus *ebus,
return NULL;
if (ebus->idx != bus_idx)
return NULL;
+ if (addr < 0 || addr > 31)
+ return NULL;
list_for_each_entry(hlink, &ebus->hlink_list, list) {
for (i = 0; i < HDA_MAX_CODECS; i++) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c49e8ea1a42c..decd5d147e81 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -492,6 +492,7 @@ static void perf_event__mmap2_swap(union perf_event *event,
event->mmap2.maj = bswap_32(event->mmap2.maj);
event->mmap2.min = bswap_32(event->mmap2.min);
event->mmap2.ino = bswap_64(event->mmap2.ino);
+ event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
if (sample_id_all) {
void *data = &event->mmap2.filename;
Powered by blists - more mailing lists