While the DOC at the beginning of lib/bitmap.c explicitly states that
"The number of valid bits in a given bitmap does _not_ need to be an
exact multiple of BITS_PER_LONG.", some of the bitmap operations do
indeed access BITS_PER_LONG portions of the provided bitmap no matter
the size of the provided bitmap. For example, if find_first_bit()
is provided with an 8 bit bitmap the operation will access
BITS_PER_LONG bits from the provided bitmap. While the operation
ensures that these extra bits do not affect the result, the memory
is still accessed.
The capacity bitmasks (CBMs) are typically stored in u32 since they
can never exceed 32 bits. A few instances exist where a bitmap_*
operation is performed on a CBM by simply pointing the bitmap operation
to the stored u32 value.
The consequence of this pattern is that some bitmap_* operations will
access out-of-bounds memory when interacting with the provided CBM.
This same issue has previously been addressed with commit 49e00eee0061
("x86/intel_rdt: Fix out-of-bounds memory access in CBM tests")
but at that time not all instances of the issue were fixed.
Fix this by using an unsigned long to store the capacity bitmask data
that is passed to bitmap functions.
Fixes: e651901187ab ("x86/intel_rdt: Introduce "bit_usage" to display cache allocations details")
Fixes: f4e80d67a527 ("x86/intel_rdt: Resctrl files reflect pseudo-locked information")
Fixes: 95f0b77efa57 ("x86/intel_rdt: Initialize new resource group with sane defaults")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Reinette Chatre <reinette.chatre(a)intel.com>
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++++++++++++--------------
1 file changed, 16 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 333c177a2471..b63e50b1a096 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -804,8 +804,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- u32 sw_shareable = 0, hw_shareable = 0;
- u32 exclusive = 0, pseudo_locked = 0;
+ /*
+ * Use unsigned long even though only 32 bits are used to ensure
+ * test_bit() is used safely.
+ */
+ unsigned long sw_shareable = 0, hw_shareable = 0;
+ unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
@@ -850,10 +854,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
}
for (i = r->cache.cbm_len - 1; i >= 0; i--) {
pseudo_locked = dom->plr ? dom->plr->cbm : 0;
- hwb = test_bit(i, (unsigned long *)&hw_shareable);
- swb = test_bit(i, (unsigned long *)&sw_shareable);
- excl = test_bit(i, (unsigned long *)&exclusive);
- psl = test_bit(i, (unsigned long *)&pseudo_locked);
+ hwb = test_bit(i, &hw_shareable);
+ swb = test_bit(i, &sw_shareable);
+ excl = test_bit(i, &exclusive);
+ psl = test_bit(i, &pseudo_locked);
if (hwb && swb)
seq_putc(seq, 'X');
else if (hwb && !swb)
@@ -2494,26 +2498,19 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
*/
static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
{
- /*
- * Convert the u32 _val to an unsigned long required by all the bit
- * operations within this function. No more than 32 bits of this
- * converted value can be accessed because all bit operations are
- * additionally provided with cbm_len that is initialized during
- * hardware enumeration using five bits from the EAX register and
- * thus never can exceed 32 bits.
- */
- unsigned long *val = (unsigned long *)_val;
+ unsigned long val = *_val;
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;
- if (*val == 0)
+ if (val == 0)
return;
- first_bit = find_first_bit(val, cbm_len);
- zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
/* Clear any remaining bits to ensure contiguous region */
- bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+ bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+ *_val = (u32)val;
}
/*
--
2.17.2
> From: Sasha Levin <sashal(a)kernel.org>
> Sent: Saturday, June 22, 2019 11:14 AM
> To: Sasha Levin <sashal(a)kernel.org>; Dexuan Cui <decui(a)microsoft.com>;
> linux-pci(a)vger.kernel.org
> Cc: Lili Deng (Wicresoft North America Ltd) <v-lide(a)microsoft.com>;
> stable(a)vger.kernel.org; stable(a)vger.kernel.org
> Subject: Re: [PATCH] PCI: hv: Fix a use-after-free bug in hv_eject_device_work()
>
> Hi,
>
> [This is an automated email]
>
> This commit has been processed because it contains a "Fixes:" tag,
> fixing commit: 05f151a73ec2 PCI: hv: Fix a memory leak in
> hv_eject_device_work().
>
> The bot has tested the following trees: v5.1.12, v4.19.53, v4.14.128, v4.9.182.
>
> v5.1.12: Build OK!
> v4.19.53: Build OK!
> v4.14.128: Failed to apply! Possible dependencies:
> 8c99e120ffca ("PCI: hv: Remove unused reason for refcount handler")
>
> v4.9.182: Failed to apply! Possible dependencies:
> 02c3764c776c ("PCI: hv: Temporary own CPU-number-to-vCPU-number
> infra")
> 0de8ce3ee8e3 ("PCI: hv: Allocate physically contiguous hypercall params
> buffer")
> 24196f0c7d4b ("PCI: hv: Convert hv_pci_dev.refs from atomic_t to
> refcount_t")
> 6ab42a66d2cc ("Drivers: hv: vmbus: Move Hypercall invocation code out
> of common code")
> 76d36ab79820 ("hv: switch to cpuhp state machine for synic
> init/cleanup")
> 7dcf90e9e032 ("PCI: hv: Use vPCI protocol version 1.2")
> 8730046c1498 ("Drivers: hv vmbus: Move Hypercall page setup out of
> common code")
> 8c99e120ffca ("PCI: hv: Remove unused reason for refcount handler")
> b1db7e7e1d70 ("PCI: hv: Add vPCI version protocol negotiation")
> d058fa7e98ff ("Drivers: hv: vmbus: Move the crash notification function")
>
>
> How should we proceed with this patch?
>
> --
> Thanks,
> Sasha
The patch has not gone into any upstream trees yet. So we can not do anything
at this point. I'll try to backport the patch for the old kernels once it's in.
Thanks,
-- Dexuan
commit cf131a81967583ae737df6383a0893b9fee75b4e upstream.
Heavy contention of the sde flushlist_lock can cause hard lockups at
extreme scale when the flushing logic is under stress.
Mitigate by replacing the item at a time copy to the local list with
an O(1) list_splice_init() and using the high priority work queue to
do the flushes.
Ported to linux-4.19.y.
Fixes: 7724105686e7 ("IB/hfi1: add driver files")
Cc: <stable(a)vger.kernel.org>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro(a)intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn(a)intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro(a)intel.com>
Signed-off-by: Doug Ledford <dledford(a)redhat.com>
---
drivers/infiniband/hw/hfi1/sdma.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 88e326d..d648a41 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -410,10 +410,7 @@ static void sdma_flush(struct sdma_engine *sde)
sdma_flush_descq(sde);
spin_lock_irqsave(&sde->flushlist_lock, flags);
/* copy flush list */
- list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) {
- list_del_init(&txp->list);
- list_add_tail(&txp->list, &flushlist);
- }
+ list_splice_init(&sde->flushlist, &flushlist);
spin_unlock_irqrestore(&sde->flushlist_lock, flags);
/* flush from flush list */
list_for_each_entry_safe(txp, txp_next, &flushlist, list)
@@ -2426,7 +2423,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
wait->tx_count++;
wait->count += tx->num_desc;
}
- schedule_work(&sde->flush_worker);
+ queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
ret = -ECOMM;
goto unlock;
nodesc:
@@ -2526,7 +2523,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
}
}
spin_unlock(&sde->flushlist_lock);
- schedule_work(&sde->flush_worker);
+ queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
ret = -ECOMM;
goto update_tail;
nodesc: