The "timers: Provide timer_shutdown[_sync]()" patch series implemented a
useful feature that addresses various bugs caused by attempts to rearm
shutdown timers.
https://lore.kernel.org/all/20221123201306.823305113@linutronix.de/
However, this patch series was never fully backported to kernels prior to
6.2, so fixing these bugs there requires separate patches.
The biggest problem with this is that even when such a bug is found and
fixed in the upstream kernel, the fix does not reach versions prior to 6.2
unless the maintainer or author also writes a separate backport patch;
otherwise the bug simply remains in the older kernels.
Therefore, to remove the need for such one-off backport patches, we should
backport the remaining unbackported commits from the
"timers: Provide timer_shutdown[_sync]()" patch series to versions prior
to 6.2.
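For reference, a minimal sketch of the teardown pattern the
timer_shutdown_sync() API enables; the foo_device driver, its fields and
functions are made up for illustration:

#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/timer.h>

struct foo_device {                     /* hypothetical driver state */
        struct timer_list poll_timer;   /* set up with timer_setup() in probe */
        /* ... */
};

static void foo_poll(struct timer_list *t)
{
        struct foo_device *foo = from_timer(foo, t, poll_timer);

        /* do some periodic work, then rearm */
        mod_timer(&foo->poll_timer, jiffies + HZ);
}

static void foo_remove(struct foo_device *foo)
{
        /*
         * del_timer_sync() only waits for a running callback; the callback
         * (or another path) can still rearm the timer afterwards and then
         * touch freed memory.  timer_shutdown_sync() additionally marks the
         * timer as shut down, so any later rearm attempt is discarded.
         */
        timer_shutdown_sync(&foo->poll_timer);
        kfree(foo);
}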
---
Documentation/RCU/Design/Requirements/Requirements.rst | 2 +-
Documentation/core-api/local_ops.rst | 2 +-
Documentation/kernel-hacking/locking.rst | 17 +++++++-----
Documentation/timers/hrtimers.rst | 2 +-
Documentation/translations/it_IT/kernel-hacking/locking.rst | 14 +++++-----
Documentation/translations/zh_CN/core-api/local_ops.rst | 2 +-
arch/arm/mach-spear/time.c | 8 +++---
drivers/bluetooth/hci_qca.c | 10 +++++--
drivers/char/tpm/tpm-dev-common.c | 4 +--
drivers/clocksource/arm_arch_timer.c | 12 ++++-----
drivers/clocksource/timer-sp804.c | 6 ++---
drivers/staging/wlan-ng/hfa384x_usb.c | 4 +--
drivers/staging/wlan-ng/prism2usb.c | 6 ++---
include/linux/timer.h | 17 ++++++++++--
kernel/time/timer.c | 315 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------
net/sunrpc/xprt.c | 2 +-
16 files changed, 322 insertions(+), 101 deletions(-)
From: Florian Westphal <fw@strlen.de>
[ Upstream commit 791a615b7ad2258c560f91852be54b0480837c93 ]
The initial buffer has to be initialised to all-ones, but only up to the
size of the first field, not the total field size.
After each round in the map search step, the result and the fill map are
swapped, so if we have a set where f->bsize of the first field is smaller
than m->bsize_max, those one-bits leak into the result map of future
rounds.
This makes pipapo find incorrect matching results for sets where the first
field size is not the largest.
A follow-up patch adds a test case to the nft_concat_range.sh selftest
script.
Thanks to Stefano Brivio for pointing out that we need to zero out the
remainder explicitly; only correcting the memset() argument isn't enough.
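To see the leak outside the kernel, here is a simplified stand-alone
sketch (buffer sizes are made up; this is not the pipapo code itself):

#include <stdio.h>
#include <string.h>

#define BSIZE_MAX 4     /* longs needed by the widest field */
#define F1_BSIZE  1     /* longs needed by the first field */

int main(void)
{
        unsigned long bufs[2][BSIZE_MAX] = { { 0 } };
        unsigned long *res = bufs[0], *fill = bufs[1], *tmp;
        int i;

        /* old initialisation: all BSIZE_MAX longs of res set to ones */
        memset(res, 0xff, sizeof(bufs[0]));

        /* round 1 only consumes and clears the first F1_BSIZE longs */
        for (i = 0; i < F1_BSIZE; i++)
                res[i] = 0;

        /* result and fill map are swapped between rounds */
        tmp = res; res = fill; fill = tmp;

        /* the stale one-bits now sit in the buffer used by later rounds */
        for (i = 0; i < BSIZE_MAX; i++)
                printf("fill[%d] = %lx%s\n", i, fill[i],
                       fill[i] ? "   <- leaked" : "");
        return 0;
}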
Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Reported-by: Yi Chen <yiche@redhat.com>
Cc: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Nazar Kalashnikov <sivartiwe@gmail.com>
---
Backport fix for CVE-2024-57947
net/netfilter/nft_set_pipapo.c | 4 ++--
net/netfilter/nft_set_pipapo.h | 21 +++++++++++++++++++++
net/netfilter/nft_set_pipapo_avx2.c | 10 ++++++----
3 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index ce617f6a215f..6813ff660b72 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -432,7 +432,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
res_map = scratch->map + (map_index ? m->bsize_max : 0);
fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
- memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+ pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
@@ -536,7 +536,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
goto out;
}
- memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
+ pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 2e709ae01924..8f8f58af4e34 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -287,4 +287,25 @@ static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
return size;
}
+/**
+ * pipapo_resmap_init() - Initialise result map before first use
+ * @m: Matching data, including mapping table
+ * @res_map: Result map
+ *
+ * Initialize all bits covered by the first field to one, so that after
+ * the first step, only the matching bits of the first bit group remain.
+ *
+ * If other fields have a large bitmap, set remainder of res_map to 0.
+ */
+static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map)
+{
+ const struct nft_pipapo_field *f = m->f;
+ int i;
+
+ for (i = 0; i < f->bsize; i++)
+ res_map[i] = ULONG_MAX;
+
+ for (i = f->bsize; i < m->bsize_max; i++)
+ res_map[i] = 0ul;
+}
#endif /* _NFT_SET_PIPAPO_H */
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 0a23d297084d..81e6d12ab4cd 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1028,6 +1028,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
/**
* nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes
+ * @mdata: Matching data, including mapping table
* @map: Previous match result, used as initial bitmap
* @fill: Destination bitmap to be filled with current match result
* @f: Field, containing lookup and mapping tables
@@ -1043,7 +1044,8 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
* Return: -1 on no match, rule index of match if @last, otherwise first long
* word index to be checked next (i.e. first filled word).
*/
-static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
+static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata,
+ unsigned long *map, unsigned long *fill,
struct nft_pipapo_field *f, int offset,
const u8 *pkt, bool first, bool last)
{
@@ -1053,7 +1055,7 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
lt += offset * NFT_PIPAPO_LONGS_PER_M256;
if (first)
- memset(map, 0xff, bsize * sizeof(*map));
+ pipapo_resmap_init(mdata, map);
for (i = offset; i < bsize; i++) {
if (f->bb == 8)
@@ -1181,7 +1183,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
} else if (f->groups == 16) {
NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
} else {
- ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
first, last);
}
@@ -1197,7 +1199,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
} else if (f->groups == 32) {
NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
} else {
- ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
+ ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
ret, rp,
first, last);
}
--
2.43.0
From: Nikolay Aleksandrov <razor@blackwall.org>
[ Upstream commit 9ec7eb60dcbcb6c41076defbc5df7bbd95ceaba5 ]
Add a bond_ether_setup() helper which is used to fix the ether_setup()
calls in the bonding driver. It takes care of both the IFF_MASTER and
IFF_SLAVE flags: the former is always restored, the latter only if it was
set.
If the bond enslaves a non-ARPHRD_ETHER device (changing the bond's type),
then releases it and enslaves an ARPHRD_ETHER device (changing the type
back), ether_setup() is used to restore the bond device type, but it also
resets the bond's flags and removes IFF_MASTER and IFF_SLAVE [1]. Use the
bond_ether_setup helper to restore both after such a transition.
[1] reproduce (nlmon is non-ARPHRD_ETHER):
$ ip l add nlmon0 type nlmon
$ ip l add bond2 type bond mode active-backup
$ ip l set nlmon0 master bond2
$ ip l set nlmon0 nomaster
$ ip l add bond1 type bond
(we use bond1 as ARPHRD_ETHER device to restore bond2's mode)
$ ip l set bond1 master bond2
$ ip l sh dev bond2
37: bond2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether be:d7:c5:40:5b:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500
(notice bond2's IFF_MASTER is missing)
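For a self-contained illustration of the flag handling described above,
here is a userspace sketch (not the bonding code; only the flag assignment
behaviour of ether_setup() is mimicked, flag values taken from if.h):

#include <stdio.h>

#define IFF_BROADCAST   0x02
#define IFF_MASTER      0x400
#define IFF_SLAVE       0x800
#define IFF_MULTICAST   0x1000

struct fake_dev { unsigned int flags; };

/* Mimics ether_setup(): flags are assigned outright, not OR-ed in,
 * so any IFF_MASTER/IFF_SLAVE bits set on the bond are wiped. */
static void fake_ether_setup(struct fake_dev *dev)
{
        dev->flags = IFF_BROADCAST | IFF_MULTICAST;
}

/* Mimics bond_ether_setup() from this patch: save IFF_SLAVE first,
 * restore IFF_MASTER unconditionally afterwards. */
static void fake_bond_ether_setup(struct fake_dev *dev)
{
        unsigned int slave_flag = dev->flags & IFF_SLAVE;

        fake_ether_setup(dev);
        dev->flags |= IFF_MASTER | slave_flag;
}

int main(void)
{
        struct fake_dev bond = { .flags = IFF_MASTER | IFF_MULTICAST };

        fake_ether_setup(&bond);
        printf("plain ether_setup: MASTER %s\n",
               bond.flags & IFF_MASTER ? "kept" : "lost");

        bond.flags = IFF_MASTER | IFF_MULTICAST;
        fake_bond_ether_setup(&bond);
        printf("bond_ether_setup:  MASTER %s\n",
               bond.flags & IFF_MASTER ? "kept" : "lost");
        return 0;
}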
Fixes: e36b9d16c6a6 ("bonding: clean muticast addresses when device changes type")
Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexey Panov <apanov@astralinux.ru>
---
Backport fix for CVE-2023-53103
Tested with the syzkaller reproducer for
https://syzkaller.appspot.com/bug?extid=9dfc3f3348729cc82277:
the issue triggers on vanilla v5.10.y and no longer reproduces with these
patches applied.
drivers/net/bonding/bond_main.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 08bc930afc4c..127242101c8e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1691,6 +1691,19 @@ void bond_lower_state_changed(struct slave *slave)
netdev_lower_state_changed(slave->dev, &info);
}
+/* The bonding driver uses ether_setup() to convert a master bond device
+ * to ARPHRD_ETHER, that resets the target netdevice's flags so we always
+ * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE if it was set
+ */
+static void bond_ether_setup(struct net_device *bond_dev)
+{
+ unsigned int slave_flag = bond_dev->flags & IFF_SLAVE;
+
+ ether_setup(bond_dev);
+ bond_dev->flags |= IFF_MASTER | slave_flag;
+ bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+}
+
/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
struct netlink_ext_ack *extack)
@@ -1777,10 +1790,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
if (slave_dev->type != ARPHRD_ETHER)
bond_setup_by_slave(bond_dev, slave_dev);
- else {
- ether_setup(bond_dev);
- bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- }
+ else
+ bond_ether_setup(bond_dev);
call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
bond_dev);
--
2.30.2
Changes in v3:
- Fixed patch numbering (previously sent as 2/3 instead of 3/3)
Changes in v2:
- Added a new patch fixing bonding regression, based on the Fixes tag in
c484fcc058ba ("bonding: Fix memory leak when changing bond type to Ethernet")
- Added a cover letter
- No changes in patches 1 and 3
- Retested the reproducer [1]
Tested with the syzkaller reproducer [1].
The issue triggers on vanilla v5.10.y and no longer reproduces with these
patches applied.
Additionally, c484fcc058ba ("bonding: Fix memory leak when changing bond type
to Ethernet") has a Fixes tag pointing to
9ec7eb60dcbc ("bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether
type change"), so it should be ported as well.
[1]: https://syzkaller.appspot.com/bug?extid=9dfc3f3348729cc82277
Ido Schimmel (1):
bonding: Fix memory leak when changing bond type to Ethernet
Nikolay Aleksandrov (2):
bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether type
change
bonding: restore bond's IFF_SLAVE flag if a non-eth dev enslave fails
drivers/net/bonding/bond_main.c | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)
--
2.39.5
The bridge window head alignment is calculated by calculate_mem_align() [*].
With the default bridge window alignment, the value it returns is used for
both head and tail alignment.
The selected head alignment does not always result in tight-fitting
resources (gap at d4f00000-d4ffffff):
d4800000-dbffffff : PCI Bus 0000:06
d4800000-d48fffff : PCI Bus 0000:07
d4800000-d4803fff : 0000:07:00.0
d4800000-d4803fff : nvme
d4900000-d49fffff : PCI Bus 0000:0a
d4900000-d490ffff : 0000:0a:00.0
d4900000-d490ffff : r8169
d4910000-d4913fff : 0000:0a:00.0
d4a00000-d4cfffff : PCI Bus 0000:0b
d4a00000-d4bfffff : 0000:0b:00.0
d4a00000-d4bfffff : 0000:0b:00.0
d4c00000-d4c07fff : 0000:0b:00.0
d4d00000-d4dfffff : PCI Bus 0000:15
d4d00000-d4d07fff : 0000:15:00.0
d4d00000-d4d07fff : xhci-hcd
d4e00000-d4efffff : PCI Bus 0000:16
d4e00000-d4e7ffff : 0000:16:00.0
d4e80000-d4e803ff : 0000:16:00.0
d4e80000-d4e803ff : ahci
d5000000-dbffffff : PCI Bus 0000:0c
This has not caused problems (for years) with the default bridge window
tail alignment, which grossly over-estimates the required tail alignment
and leaves more tail room than necessary. With the introduction of relaxed
tail alignment, which leaves no extra tail room whatsoever, any gaps
immediately turn into assignment failures.
Introduce a head alignment calculation that ensures no gaps are left, and
apply the new approach when relaxed alignment is used.
([*] I don't understand the algorithm in calculate_mem_align().)
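To make this concrete, here is a stand-alone userspace sketch of the
calculate_head_align() logic added below, run on a made-up mix of BARs
(with __ffs(SZ_1M) expanded to the constant 20):

#include <stdint.h>
#include <stdio.h>

#define SZ_1M 0x00100000ULL
typedef uint64_t resource_size_t;

static resource_size_t calculate_head_align(resource_size_t *aligns, int max_order)
{
        resource_size_t head_align = 1;
        resource_size_t remainder = 0;
        int order;

        /* Take the largest alignment as the starting point. */
        head_align <<= max_order + 20;

        for (order = max_order - 1; order >= 0; order--) {
                resource_size_t align1 = (resource_size_t)1 << (order + 20);

                /* Smaller resources that could fill the head room. */
                remainder += aligns[order];

                /* Halve the head alignment while the head can be filled. */
                while ((head_align > align1) && (remainder >= head_align / 2)) {
                        head_align /= 2;
                        remainder -= head_align;
                }
        }

        return head_align;
}

int main(void)
{
        /* One 8 MB BAR (max_order = 3), one 4 MB BAR (order 2), two 1 MB
         * BARs (order 0); aligns[order] holds the summed size per order. */
        resource_size_t aligns[28] = { [0] = 2 * SZ_1M, [2] = 4 * SZ_1M };

        printf("head alignment: %llu MB\n",
               (unsigned long long)(calculate_head_align(aligns, 3) / SZ_1M));
        return 0;
}

The program prints 2 MB: in this example the 4 MB and 1 MB BARs can always
pad the head room in front of the 8 MB BAR, so the window start only needs
2 MB alignment instead of 8 MB and no gap is left.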
Fixes: 5d0a8965aea9 ("[PATCH] 2.5.14: New PCI allocation code (alpha, arm, parisc) [2/2]")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220775
Reported-by: Malte Schröder <malte+lkml@tnxip.de>
Tested-by: Malte Schröder <malte+lkml@tnxip.de>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Cc: stable@vger.kernel.org
---
A little annoyingly, there is a difference in what the aligns array
contains between the legacy alignment approach (which I dare not touch, as
I really don't understand what the algorithm tries to do) and this new
head alignment algorithm, so both arrays consume stack space. After making
the new approach the only available one in the follow-up patch, only one
array remains (however, that follow-up change is also somewhat riskier
when it comes to regressions).
That being said, the new head alignment could work with the same aligns
array as the legacy approach; it just won't necessarily produce an optimal
(the smallest possible) head alignment when the if (r_size <= align)
condition is used. Just let me know if that approach is preferred (to save
some stack space).
---
drivers/pci/setup-bus.c | 53 ++++++++++++++++++++++++++++++++++-------
1 file changed, 44 insertions(+), 9 deletions(-)
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 70d021ffb486..93f6b0750174 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1224,6 +1224,45 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
return min_align;
}
+/*
+ * Calculate bridge window head alignment that leaves no gaps in between
+ * resources.
+ */
+static resource_size_t calculate_head_align(resource_size_t *aligns,
+ int max_order)
+{
+ resource_size_t head_align = 1;
+ resource_size_t remainder = 0;
+ int order;
+
+ /* Take the largest alignment as the starting point. */
+ head_align <<= max_order + __ffs(SZ_1M);
+
+ for (order = max_order - 1; order >= 0; order--) {
+ resource_size_t align1 = 1;
+
+ align1 <<= order + __ffs(SZ_1M);
+
+ /*
+ * Account smaller resources with alignment < max_order that
+ * could be used to fill head room if alignment less than
+ * max_order is used.
+ */
+ remainder += aligns[order];
+
+ /*
+ * Test if head fill is enough to satisfy the alignment of
+ * the larger resources after reducing the alignment.
+ */
+ while ((head_align > align1) && (remainder >= head_align / 2)) {
+ head_align /= 2;
+ remainder -= head_align;
+ }
+ }
+
+ return head_align;
+}
+
/**
* pbus_upstream_space_available - Check no upstream resource limits allocation
* @bus: The bus
@@ -1311,13 +1350,13 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
{
struct pci_dev *dev;
resource_size_t min_align, win_align, align, size, size0, size1 = 0;
- resource_size_t aligns[28]; /* Alignments from 1MB to 128TB */
+ resource_size_t aligns[28] = {}; /* Alignments from 1MB to 128TB */
+ resource_size_t aligns2[28] = {};/* Alignments from 1MB to 128TB */
int order, max_order;
struct resource *b_res = pbus_select_window_for_type(bus, type);
resource_size_t children_add_size = 0;
resource_size_t children_add_align = 0;
resource_size_t add_align = 0;
- resource_size_t relaxed_align;
resource_size_t old_size;
if (!b_res)
@@ -1327,7 +1366,6 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
if (b_res->parent)
return;
- memset(aligns, 0, sizeof(aligns));
max_order = 0;
size = 0;
@@ -1378,6 +1416,7 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
*/
if (r_size <= align)
aligns[order] += align;
+ aligns2[order] += align;
if (order > max_order)
max_order = order;
@@ -1402,9 +1441,7 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
if (bus->self && size0 &&
!pbus_upstream_space_available(bus, b_res, size0, min_align)) {
- relaxed_align = 1ULL << (max_order + __ffs(SZ_1M));
- relaxed_align = max(relaxed_align, win_align);
- min_align = min(min_align, relaxed_align);
+ min_align = calculate_head_align(aligns2, max_order);
size0 = calculate_memsize(size, min_size, 0, 0, old_size, win_align);
resource_set_range(b_res, min_align, size0);
pci_info(bus->self, "bridge window %pR to %pR requires relaxed alignment rules\n",
@@ -1418,9 +1455,7 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
if (bus->self && size1 &&
!pbus_upstream_space_available(bus, b_res, size1, add_align)) {
- relaxed_align = 1ULL << (max_order + __ffs(SZ_1M));
- relaxed_align = max(relaxed_align, win_align);
- min_align = min(min_align, relaxed_align);
+ min_align = calculate_head_align(aligns2, max_order);
size1 = calculate_memsize(size, min_size, add_size, children_add_size,
old_size, win_align);
pci_info(bus->self,
--
2.39.5