When unloading the MANA driver, mana_dealloc_queues() waits for the MANA
hardware to complete any in-flight packets and set the pending send count
to zero. But if the hardware has failed, mana_dealloc_queues() could wait
forever.

Fix this by adding a timeout to the wait. Set the timeout to 120 seconds,
which is a somewhat arbitrary value that is more than long enough for
functional hardware to complete any sends. If the timeout expires, reset
the device with PCIe FLR and free any SKBs still pending.
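For reference, below is a minimal sketch of the bounded wait introduced
here; it reuses the names from the diff (txq, gd) and elides the
per-queue loop and error handling:

	/* Back off exponentially while the h/w drains pending sends;
	 * give up once the 120s deadline passes.
	 */
	unsigned long timeout = jiffies + 120 * HZ;
	u32 tsleep = 1000;	/* initial sleep, in usecs */

	while (atomic_read(&txq->pending_sends) > 0 &&
	       time_before(jiffies, timeout)) {
		usleep_range(tsleep, tsleep + 1000);
		tsleep <<= 1;	/* 1ms, 2ms, 4ms, ... */
	}
	if (atomic_read(&txq->pending_sends))
		/* h/w is unresponsive: reset it so DMA is quiesced */
		pcie_flr(to_pci_dev(gd->gdma_context->dev));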
Cc: stable(a)vger.kernel.org
Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Signed-off-by: Souradeep Chakrabarti <schakrabarti(a)linux.microsoft.com>
---
V5 -> V6:
* Added pcie_flr to reset the pci after timeout.
* Fixed the position of changelog.
* Removed an unused variable (cq).
V4 -> V5:
* Added fixes tag
* Changed the usleep_range from a static to an incrementally increasing value.
* Initialized timeout at the beginning.
V3 -> V4:
* Removed the unnecessary braces from mana_dealloc_queues().
V2 -> V3:
* Removed the unnecessary braces from mana_dealloc_queues().
V1 -> V2:
* Added net branch
* Removed the typecasting to (struct mana_context*) of void pointer
* Repositioned timeout variable in mana_dealloc_queues()
* Repositioned vf_unload_timeout in the mana_context struct, to utilise the
6-byte hole
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 38 +++++++++++++++++--
1 file changed, 34 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a499e460594b..ea039e2d4c4b 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -8,6 +8,7 @@
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/mm.h>
+#include <linux/pci.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
@@ -2345,9 +2346,12 @@ int mana_attach(struct net_device *ndev)
static int mana_dealloc_queues(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
+ unsigned long timeout = jiffies + 120 * HZ;
struct gdma_dev *gd = apc->ac->gdma_dev;
struct mana_txq *txq;
+ struct sk_buff *skb;
int i, err;
+ u32 tsleep;
if (apc->port_is_up)
return -EINVAL;
@@ -2363,15 +2367,41 @@ static int mana_dealloc_queues(struct net_device *ndev)
* to false, but it doesn't matter since mana_start_xmit() drops any
* new packets due to apc->port_is_up being false.
*
- * Drain all the in-flight TX packets
+ * Drain all the in-flight TX packets.
+ * A timeout of 120 seconds for all the queues is used.
+ * It breaks the while loop when the h/w is not responding.
+ * The value of 120 was chosen to accommodate the maximum
+ * number of queues.
*/
+
for (i = 0; i < apc->num_queues; i++) {
txq = &apc->tx_qp[i].txq;
-
- while (atomic_read(&txq->pending_sends) > 0)
- usleep_range(1000, 2000);
+ tsleep = 1000;
+ while (atomic_read(&txq->pending_sends) > 0 &&
+ time_before(jiffies, timeout)) {
+ usleep_range(tsleep, tsleep + 1000);
+ tsleep <<= 1;
+ }
+ if (atomic_read(&txq->pending_sends)) {
+ err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
+ if (err) {
+ netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
+ err, atomic_read(&txq->pending_sends),
+ txq->gdma_txq_id);
+ }
+ break;
+ }
}
+ for (i = 0; i < apc->num_queues; i++) {
+ txq = &apc->tx_qp[i].txq;
+ while (atomic_read(&txq->pending_sends)) {
+ skb = skb_dequeue(&txq->pending_skbs);
+ mana_unmap_skb(skb, apc);
+ dev_consume_skb_any(skb);
+ atomic_sub(1, &txq->pending_sends);
+ }
+ }
/* We're 100% sure the queues can no longer be woken up, because
* we're sure now mana_poll_tx_cq() can't be running.
*/
--
2.34.1
From: Lee Jones <lee(a)kernel.org>
Upstream commit 04c55383fa5689357bcdd2c8036725a55ed632bc.
In the event of a failure in tcf_change_indev(), u32_set_parms() will
immediately return without decrementing the recently incremented
reference counter. If this happens enough times, the counter will roll
over and the reference will be freed, leading to a double free which can
be exploited to do 'bad things'.
In order to prevent this, move the point of possible failure above the
point where the reference counter is incremented. Also, save any
meaningful return values so they can be applied to the return data at the
appropriate point.
This issue was caught with KASAN.
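The shape of the fix is the usual "resolve anything fallible before
taking references" pattern. As a rough sketch, using the real
tcf_change_indev() but otherwise simplified (not the cls_u32 code
itself):

	static int set_parms_sketch(struct net *net, struct nlattr *indev_attr,
				    struct tc_u_knode *n)
	{
		int ifindex = -1;

		/* 1. Do the lookup that can fail first: nothing to unwind. */
		if (indev_attr) {
			ifindex = tcf_change_indev(net, indev_attr, NULL);
			if (ifindex < 0)
				return -EINVAL;
		}

		/* 2. Only now take references / mutate shared state. */
		...

		/* 3. Commit the saved result at the appropriate point. */
		if (ifindex >= 0)
			n->ifindex = ifindex;
		return 0;
	}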
Fixes: 705c7091262d ("net: sched: cls_u32: no need to call tcf_exts_change for newly allocated struct")
Suggested-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: Lee Jones <lee(a)kernel.org>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Acked-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Rishabh Bhatnagar <risbhat(a)amazon.com>
---
net/sched/cls_u32.c | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d30256ac3537..ee8ef606a8e9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -778,11 +778,22 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
int err;
+#ifdef CONFIG_NET_CLS_IND
+ int ifindex = -1;
+#endif
err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
if (err < 0)
return err;
+#ifdef CONFIG_NET_CLS_IND
+ if (tb[TCA_U32_INDEV]) {
+ ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
+ if (ifindex < 0)
+ return -EINVAL;
+ }
+#endif
+
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
@@ -814,13 +825,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
}
#ifdef CONFIG_NET_CLS_IND
- if (tb[TCA_U32_INDEV]) {
- int ret;
- ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
- if (ret < 0)
- return -EINVAL;
- n->ifindex = ret;
- }
+ if (ifindex >= 0)
+ n->ifindex = ifindex;
#endif
return 0;
}
--
2.40.1
select: false makes the schema effectively ignored and unenforced, which
is clearly not what we want for a device binding.
Fixes: 352546805a44 ("dt-bindings: clock: Add bindings for versal clock driver")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
---
Cc: Shubhrajyoti Datta <shubhrajyoti.datta(a)amd.com>
---
Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml | 2 --
1 file changed, 2 deletions(-)
diff --git a/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml b/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml
index e9cf747bf89b..04ea327d5313 100644
--- a/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/xlnx,versal-clk.yaml
@@ -14,8 +14,6 @@ description: |
reads required input clock frequencies from the devicetree and acts as clock
provider for all clock consumers of PS clocks.
-select: false
-
properties:
compatible:
oneOf:
--
2.34.1
The patch titled
Subject: Multi-gen LRU: Avoid race in inc_min_seq()
has been added to the -mm mm-unstable branch. Its filename is
mm-unstable-multi-gen-lru-avoid-race-in-inc_min_seq.patch
------------------------------------------------------
From: Kalesh Singh <kaleshsingh(a)google.com>
Subject: Multi-gen LRU: Avoid race in inc_min_seq()
Date: Tue, 1 Aug 2023 19:56:03 -0700
inc_max_seq() will try to inc_min_seq() if nr_gens == MAX_NR_GENS. This
is because the generations are reused (the last oldest now empty
generation will become the next youngest generation).
inc_min_seq() is retried until successful, dropping the lru_lock
and yielding the CPU on each failure, and retaking the lock before
trying again:
while (!inc_min_seq(lruvec, type, can_swap)) {
spin_unlock_irq(&lruvec->lru_lock);
cond_resched();
spin_lock_irq(&lruvec->lru_lock);
}
However, the initial condition that required incrementing the min_seq
(nr_gens == MAX_NR_GENS) is not retested. It can change via another
call to inc_max_seq() from run_aging() with force_scan=true through the
debugfs interface.
Since eviction stalls when nr_gens == MIN_NR_GENS, avoid unnecessarily
incrementing the min_seq by rechecking the number of generations before
each attempt.
This issue was uncovered in previous discussion on the list by Yu Zhao
and Aneesh Kumar [1].
[1] https://lore.kernel.org/linux-mm/CAOUHufbO7CaVm=xjEb1avDhHVvnC8pJmGyKcFf2iY…
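The resulting structure follows the usual rule for retry loops that drop
a lock: re-evaluate the guard after reacquiring it. Roughly, as a
simplified sketch of the patched inc_max_seq():

	restart:
		spin_lock_irq(&lruvec->lru_lock);

		for (type = ANON_AND_FILE - 1; type >= 0; type--) {
			/* recheck the guard: it may have changed while unlocked */
			if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
				continue;

			if (inc_min_seq(lruvec, type, can_swap))
				continue;

			/* failed: drop the lock, yield, retest from the top */
			spin_unlock_irq(&lruvec->lru_lock);
			cond_resched();
			goto restart;
		}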
Link: https://lkml.kernel.org/r/20230802025606.346758-2-kaleshsingh@google.com
Fixes: d6c3af7d8a2b ("mm: multi-gen LRU: debugfs interface")
Signed-off-by: Kalesh Singh <kaleshsingh(a)google.com>
Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com> [mediatek]
Tested-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Aneesh Kumar K V <aneesh.kumar(a)linux.ibm.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Brian Geffon <bgeffon(a)google.com>
Cc: Jan Alexander Steffens (heftig) <heftig(a)archlinux.org>
Cc: Lecopzer Chen <lecopzer.chen(a)mediatek.com>
Cc: Matthias Brugger <matthias.bgg(a)gmail.com>
Cc: Oleksandr Natalenko <oleksandr(a)natalenko.name>
Cc: Qi Zheng <zhengqi.arch(a)bytedance.com>
Cc: Steven Barrett <steven(a)liquorix.net>
Cc: Suleiman Souhlal <suleiman(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmscan.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
--- a/mm/vmscan.c~mm-unstable-multi-gen-lru-avoid-race-in-inc_min_seq
+++ a/mm/vmscan.c
@@ -4439,7 +4439,7 @@ static void inc_max_seq(struct lruvec *l
int prev, next;
int type, zone;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
-
+restart:
spin_lock_irq(&lruvec->lru_lock);
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
@@ -4450,11 +4450,12 @@ static void inc_max_seq(struct lruvec *l
VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
- while (!inc_min_seq(lruvec, type, can_swap)) {
- spin_unlock_irq(&lruvec->lru_lock);
- cond_resched();
- spin_lock_irq(&lruvec->lru_lock);
- }
+ if (inc_min_seq(lruvec, type, can_swap))
+ continue;
+
+ spin_unlock_irq(&lruvec->lru_lock);
+ cond_resched();
+ goto restart;
}
/*
_
The patch titled
Subject: Multi-gen LRU: Fix per-zone reclaim
has been added to the -mm mm-unstable branch. Its filename is
mm-unstable-multi-gen-lru-fix-per-zone-reclaim.patch
------------------------------------------------------
From: Kalesh Singh <kaleshsingh(a)google.com>
Subject: Multi-gen LRU: Fix per-zone reclaim
Date: Tue, 1 Aug 2023 19:56:02 -0700
MGLRU has an LRU list for each zone, for each type (anon/file), in each
generation:

long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];

The min_seq (oldest generation) can progress independently for each
type, but the max_seq (youngest generation) is shared between anon and
file. This maintains a common frame of reference.
In order for eviction to advance the min_seq of a type, all the per-zone
lists in the oldest generation of that type must be empty.
The eviction logic only considers pages from eligible zones for
eviction or promotion.
scan_folios() {
...
for (zone = sc->reclaim_idx; zone >= 0; zone--) {
...
sort_folio(); // Promote
...
isolate_folio(); // Evict
}
...
}
Consider a system with the movable zone configured and the default 4
generations. The current state of the system is shown below
(only one type is illustrated for simplicity):
Type: ANON
Zone DMA32 Normal Movable Device
Gen 0 0 0 4GB 0
Gen 1 0 1GB 1MB 0
Gen 2 1MB 4GB 1MB 0
Gen 3 1MB 1MB 1MB 0
Now consider a GFP_KERNEL allocation request (eligible zone index <=
Normal): evict_folios() will return without doing any work since there
are no pages to scan in the eligible zones of the oldest generation.
Reclaim won't make progress until triggered by a ZONE_MOVABLE allocation
request, which may not happen soon if there is a lot of free memory in
the movable zone. This can lead to OOM kills, even though there is 1GB
of pages in the Normal zone of Gen 1 that has not yet been tried for
reclaim.
This issue is not seen in the conventional active/inactive LRU since
there are no per-zone lists.
If there are no (or not enough) folios to scan in the eligible zones,
move folios from the ineligible zones (zone_index > reclaim_idx) to the
next generation. This allows min_seq to progress and reclaim to proceed
from the next generation (Gen 1).
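The new iteration order can be read as a rotation: the eligible zones
(sc->reclaim_idx down to 0) are scanned first, then the ineligible ones,
whose folios sort_folio() now moves into the next generation. A sketch
of the index arithmetic from the diff:

	for (i = MAX_NR_ZONES; i > 0; i--) {
		/* visits reclaim_idx, reclaim_idx - 1, ..., 0,
		 * then wraps to MAX_NR_ZONES - 1, ..., reclaim_idx + 1
		 */
		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
		/* scan lrugen->folios[gen][type][zone] ... */
	}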
Qualcomm, Mediatek and raspberrypi [1] discovered this issue independently.
[1] https://github.com/raspberrypi/linux/issues/5395
Link: https://lkml.kernel.org/r/20230802025606.346758-1-kaleshsingh@google.com
Fixes: ac35a4902374 ("mm: multi-gen LRU: minimal implementation")
Signed-off-by: Kalesh Singh <kaleshsingh(a)google.com>
Reported-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Reported-by: Lecopzer Chen <lecopzer.chen(a)mediatek.com>
Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com> [mediatek]
Tested-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Brian Geffon <bgeffon(a)google.com>
Cc: Jan Alexander Steffens (heftig) <heftig(a)archlinux.org>
Cc: Matthias Brugger <matthias.bgg(a)gmail.com>
Cc: Oleksandr Natalenko <oleksandr(a)natalenko.name>
Cc: Qi Zheng <zhengqi.arch(a)bytedance.com>
Cc: Steven Barrett <steven(a)liquorix.net>
Cc: Suleiman Souhlal <suleiman(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Aneesh Kumar K V <aneesh.kumar(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmscan.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
--- a/mm/vmscan.c~mm-unstable-multi-gen-lru-fix-per-zone-reclaim
+++ a/mm/vmscan.c
@@ -4889,7 +4889,8 @@ static int lru_gen_memcg_seg(struct lruv
* the eviction
******************************************************************************/
-static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
+static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc,
+ int tier_idx)
{
bool success;
int gen = folio_lru_gen(folio);
@@ -4939,6 +4940,13 @@ static bool sort_folio(struct lruvec *lr
return true;
}
+ /* ineligible */
+ if (zone > sc->reclaim_idx) {
+ gen = folio_inc_gen(lruvec, folio, false);
+ list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
+ return true;
+ }
+
/* waiting for writeback */
if (folio_test_locked(folio) || folio_test_writeback(folio) ||
(type == LRU_GEN_FILE && folio_test_dirty(folio))) {
@@ -4987,7 +4995,8 @@ static bool isolate_folio(struct lruvec
static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
int type, int tier, struct list_head *list)
{
- int gen, zone;
+ int i;
+ int gen;
enum vm_event_item item;
int sorted = 0;
int scanned = 0;
@@ -5003,9 +5012,10 @@ static int scan_folios(struct lruvec *lr
gen = lru_gen_from_seq(lrugen->min_seq[type]);
- for (zone = sc->reclaim_idx; zone >= 0; zone--) {
+ for (i = MAX_NR_ZONES; i > 0; i--) {
LIST_HEAD(moved);
int skipped = 0;
+ int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
struct list_head *head = &lrugen->folios[gen][type][zone];
while (!list_empty(head)) {
@@ -5019,7 +5029,7 @@ static int scan_folios(struct lruvec *lr
scanned += delta;
- if (sort_folio(lruvec, folio, tier))
+ if (sort_folio(lruvec, folio, sc, tier))
sorted += delta;
else if (isolate_folio(lruvec, folio, sc)) {
list_add(&folio->lru, list);
_