Zoned devices require sequential writes within a zone. That means that
if two requests target the same zone, the request with the lower
position needs to be dispatched to the device first.
However, each priority level has its own tree & list, and requests with
higher priority are dispatched first.
So suppose requestA & requestB are on the same zone: requestA is BE at
pos X+0 and requestB is RT at pos X+1. RequestB will be dispatched before
requestA, which results in an error from the zoned device.
This was found in a real-world scenario when using F2FS on a zoned device,
and it is very easy to reproduce:
1. Use fsstress to run 8 test processes
2. Use ionice to change 4/8 processes to RT priority
Fixes: c807ab520fc3 ("block/mq-deadline: Add I/O priority support")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Wu Bo <bo.wu(a)vivo.com>
---
block/mq-deadline.c | 31 +++++++++++++++++++++++++++++++
include/linux/blk-mq.h | 15 +++++++++++++++
2 files changed, 46 insertions(+)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 02a916ba62ee..6a05dd86e8ca 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -539,6 +539,37 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
if (started_after(dd, rq, latest_start))
return NULL;
+ if (!blk_rq_is_seq_zoned_write(rq))
+ goto skip_check;
+ /*
+ * To ensure sequential writing, check the lower priority classes to see
+ * if there is a request on the same zone that needs to be dispatched
+ * first
+ */
+ ioprio_class = dd_rq_ioclass(rq);
+ prio = ioprio_class_to_prio[ioprio_class];
+ prio++;
+ for (; prio <= DD_PRIO_MAX; prio++) {
+ struct request *temp_rq;
+ unsigned long flags;
+ bool can_dispatch;
+
+ if (!dd_queued(dd, prio))
+ continue;
+
+ temp_rq = deadline_from_pos(&dd->per_prio[prio], data_dir, blk_rq_pos(rq));
+ if (temp_rq && blk_req_zone_in_one(temp_rq, rq) &&
+ blk_rq_pos(temp_rq) < blk_rq_pos(rq)) {
+ spin_lock_irqsave(&dd->zone_lock, flags);
+ can_dispatch = blk_req_can_dispatch_to_zone(temp_rq);
+ spin_unlock_irqrestore(&dd->zone_lock, flags);
+ if (!can_dispatch)
+ return NULL;
+ rq = temp_rq;
+ per_prio = &dd->per_prio[prio];
+ }
+ }
+skip_check:
/*
* rq is the selected appropriate request.
*/
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d3d8fd8e229b..bca1e639e0f3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1202,6 +1202,15 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
return true;
return !blk_req_zone_is_write_locked(rq);
}
+
+static inline bool blk_req_zone_in_one(struct request *rq_a,
+ struct request *rq_b)
+{
+ unsigned int zone_sectors = rq_a->q->limits.chunk_sectors;
+
+ return round_down(blk_rq_pos(rq_a), zone_sectors) ==
+ round_down(blk_rq_pos(rq_b), zone_sectors);
+}
#else /* CONFIG_BLK_DEV_ZONED */
static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
{
@@ -1229,6 +1238,12 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
return true;
}
+
+static inline bool blk_req_zone_in_one(struct request *rq_a,
+ struct request *rq_b)
+{
+ return false;
+}
#endif /* CONFIG_BLK_DEV_ZONED */
#endif /* BLK_MQ_H */
--
2.35.3
From: Ronald Wahl <ronald.wahl(a)raritan.com>
Under some circumstances it may happen that the ks8851 Ethernet driver
stops sending data.
Currently the interrupt handler resets the interrupt status flags in the
hardware after handling TX. With this approach we may lose interrupts in
the time window between handling the TX interrupt and resetting the TX
interrupt status bit.
When all of the three following conditions are true then transmitting
data stops:
- TX queue is stopped to wait for room in the hardware TX buffer
- no queued SKBs in the driver (txq) that wait for being written to hw
- hardware TX buffer is empty and the last TX interrupt was lost
This is because reenabling the TX queue happens when handling the TX
interrupt status but if the TX status bit has already been cleared then
this interrupt will never come.
With this commit the interrupt status flags will be cleared before they
are handled. That way we stop losing interrupts.
The wrong handling of the ISR flags was there from the beginning but
with commit 3dc5d4454545 ("net: ks8851: Fix TX stall caused by TX
buffer overrun") the issue becomes apparent.
Fixes: 3dc5d4454545 ("net: ks8851: Fix TX stall caused by TX buffer overrun")
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: Paolo Abeni <pabeni(a)redhat.com>
Cc: Simon Horman <horms(a)kernel.org>
Cc: netdev(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Ronald Wahl <ronald.wahl(a)raritan.com>
---
drivers/net/ethernet/micrel/ks8851_common.c | 18 +-----------------
1 file changed, 1 insertion(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 502518cdb461..6453c92f0fa7 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -328,7 +328,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
{
struct ks8851_net *ks = _ks;
struct sk_buff_head rxq;
- unsigned handled = 0;
unsigned long flags;
unsigned int status;
struct sk_buff *skb;
@@ -336,24 +335,17 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
ks8851_lock(ks, &flags);
status = ks8851_rdreg16(ks, KS_ISR);
+ ks8851_wrreg16(ks, KS_ISR, status);
netif_dbg(ks, intr, ks->netdev,
"%s: status 0x%04x\n", __func__, status);
- if (status & IRQ_LCI)
- handled |= IRQ_LCI;
-
if (status & IRQ_LDI) {
u16 pmecr = ks8851_rdreg16(ks, KS_PMECR);
pmecr &= ~PMECR_WKEVT_MASK;
ks8851_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK);
-
- handled |= IRQ_LDI;
}
- if (status & IRQ_RXPSI)
- handled |= IRQ_RXPSI;
-
if (status & IRQ_TXI) {
unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR);
@@ -365,20 +357,12 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
if (netif_queue_stopped(ks->netdev))
netif_wake_queue(ks->netdev);
spin_unlock(&ks->statelock);
-
- handled |= IRQ_TXI;
}
- if (status & IRQ_RXI)
- handled |= IRQ_RXI;
-
if (status & IRQ_SPIBEI) {
netdev_err(ks->netdev, "%s: spi bus error\n", __func__);
- handled |= IRQ_SPIBEI;
}
- ks8851_wrreg16(ks, KS_ISR, handled);
-
if (status & IRQ_RXI) {
/* the datasheet says to disable the rx interrupt during
* packet read-out, however we're masking the interrupt
--
2.45.0
The initial change to set x86_virt_bits to the correct value straight
away broke boot on Intel Quark X1000 CPUs (which are family 5, model 9,
stepping 0).
With deeper investigation it appears that the Quark doesn't have
the bit 19 set in 0x01 CPUID leaf, which means it doesn't provide
any clflush instructions and hence the cache alignment is set to 0.
The actual cache line size is 16 bytes, hence we may set the alignment
accordingly. At the same time the physical and virtual address bits
are retrieved via 0x80000008 CPUID leaf.
Note, we don't really care about the value of x86_clflush_size as it
is either used with a proper check for the instruction to be present,
or, like in PCI case, it assumes 32 bytes for all supported 32-bit CPUs
that have actually smaller cache line sizes and don't advertise it.
The commit fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct
value straight away, instead of a two-phase approach") basically
revealed the issue that has been present from day 1 of introducing
the Quark support.
Fixes: aece118e487a ("x86: Add cpu_detect_cache_sizes to init_intel() add Quark legacy_cache()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
---
arch/x86/kernel/cpu/intel.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index be30d7fa2e66..2bffae158dd5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -321,6 +321,15 @@ static void early_init_intel(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#else
+ /*
+ * The Quark doesn't have bit 19 set in 0x01 CPUID leaf, which means
+ * it doesn't provide any clflush instructions and hence the cache
+ * alignment is set to 0. The actual cache line size is 16 bytes,
+ * hence set the alignment accordingly. At the same time the physical
+ * and virtual address bits are retrieved via 0x80000008 CPUID leaf.
+ */
+ if (c->x86 == 5 && c->x86_model == 9)
+ c->x86_cache_alignment = 16;
/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
if (c->x86 == 15 && c->x86_cache_alignment == 64)
c->x86_cache_alignment = 128;
--
2.43.0.rc1.1336.g36b5255a03ac
I'm announcing the release of the 6.9.1 kernel.
All users of the 6.9 kernel series must upgrade.
The updated 6.9.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.9.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2
drivers/dma/idxd/cdev.c | 77 +++++++++++++++++++++++
drivers/dma/idxd/idxd.h | 3
drivers/dma/idxd/init.c | 4 +
drivers/dma/idxd/registers.h | 3
drivers/dma/idxd/sysfs.c | 27 +++++++-
drivers/net/wireless/mediatek/mt76/mt7915/main.c | 4 +
drivers/vfio/pci/vfio_pci.c | 2
include/linux/pci_ids.h | 2
security/keys/key.c | 3
10 files changed, 120 insertions(+), 7 deletions(-)
Arjan van de Ven (2):
VFIO: Add the SPR_DSA and SPR_IAX devices to the denylist
dmaengine: idxd: add a new security check to deal with a hardware erratum
Ben Greear (1):
wifi: mt76: mt7915: add missing chanctx ops
Greg Kroah-Hartman (1):
Linux 6.9.1
Nikhil Rao (1):
dmaengine: idxd: add a write() method for applications to submit work
Silvio Gissi (1):
keys: Fix overwrite of key expiration on instantiation