The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b321c31c9b7b309dcde5e8854b741c8e6a9a05f0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023072324-aviation-delirious-b27d@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
b321c31c9b7b ("KVM: arm64: vgic-v4: Make the doorbell request robust w.r.t preemption")
0c2f9acf6ae7 ("KVM: arm64: PMU: Don't overwrite PMUSERENR with vcpu loaded")
8681f7175901 ("KVM: arm64: PMU: Restore the host's PMUSERENR_EL0")
009d6dc87a56 ("ARM: perf: Allow the use of the PMUv3 driver on 32bit ARM")
711432770f78 ("perf: pmuv3: Abstract PMU version checks")
df29ddf4f04b ("arm64: perf: Abstract system register accesses away")
7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf")
cc91b9481605 ("arm64/perf: Replace PMU version number '0' with ID_AA64DFR0_EL1_PMUVer_NI")
4151bb636acf ("KVM: arm64: Fix SMPRI_EL1/TPIDR2_EL0 trapping on VHE")
bb0cca240a16 ("Merge branch kvm-arm64/single-step-async-exception into kvmarm-master/next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b321c31c9b7b309dcde5e8854b741c8e6a9a05f0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Thu, 13 Jul 2023 08:06:57 +0100
Subject: [PATCH] KVM: arm64: vgic-v4: Make the doorbell request robust w.r.t
preemption
Xiang reports that VMs occasionally fail to boot on GICv4.1 systems when
running a preemptible kernel, as it is possible that a vCPU is blocked
without requesting a doorbell interrupt.
The issue is that any preemption that occurs between vgic_v4_put() and
schedule() on the block path will mark the vPE as nonresident and *not*
request a doorbell irq. This occurs because when the vcpu thread is
resumed on its way to block, vcpu_load() will make the vPE resident
again. Once the vcpu actually blocks, we don't request a doorbell
anymore, and the vcpu won't be woken up on interrupt delivery.
Fix it by tracking that we're entering WFI, and key the doorbell
request on that flag. This allows us not to make the vPE resident
when going through a preempt/schedule cycle, meaning we don't lose
any state.
Cc: stable(a)vger.kernel.org
Fixes: 8e01d9a396e6 ("KVM: arm64: vgic-v4: Move the GICv4 residency flow to be driven by vcpu_load/put")
Reported-by: Xiang Chen <chenxiang66(a)hisilicon.com>
Suggested-by: Zenghui Yu <yuzenghui(a)huawei.com>
Tested-by: Xiang Chen <chenxiang66(a)hisilicon.com>
Co-developed-by: Oliver Upton <oliver.upton(a)linux.dev>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Acked-by: Zenghui Yu <yuzenghui(a)huawei.com>
Link: https://lore.kernel.org/r/20230713070657.3873244-1-maz@kernel.org
Signed-off-by: Oliver Upton <oliver.upton(a)linux.dev>
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8b6096753740..d3dd05bbfe23 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -727,6 +727,8 @@ struct kvm_vcpu_arch {
#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
/* PMUSERENR for the guest EL0 is on physical CPU */
#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
+/* WFI instruction trapped */
+#define IN_WFI __vcpu_single_flag(sflags, BIT(7))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a402ea5511f3..72dc53a75d1c 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -718,13 +718,15 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
- vgic_v4_put(vcpu, true);
+ vcpu_set_flag(vcpu, IN_WFI);
+ vgic_v4_put(vcpu);
preempt_enable();
kvm_vcpu_halt(vcpu);
vcpu_clear_flag(vcpu, IN_WFIT);
preempt_disable();
+ vcpu_clear_flag(vcpu, IN_WFI);
vgic_v4_load(vcpu);
preempt_enable();
}
@@ -792,7 +794,7 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
/* The distributor enable bits were changed */
preempt_disable();
- vgic_v4_put(vcpu, false);
+ vgic_v4_put(vcpu);
vgic_v4_load(vcpu);
preempt_enable();
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index c3b8e132d599..3dfc8b84e03e 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -749,7 +749,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- WARN_ON(vgic_v4_put(vcpu, false));
+ WARN_ON(vgic_v4_put(vcpu));
vgic_v3_vmcr_sync(vcpu);
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index c1c28fe680ba..339a55194b2c 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -336,14 +336,14 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
+int vgic_v4_put(struct kvm_vcpu *vcpu)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
- return its_make_vpe_non_resident(vpe, need_db);
+ return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
}
int vgic_v4_load(struct kvm_vcpu *vcpu)
@@ -354,6 +354,9 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
return 0;
+ if (vcpu_get_flag(vcpu, IN_WFI))
+ return 0;
+
/*
* Before making the VPE resident, make sure the redistributor
* corresponding to our current CPU expects us here. See the
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 402b545959af..5b27f94d4fad 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -431,7 +431,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
int vgic_v4_load(struct kvm_vcpu *vcpu);
void vgic_v4_commit(struct kvm_vcpu *vcpu);
-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
+int vgic_v4_put(struct kvm_vcpu *vcpu);
/* CPU HP callbacks */
void kvm_vgic_cpu_up(void);
Hi Greg, couple bcachefs fixes; this gets the downgrade path working so
6.9 works with the latest tools release.
CI testing... https://evilpiepirate.org/~testdashboard/ci?branch=bcachefs-for-v69
The following changes since commit 12c740d50d4e74e6b97d879363b85437dc895dde:
Linux 6.9.7 (2024-06-27 13:52:32 +0200)
are available in the Git repository at:
https://evilpiepirate.org/git/bcachefs.git tags/bcachefs-2024-06-20-stable-v6.9
for you to fetch changes up to 3d86d0704d4d03f76e5098ddf16152ee53f000f8:
bcachefs: btree_gc can now handle unknown btrees (2024-06-29 16:57:24 -0400)
----------------------------------------------------------------
bcachefs fixes for 6.9 stable
Downgrade path fixes; this fixes downgrade recovery passes not running
when downgrading from on disk format version 1.9; this results in
missing allocation information and writes immediately hanging due to the
allocator thinking there are no free buckets.
----------------------------------------------------------------
Kent Overstreet (5):
bcachefs: Fix sb_field_downgrade validation
bcachefs: Fix sb-downgrade validation
bcachefs: Fix bch2_sb_downgrade_update()
bcachefs: Fix setting of downgrade recovery passes/errors
bcachefs: btree_gc can now handle unknown btrees
fs/bcachefs/bcachefs.h | 44 +---------------------------------------
fs/bcachefs/btree_gc.c | 15 +++++++-------
fs/bcachefs/btree_gc.h | 48 ++++++++++++++++++++------------------------
fs/bcachefs/btree_gc_types.h | 29 ++++++++++++++++++++++++++
fs/bcachefs/ec.c | 2 +-
fs/bcachefs/sb-downgrade.c | 17 +++++++++++++---
fs/bcachefs/super-io.c | 12 +++--------
7 files changed, 77 insertions(+), 90 deletions(-)
create mode 100644 fs/bcachefs/btree_gc_types.h
[ Upstream commit bdcb8aa434c6d36b5c215d02a9ef07551be25a37 ]
In gfs2_put_super(), whether withdrawn or not, the quota should
be cleaned up by gfs2_quota_cleanup().
Otherwise, struct gfs2_sbd will be freed before gfs2_qd_dealloc (rcu
callback) has run for all gfs2_quota_data objects, resulting in
use-after-free.
Also, gfs2_destroy_threads() and gfs2_quota_cleanup() is already called
by gfs2_make_fs_ro(), so in gfs2_put_super(), after calling
gfs2_make_fs_ro(), there is no need to call them again.
The origin of a cherry-pick conflict is the (relevant) code block added in
commit f66af88e3321 ("gfs2: Stop using gfs2_make_fs_ro for withdraw")
There are no references to gfs2_withdrawn() nor gfs2_destroy_threads() in
gfs2_put_super(), so we can simply call gfs2_quota_cleanup() in a new else
block as bdcb8aa434c6 achieves.
Else braces were used for consistency with the if block.
Sponsor: 21SoftWare LLC
Signed-off-by: Clayton Casciato <majortomtosourcecontrol(a)gmail.com>
---
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 302d1e43d701..6107cd680176 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -591,6 +591,8 @@ static void gfs2_put_super(struct super_block *sb)
if (!sb_rdonly(sb)) {
gfs2_make_fs_ro(sdp);
+ } else {
+ gfs2_quota_cleanup(sdp);
}
WARN_ON(gfs2_withdrawing(sdp));
Current ata_gen_passthru_sense() code performs two actions:
1. Generates sense data based on the ATA 'status' and ATA 'error' fields.
2. Populates "ATA Status Return sense data descriptor" / "Fixed format
sense data" with ATA taskfile fields.
The problem is that #1 generates sense data even when a valid sense data
is already present (ATA_QCFLAG_SENSE_VALID is set). Factoring out #2 into
a separate function allows us to generate sense data only when there is
no valid sense data (ATA_QCFLAG_SENSE_VALID is not set).
As a bonus, we can now delete a FIXME comment in atapi_qc_complete()
which states that we don't want to translate taskfile registers into
sense descriptors for ATAPI.
Cc: stable(a)vger.kernel.org
Reviewed-by: Hannes Reinecke <hare(a)suse.de>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Igor Pylypiv <ipylypiv(a)google.com>
---
drivers/ata/libata-scsi.c | 158 +++++++++++++++++++++-----------------
1 file changed, 86 insertions(+), 72 deletions(-)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index a9e44ad4c2de..26b1263f5c7c 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -230,6 +230,80 @@ void ata_scsi_set_sense_information(struct ata_device *dev,
SCSI_SENSE_BUFFERSIZE, information);
}
+/**
+ * ata_scsi_set_passthru_sense_fields - Set ATA fields in sense buffer
+ * @qc: ATA PASS-THROUGH command.
+ *
+ * Populates "ATA Status Return sense data descriptor" / "Fixed format
+ * sense data" with ATA taskfile fields.
+ *
+ * LOCKING:
+ * None.
+ */
+static void ata_scsi_set_passthru_sense_fields(struct ata_queued_cmd *qc)
+{
+ struct scsi_cmnd *cmd = qc->scsicmd;
+ struct ata_taskfile *tf = &qc->result_tf;
+ unsigned char *sb = cmd->sense_buffer;
+
+ if ((sb[0] & 0x7f) >= 0x72) {
+ unsigned char *desc;
+ u8 len;
+
+ /* descriptor format */
+ len = sb[7];
+ desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
+ if (!desc) {
+ if (SCSI_SENSE_BUFFERSIZE < len + 14)
+ return;
+ sb[7] = len + 14;
+ desc = sb + 8 + len;
+ }
+ desc[0] = 9;
+ desc[1] = 12;
+ /*
+ * Copy registers into sense buffer.
+ */
+ desc[2] = 0x00;
+ desc[3] = tf->error;
+ desc[5] = tf->nsect;
+ desc[7] = tf->lbal;
+ desc[9] = tf->lbam;
+ desc[11] = tf->lbah;
+ desc[12] = tf->device;
+ desc[13] = tf->status;
+
+ /*
+ * Fill in Extend bit, and the high order bytes
+ * if applicable.
+ */
+ if (tf->flags & ATA_TFLAG_LBA48) {
+ desc[2] |= 0x01;
+ desc[4] = tf->hob_nsect;
+ desc[6] = tf->hob_lbal;
+ desc[8] = tf->hob_lbam;
+ desc[10] = tf->hob_lbah;
+ }
+ } else {
+ /* Fixed sense format */
+ sb[0] |= 0x80;
+ sb[3] = tf->error;
+ sb[4] = tf->status;
+ sb[5] = tf->device;
+ sb[6] = tf->nsect;
+ if (tf->flags & ATA_TFLAG_LBA48) {
+ sb[8] |= 0x80;
+ if (tf->hob_nsect)
+ sb[8] |= 0x40;
+ if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
+ sb[8] |= 0x20;
+ }
+ sb[9] = tf->lbal;
+ sb[10] = tf->lbam;
+ sb[11] = tf->lbah;
+ }
+}
+
static void ata_scsi_set_invalid_field(struct ata_device *dev,
struct scsi_cmnd *cmd, u16 field, u8 bit)
{
@@ -837,10 +911,8 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
* ata_gen_passthru_sense - Generate check condition sense block.
* @qc: Command that completed.
*
- * This function is specific to the ATA descriptor format sense
- * block specified for the ATA pass through commands. Regardless
- * of whether the command errored or not, return a sense
- * block. Copy all controller registers into the sense
+ * This function is specific to the ATA pass through commands.
+ * Regardless of whether the command errored or not, return a sense
* block. If there was no error, we get the request from an ATA
* passthrough command, so we use the following sense data:
* sk = RECOVERED ERROR
@@ -875,63 +947,6 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
*/
scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
}
-
- if ((sb[0] & 0x7f) >= 0x72) {
- unsigned char *desc;
- u8 len;
-
- /* descriptor format */
- len = sb[7];
- desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
- if (!desc) {
- if (SCSI_SENSE_BUFFERSIZE < len + 14)
- return;
- sb[7] = len + 14;
- desc = sb + 8 + len;
- }
- desc[0] = 9;
- desc[1] = 12;
- /*
- * Copy registers into sense buffer.
- */
- desc[2] = 0x00;
- desc[3] = tf->error;
- desc[5] = tf->nsect;
- desc[7] = tf->lbal;
- desc[9] = tf->lbam;
- desc[11] = tf->lbah;
- desc[12] = tf->device;
- desc[13] = tf->status;
-
- /*
- * Fill in Extend bit, and the high order bytes
- * if applicable.
- */
- if (tf->flags & ATA_TFLAG_LBA48) {
- desc[2] |= 0x01;
- desc[4] = tf->hob_nsect;
- desc[6] = tf->hob_lbal;
- desc[8] = tf->hob_lbam;
- desc[10] = tf->hob_lbah;
- }
- } else {
- /* Fixed sense format */
- sb[0] |= 0x80;
- sb[3] = tf->error;
- sb[4] = tf->status;
- sb[5] = tf->device;
- sb[6] = tf->nsect;
- if (tf->flags & ATA_TFLAG_LBA48) {
- sb[8] |= 0x80;
- if (tf->hob_nsect)
- sb[8] |= 0x40;
- if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
- sb[8] |= 0x20;
- }
- sb[9] = tf->lbal;
- sb[10] = tf->lbam;
- sb[11] = tf->lbah;
- }
}
/**
@@ -1634,6 +1649,8 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
u8 *cdb = cmd->cmnd;
int need_sense = (qc->err_mask != 0) &&
!(qc->flags & ATA_QCFLAG_SENSE_VALID);
+ int need_passthru_sense = (qc->err_mask != 0) ||
+ (qc->flags & ATA_QCFLAG_SENSE_VALID);
/* For ATA pass thru (SAT) commands, generate a sense block if
* user mandated it or if there's an error. Note that if we
@@ -1645,13 +1662,16 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
* asc,ascq = ATA PASS-THROUGH INFORMATION AVAILABLE
*/
if (((cdb[0] == ATA_16) || (cdb[0] == ATA_12)) &&
- ((cdb[2] & 0x20) || need_sense))
- ata_gen_passthru_sense(qc);
- else if (need_sense)
+ ((cdb[2] & 0x20) || need_passthru_sense)) {
+ if (!(qc->flags & ATA_QCFLAG_SENSE_VALID))
+ ata_gen_passthru_sense(qc);
+ ata_scsi_set_passthru_sense_fields(qc);
+ } else if (need_sense) {
ata_gen_ata_sense(qc);
- else
+ } else {
/* Keep the SCSI ML and status byte, clear host byte. */
cmd->result &= 0x0000ffff;
+ }
ata_qc_done(qc);
}
@@ -2590,14 +2610,8 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
/* handle completion from EH */
if (unlikely(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID)) {
- if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
- /* FIXME: not quite right; we don't want the
- * translation of taskfile registers into a
- * sense descriptors, since that's only
- * correct for ATA, not ATAPI
- */
+ if (!(qc->flags & ATA_QCFLAG_SENSE_VALID))
ata_gen_passthru_sense(qc);
- }
/* SCSI EH automatically locks door if sdev->locked is
* set. Sometimes door lock request continues to
--
2.45.2.803.g4e1b14247a-goog