The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
1739c7017fb1 ("KVM: x86: Add compat handler for KVM_X86_SET_MSR_FILTER")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1739c7017fb1d759965dcbab925ff5980a5318cb Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf(a)amazon.com>
Date: Mon, 17 Oct 2022 20:45:41 +0200
Subject: [PATCH] KVM: x86: Add compat handler for KVM_X86_SET_MSR_FILTER
The KVM_X86_SET_MSR_FILTER ioctls contains a pointer in the passed in
struct which means it has a different struct size depending on whether
it gets called from 32bit or 64bit code.
This patch introduces compat code that converts from the 32bit struct to
its 64bit counterpart which then gets used going forward internally.
With this applied, 32bit QEMU can successfully set MSR bitmaps when
running on 64bit kernels.
Reported-by: Andrew Randrianasulu <randrianasulu(a)gmail.com>
Fixes: 1a155254ff937 ("KVM: x86: Introduce MSR filtering")
Signed-off-by: Alexander Graf <graf(a)amazon.com>
Message-Id: <20221017184541.2658-4-graf(a)amazon.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 78f779f0264b..9cf1ba865562 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6489,6 +6489,62 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
return 0;
}
+#ifdef CONFIG_KVM_COMPAT
+/* for KVM_X86_SET_MSR_FILTER */
+struct kvm_msr_filter_range_compat {
+ __u32 flags;
+ __u32 nmsrs;
+ __u32 base;
+ __u32 bitmap;
+};
+
+struct kvm_msr_filter_compat {
+ __u32 flags;
+ struct kvm_msr_filter_range_compat ranges[KVM_MSR_FILTER_MAX_RANGES];
+};
+
+#define KVM_X86_SET_MSR_FILTER_COMPAT _IOW(KVMIO, 0xc6, struct kvm_msr_filter_compat)
+
+long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct kvm *kvm = filp->private_data;
+ long r = -ENOTTY;
+
+ switch (ioctl) {
+ case KVM_X86_SET_MSR_FILTER_COMPAT: {
+ struct kvm_msr_filter __user *user_msr_filter = argp;
+ struct kvm_msr_filter_compat filter_compat;
+ struct kvm_msr_filter filter;
+ int i;
+
+ if (copy_from_user(&filter_compat, user_msr_filter,
+ sizeof(filter_compat)))
+ return -EFAULT;
+
+ filter.flags = filter_compat.flags;
+ for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
+ struct kvm_msr_filter_range_compat *cr;
+
+ cr = &filter_compat.ranges[i];
+ filter.ranges[i] = (struct kvm_msr_filter_range) {
+ .flags = cr->flags,
+ .nmsrs = cr->nmsrs,
+ .base = cr->base,
+ .bitmap = (__u8 *)(ulong)cr->bitmap,
+ };
+ }
+
+ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
+ break;
+ }
+ }
+
+ return r;
+}
+#endif
+
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
static int kvm_arch_suspend_notifier(struct kvm *kvm)
{
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
2e3272bc1790 ("KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter()")
cf5029d5dd7c ("KVM: x86: Protect the unused bits in MSR exiting flags")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2e3272bc1790825c43d2c39690bf2836b81c6d36 Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf(a)amazon.com>
Date: Mon, 17 Oct 2022 20:45:40 +0200
Subject: [PATCH] KVM: x86: Copy filter arg outside
kvm_vm_ioctl_set_msr_filter()
In the next patch we want to introduce a second caller to
set_msr_filter() which constructs its own filter list on the stack.
Refactor the original function so it takes it as argument instead of
reading it through copy_from_user().
Signed-off-by: Alexander Graf <graf(a)amazon.com>
Message-Id: <20221017184541.2658-3-graf(a)amazon.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4bd5f8a751de..78f779f0264b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6442,26 +6442,22 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
return 0;
}
-static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
+ struct kvm_msr_filter *filter)
{
- struct kvm_msr_filter __user *user_msr_filter = argp;
struct kvm_x86_msr_filter *new_filter, *old_filter;
- struct kvm_msr_filter filter;
bool default_allow;
bool empty = true;
int r = 0;
u32 i;
- if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
- return -EFAULT;
-
- if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+ if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
- empty &= !filter.ranges[i].nmsrs;
+ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++)
+ empty &= !filter->ranges[i].nmsrs;
- default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
+ default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY);
if (empty && !default_allow)
return -EINVAL;
@@ -6469,8 +6465,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (!new_filter)
return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
- r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
+ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) {
+ r = kvm_add_msr_filter(new_filter, &filter->ranges[i]);
if (r) {
kvm_free_msr_filter(new_filter);
return r;
@@ -6915,9 +6911,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
case KVM_SET_PMU_EVENT_FILTER:
r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
break;
- case KVM_X86_SET_MSR_FILTER:
- r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
+ case KVM_X86_SET_MSR_FILTER: {
+ struct kvm_msr_filter __user *user_msr_filter = argp;
+ struct kvm_msr_filter filter;
+
+ if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
+ return -EFAULT;
+
+ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
break;
+ }
default:
r = -ENOTTY;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
2e3272bc1790 ("KVM: x86: Copy filter arg outside kvm_vm_ioctl_set_msr_filter()")
cf5029d5dd7c ("KVM: x86: Protect the unused bits in MSR exiting flags")
b318e8decf6b ("KVM: x86: Protect userspace MSR filter with SRCU, and set atomically-ish")
b3646477d458 ("KVM: x86: use static calls to reduce kvm_x86_ops overhead")
fe6b6bc802b4 ("KVM: VMX: Enable bus lock VM exit")
8e5332402164 ("KVM: VMX: Convert vcpu_vmx.exit_reason to a union")
15b51dc08a34 ("KVM: x86: Take KVM's SRCU lock only if steal time update is needed")
19979fba9bfa ("KVM: x86: Remove obsolete disabling of page faults in kvm_arch_vcpu_put()")
647daca25d24 ("KVM: SVM: Add support for booting APs in an SEV-ES guest")
8640ca588b03 ("KVM: SVM: Add AP_JUMP_TABLE support in prep for AP booting")
861377730aa9 ("KVM: SVM: Provide support for SEV-ES vCPU loading")
376c6d285017 ("KVM: SVM: Provide support for SEV-ES vCPU creation/loading")
85ca8be938c0 ("KVM: SVM: Set the encryption mask for the SVM host save area")
4444dfe4050b ("KVM: SVM: Add NMI support for an SEV-ES guest")
5719455fbd95 ("KVM: SVM: Do not report support for SMM for an SEV-ES guest")
7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest")
8f423a80d299 ("KVM: SVM: Support MMIO for an SEV-ES guest")
d523ab6ba275 ("KVM: SVM: Create trace events for VMGEXIT processing")
d36946679ef6 ("KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x004")
1edc14599e06 ("KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x002")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2e3272bc1790825c43d2c39690bf2836b81c6d36 Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf(a)amazon.com>
Date: Mon, 17 Oct 2022 20:45:40 +0200
Subject: [PATCH] KVM: x86: Copy filter arg outside
kvm_vm_ioctl_set_msr_filter()
In the next patch we want to introduce a second caller to
set_msr_filter() which constructs its own filter list on the stack.
Refactor the original function so it takes it as argument instead of
reading it through copy_from_user().
Signed-off-by: Alexander Graf <graf(a)amazon.com>
Message-Id: <20221017184541.2658-3-graf(a)amazon.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4bd5f8a751de..78f779f0264b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6442,26 +6442,22 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
return 0;
}
-static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
+ struct kvm_msr_filter *filter)
{
- struct kvm_msr_filter __user *user_msr_filter = argp;
struct kvm_x86_msr_filter *new_filter, *old_filter;
- struct kvm_msr_filter filter;
bool default_allow;
bool empty = true;
int r = 0;
u32 i;
- if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
- return -EFAULT;
-
- if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+ if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
- empty &= !filter.ranges[i].nmsrs;
+ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++)
+ empty &= !filter->ranges[i].nmsrs;
- default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
+ default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY);
if (empty && !default_allow)
return -EINVAL;
@@ -6469,8 +6465,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (!new_filter)
return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
- r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
+ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) {
+ r = kvm_add_msr_filter(new_filter, &filter->ranges[i]);
if (r) {
kvm_free_msr_filter(new_filter);
return r;
@@ -6915,9 +6911,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
case KVM_SET_PMU_EVENT_FILTER:
r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
break;
- case KVM_X86_SET_MSR_FILTER:
- r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
+ case KVM_X86_SET_MSR_FILTER: {
+ struct kvm_msr_filter __user *user_msr_filter = argp;
+ struct kvm_msr_filter filter;
+
+ if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
+ return -EFAULT;
+
+ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter);
break;
+ }
default:
r = -ENOTTY;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
50b0e4d4da09 ("drm/amdgpu: fix sdma doorbell init ordering on APUs")
59c43748c7c8 ("drm/amdgpu: move nbio sdma_doorbell_range() into sdma code for vega")
db10109793bb ("drm/amdgpu: move nbio ih_doorbell_range() into ih code for vega")
b672cb1eee59 ("drm/amdgpu: enable retry fault wptr overflow")
bebd4c79a4eb ("drm/amdgpu: create vega20 ih blocks")
554bdbf6de74 ("drm/amdgpu: use cached ih rb control reg offsets for vega10")
21822b6a968d ("drm/amdgpu: switch to ih_enable_ring for vega10")
fd95e1b1049e ("drm/amdgpu: switch to ih_toggle_interrupts for vega10")
c73750322aaf ("drm/amdgpu: add helper to toggle ih ring interrupts for vega10")
ffa02126e0ef ("drm/amdgpu: add helper to enable an ih ring for vega10")
1ebb4841f064 ("drm/amdgpu: add helper to init ih ring regs for vega10")
4750918978a7 ("drm/amdgpu: enabled software IH ring for Vega")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 50b0e4d4da09fa501e722af886f97e60a4f820d6 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Wed, 19 Oct 2022 16:57:42 -0400
Subject: [PATCH] drm/amdgpu: fix sdma doorbell init ordering on APUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Commit 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()")
uncovered a bug in amdgpu that required a reordering of the driver
init sequence to avoid accessing a special register on the GPU
before it was properly set up leading to an PCI AER error. This
reordering uncovered a different hw programming ordering dependency
in some APUs where the SDMA doorbells need to be programmed before
the GFX doorbells. To fix this, move the SDMA doorbell programming
back into the soc15 common code, but use the actual doorbell range
values directly rather than the values stored in the ring structure
since those will not be initialized at this point.
This is a partial revert, but with the doorbell assignment
fixed so the proper doorbell index is set before it's used.
Fixes: e3163bc8ffdfdb ("drm/amdgpu: move nbio sdma_doorbell_range() into sdma code for vega")
Acked-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: skhan(a)linuxfoundation.org
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 298fa11702e7..1122bd4eae98 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1417,11 +1417,6 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
WREG32_SDMA(i, mmSDMA0_CNTL, temp);
if (!amdgpu_sriov_vf(adev)) {
- ring = &adev->sdma.instance[i].ring;
- adev->nbio.funcs->sdma_doorbell_range(adev, i,
- ring->use_doorbell, ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
-
/* unhalt engine */
temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 183024d7c184..e3b2b6b4f1a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1211,6 +1211,20 @@ static int soc15_common_sw_fini(void *handle)
return 0;
}
+static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ /* sdma doorbell range is programed by hypervisor */
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->nbio.funcs->sdma_doorbell_range(adev, i,
+ true, adev->doorbell_index.sdma_engine[i] << 1,
+ adev->doorbell_index.sdma_doorbell_range);
+ }
+ }
+}
+
static int soc15_common_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1230,6 +1244,13 @@ static int soc15_common_hw_init(void *handle)
/* enable the doorbell aperture */
soc15_enable_doorbell_aperture(adev, true);
+ /* HW doorbell routing policy: doorbell writing not
+ * in SDMA/IH/MM/ACV range will be routed to CP. So
+ * we need to init SDMA doorbell range prior
+ * to CP ip block init and ring test. IH already
+ * happens before CP.
+ */
+ soc15_sdma_doorbell_range_init(adev);
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
67bf6493449b ("x86/resctrl: Fix min_cbm_bits for AMD")
2b8dd4ab65da ("x86/resctrl: Calculate the index from the configuration type")
2e7df368fc92 ("x86/resctrl: Apply offset correction when config is staged")
f07e9d025057 ("x86/resctrl: Add a helper to read a closid's configuration")
2e6678195d59 ("x86/resctrl: Rename update_domains() to resctrl_arch_update_domains()")
75408e43509e ("x86/resctrl: Allow different CODE/DATA configurations to be staged")
e8f7282552b9 ("x86/resctrl: Group staged configuration into a separate struct")
1c290682c0c9 ("x86/resctrl: Pass the schema to resctrl filesystem functions")
eb6f31876941 ("x86/resctrl: Add resctrl_arch_get_num_closid()")
3183e87c1b79 ("x86/resctrl: Store the effective num_closid in the schema")
331ebe4c4349 ("x86/resctrl: Walk the resctrl schema list instead of an arch list")
208ab16847c5 ("x86/resctrl: Label the resources with their configuration type")
f2594492308d ("x86/resctrl: Pass the schema in info dir's private pointer")
cdb9ebc91784 ("x86/resctrl: Add a separate schema list for resctrl")
792e0f6f789b ("x86/resctrl: Split struct rdt_domain")
63c8b1231929 ("x86/resctrl: Split struct rdt_resource")
fd2afa70eff0 ("x86/resctrl: Fix kernel-doc in internal.h")
8ba27ae36b41 ("Merge tag 'x86_cache_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67bf6493449b09590f9f71d7df29efb392b12d25 Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger(a)amd.com>
Date: Tue, 27 Sep 2022 15:16:29 -0500
Subject: [PATCH] x86/resctrl: Fix min_cbm_bits for AMD
AMD systems support zero CBM (capacity bit mask) for cache allocation.
That is reflected in rdt_init_res_defs_amd() by:
r->cache.arch_has_empty_bitmaps = true;
However given the unified code in cbm_validate(), checking for:
val == 0 && !arch_has_empty_bitmaps
is not enough because of another check in cbm_validate():
if ((zero_bit - first_bit) < r->cache.min_cbm_bits)
The default value of r->cache.min_cbm_bits = 1.
Leading to:
$ cd /sys/fs/resctrl
$ mkdir foo
$ cd foo
$ echo L3:0=0 > schemata
-bash: echo: write error: Invalid argument
$ cat /sys/fs/resctrl/info/last_cmd_status
Need at least 1 bits in the mask
Initialize the min_cbm_bits to 0 for AMD. Also, remove the default
setting of min_cbm_bits and initialize it separately.
After the fix:
$ cd /sys/fs/resctrl
$ mkdir foo
$ cd foo
$ echo L3:0=0 > schemata
$ cat /sys/fs/resctrl/info/last_cmd_status
ok
Fixes: 316e7f901f5a ("x86/resctrl: Add struct rdt_cache::arch_has_{sparse, empty}_bitmaps")
Co-developed-by: Stephane Eranian <eranian(a)google.com>
Signed-off-by: Stephane Eranian <eranian(a)google.com>
Signed-off-by: Babu Moger <babu.moger(a)amd.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: James Morse <james.morse(a)arm.com>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/lkml/20220517001234.3137157-1-eranian@google.com
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index de62b0b87ced..3266ea36667c 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -66,9 +66,6 @@ struct rdt_hw_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L3,
.name = "L3",
.cache_level = 3,
- .cache = {
- .min_cbm_bits = 1,
- },
.domains = domain_init(RDT_RESOURCE_L3),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
@@ -83,9 +80,6 @@ struct rdt_hw_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L2,
.name = "L2",
.cache_level = 2,
- .cache = {
- .min_cbm_bits = 1,
- },
.domains = domain_init(RDT_RESOURCE_L2),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
@@ -836,6 +830,7 @@ static __init void rdt_init_res_defs_intel(void)
r->cache.arch_has_sparse_bitmaps = false;
r->cache.arch_has_empty_bitmaps = false;
r->cache.arch_has_per_cpu_cfg = false;
+ r->cache.min_cbm_bits = 1;
} else if (r->rid == RDT_RESOURCE_MBA) {
hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
hw_res->msr_update = mba_wrmsr_intel;
@@ -856,6 +851,7 @@ static __init void rdt_init_res_defs_amd(void)
r->cache.arch_has_sparse_bitmaps = true;
r->cache.arch_has_empty_bitmaps = true;
r->cache.arch_has_per_cpu_cfg = true;
+ r->cache.min_cbm_bits = 0;
} else if (r->rid == RDT_RESOURCE_MBA) {
hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
hw_res->msr_update = mba_wrmsr_amd;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
abe3c631447d ("selinux: enable use of both GFP_KERNEL and GFP_ATOMIC in convert_context()")
d97bd23c2d7d ("selinux: cache the SID -> context string translation")
66f8e2f03c02 ("selinux: sidtab reverse lookup hash table")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From abe3c631447dcd1ba7af972fe6f054bee6f136fa Mon Sep 17 00:00:00 2001
From: "GONG, Ruiqi" <gongruiqi1(a)huawei.com>
Date: Wed, 19 Oct 2022 10:57:10 +0800
Subject: [PATCH] selinux: enable use of both GFP_KERNEL and GFP_ATOMIC in
convert_context()
The following warning was triggered on a hardware environment:
SELinux: Converting 162 SID table entries...
BUG: sleeping function called from invalid context at
__might_sleep+0x60/0x74 0x0
in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 5943, name: tar
CPU: 7 PID: 5943 Comm: tar Tainted: P O 5.10.0 #1
Call trace:
dump_backtrace+0x0/0x1c8
show_stack+0x18/0x28
dump_stack+0xe8/0x15c
___might_sleep+0x168/0x17c
__might_sleep+0x60/0x74
__kmalloc_track_caller+0xa0/0x7dc
kstrdup+0x54/0xac
convert_context+0x48/0x2e4
sidtab_context_to_sid+0x1c4/0x36c
security_context_to_sid_core+0x168/0x238
security_context_to_sid_default+0x14/0x24
inode_doinit_use_xattr+0x164/0x1e4
inode_doinit_with_dentry+0x1c0/0x488
selinux_d_instantiate+0x20/0x34
security_d_instantiate+0x70/0xbc
d_splice_alias+0x4c/0x3c0
ext4_lookup+0x1d8/0x200 [ext4]
__lookup_slow+0x12c/0x1e4
walk_component+0x100/0x200
path_lookupat+0x88/0x118
filename_lookup+0x98/0x130
user_path_at_empty+0x48/0x60
vfs_statx+0x84/0x140
vfs_fstatat+0x20/0x30
__se_sys_newfstatat+0x30/0x74
__arm64_sys_newfstatat+0x1c/0x2c
el0_svc_common.constprop.0+0x100/0x184
do_el0_svc+0x1c/0x2c
el0_svc+0x20/0x34
el0_sync_handler+0x80/0x17c
el0_sync+0x13c/0x140
SELinux: Context system_u:object_r:pssp_rsyslog_log_t:s0:c0 is
not valid (left unmapped).
It was found that within a critical section of spin_lock_irqsave in
sidtab_context_to_sid(), convert_context() (hooked by
sidtab_convert_params.func) might cause the process to sleep via
allocating memory with GFP_KERNEL, which is problematic.
As Ondrej pointed out [1], convert_context()/sidtab_convert_params.func
has another caller sidtab_convert_tree(), which is okay with GFP_KERNEL.
Therefore, fix this problem by adding a gfp_t argument for
convert_context()/sidtab_convert_params.func and pass GFP_KERNEL/_ATOMIC
properly in individual callers.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20221018120111.1474581-1-gongruiqi1@huawei.com/ [1]
Reported-by: Tan Ninghao <tanninghao1(a)huawei.com>
Fixes: ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance")
Signed-off-by: GONG, Ruiqi <gongruiqi1(a)huawei.com>
Reviewed-by: Ondrej Mosnacek <omosnace(a)redhat.com>
[PM: wrap long BUG() output lines, tweak subject line]
Signed-off-by: Paul Moore <paul(a)paul-moore.com>
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index fe5fcf571c56..64a6a37dc36d 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2022,7 +2022,8 @@ static inline int convert_context_handle_invalid_context(
* in `newc'. Verify that the context is valid
* under the new policy.
*/
-static int convert_context(struct context *oldc, struct context *newc, void *p)
+static int convert_context(struct context *oldc, struct context *newc, void *p,
+ gfp_t gfp_flags)
{
struct convert_context_args *args;
struct ocontext *oc;
@@ -2036,7 +2037,7 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
args = p;
if (oldc->str) {
- s = kstrdup(oldc->str, GFP_KERNEL);
+ s = kstrdup(oldc->str, gfp_flags);
if (!s)
return -ENOMEM;
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
index a54b8652bfb5..db5cce385bf8 100644
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -325,7 +325,7 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
}
rc = convert->func(context, &dst_convert->context,
- convert->args);
+ convert->args, GFP_ATOMIC);
if (rc) {
context_destroy(&dst->context);
goto out_unlock;
@@ -404,7 +404,7 @@ static int sidtab_convert_tree(union sidtab_entry_inner *edst,
while (i < SIDTAB_LEAF_ENTRIES && *pos < count) {
rc = convert->func(&esrc->ptr_leaf->entries[i].context,
&edst->ptr_leaf->entries[i].context,
- convert->args);
+ convert->args, GFP_KERNEL);
if (rc)
return rc;
(*pos)++;
diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h
index 4eff0e49dcb2..9fce0d553fe2 100644
--- a/security/selinux/ss/sidtab.h
+++ b/security/selinux/ss/sidtab.h
@@ -65,7 +65,7 @@ struct sidtab_isid_entry {
};
struct sidtab_convert_params {
- int (*func)(struct context *oldc, struct context *newc, void *args);
+ int (*func)(struct context *oldc, struct context *newc, void *args, gfp_t gfp_flags);
void *args;
struct sidtab *target;
};
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
9e769bd7e5db ("btrfs: unlock locked extent area if we have contention")
994bcd1eae5b ("btrfs: remove failed_start argument from set_extent_bit")
c07d1004c55c ("btrfs: drop exclusive_bits from set_extent_bit")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
917f32a23501 ("btrfs: give struct btrfs_bio a real end_io handler")
f1c2937976be ("btrfs: properly abstract the parity raid bio handling")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9e769bd7e5db5e3bd76e7c67004c261f7fcaa8f1 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Fri, 30 Sep 2022 16:45:08 -0400
Subject: [PATCH] btrfs: unlock locked extent area if we have contention
In production we hit the following deadlock
task 1 task 2 task 3
------ ------ ------
fiemap(file) falloc(file) fsync(file)
write(0, 1MiB)
btrfs_commit_transaction()
wait_on(!pending_ordered)
lock(512MiB, 1GiB)
start_transaction
wait_on_transaction
lock(0, 1GiB)
wait_extent_bit(512MiB)
task 4
------
finish_ordered_extent(0, 1MiB)
lock(0, 1MiB)
**DEADLOCK**
This occurs because when task 1 does it's lock, it locks everything from
0-512MiB, and then waits for the 512MiB chunk to unlock. task 2 will
never unlock because it's waiting on the transaction commit to happen,
the transaction commit is waiting for the outstanding ordered extents,
and then the ordered extent thread is blocked waiting on the 0-1MiB
range to unlock.
To fix this we have to clear anything we've locked so far, wait for the
extent_state that we contended on, and then try to re-lock the entire
range again.
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 618275af19c4..83cb0378096f 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -1641,16 +1641,17 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
int err;
u64 failed_start;
- while (1) {
+ err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
+ cached_state, NULL, GFP_NOFS);
+ while (err == -EEXIST) {
+ if (failed_start != start)
+ clear_extent_bit(tree, start, failed_start - 1,
+ EXTENT_LOCKED, cached_state);
+
+ wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
&failed_start, cached_state, NULL,
GFP_NOFS);
- if (err == -EEXIST) {
- wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
- start = failed_start;
- } else
- break;
- WARN_ON(start > end);
}
return err;
}
The patch below does not apply to the 6.0-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
9e769bd7e5db ("btrfs: unlock locked extent area if we have contention")
994bcd1eae5b ("btrfs: remove failed_start argument from set_extent_bit")
c07d1004c55c ("btrfs: drop exclusive_bits from set_extent_bit")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
917f32a23501 ("btrfs: give struct btrfs_bio a real end_io handler")
f1c2937976be ("btrfs: properly abstract the parity raid bio handling")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9e769bd7e5db5e3bd76e7c67004c261f7fcaa8f1 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Fri, 30 Sep 2022 16:45:08 -0400
Subject: [PATCH] btrfs: unlock locked extent area if we have contention
In production we hit the following deadlock
task 1 task 2 task 3
------ ------ ------
fiemap(file) falloc(file) fsync(file)
write(0, 1MiB)
btrfs_commit_transaction()
wait_on(!pending_ordered)
lock(512MiB, 1GiB)
start_transaction
wait_on_transaction
lock(0, 1GiB)
wait_extent_bit(512MiB)
task 4
------
finish_ordered_extent(0, 1MiB)
lock(0, 1MiB)
**DEADLOCK**
This occurs because when task 1 does it's lock, it locks everything from
0-512MiB, and then waits for the 512MiB chunk to unlock. task 2 will
never unlock because it's waiting on the transaction commit to happen,
the transaction commit is waiting for the outstanding ordered extents,
and then the ordered extent thread is blocked waiting on the 0-1MiB
range to unlock.
To fix this we have to clear anything we've locked so far, wait for the
extent_state that we contended on, and then try to re-lock the entire
range again.
CC: stable(a)vger.kernel.org # 5.15+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 618275af19c4..83cb0378096f 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -1641,16 +1641,17 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
int err;
u64 failed_start;
- while (1) {
+ err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
+ cached_state, NULL, GFP_NOFS);
+ while (err == -EEXIST) {
+ if (failed_start != start)
+ clear_extent_bit(tree, start, failed_start - 1,
+ EXTENT_LOCKED, cached_state);
+
+ wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
&failed_start, cached_state, NULL,
GFP_NOFS);
- if (err == -EEXIST) {
- wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
- start = failed_start;
- } else
- break;
- WARN_ON(start > end);
}
return err;
}
commit 81d5d61454c365718655cfc87d8200c84e25d596 upstream.
Currently there are two corner cases not handling compat RO flags
correctly:
- Remount
We can still mount the fs RO with compat RO flags, then remount it RW.
We should not allow any write into a fs with unsupported RO flags.
- Still try to search block group items
In fact, behavior/on-disk format change to extent tree should not
need a full incompat flag.
And since we can ensure fs with unsupported RO flags never got any
writes (with above case fixed), then we can even skip block group
items search at mount time.
This patch will enhance the unsupported RO compat flags by:
- Reject read-write remount if there are unsupported RO compat flags
- Go dummy block group items directly for unsupported RO compat flags
In fact, only changes to chunk/subvolume/root/csum trees should go
incompat flags.
The latter part should allow future change to extent tree to be compat
RO flags.
Thus this patch also needs to be backported to all stable trees.
CC: stable(a)vger.kernel.org # 5.15
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/block-group.c | 11 ++++++++++-
fs/btrfs/super.c | 9 +++++++++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 474dcc0540a8..5eea56789ccc 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2139,7 +2139,16 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
int need_clear = 0;
u64 cache_gen;
- if (!info->extent_root)
+ /*
+ * Either no extent root (with ibadroots rescue option) or we have
+ * unsupported RO options. The fs can never be mounted read-write, so no
+ * need to waste time searching block group items.
+ *
+ * This also allows new extent tree related changes to be RO compat,
+ * no need for a full incompat flag.
+ */
+ if (!info->extent_root || (btrfs_super_compat_ro_flags(info->super_copy) &
+ ~BTRFS_FEATURE_COMPAT_RO_SUPP))
return fill_dummy_bgs(info);
key.objectid = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 969bf0724fdf..1bf2ded0be93 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2045,6 +2045,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
ret = -EINVAL;
goto restore;
}
+ if (btrfs_super_compat_ro_flags(fs_info->super_copy) &
+ ~BTRFS_FEATURE_COMPAT_RO_SUPP) {
+ btrfs_err(fs_info,
+ "can not remount read-write due to unsupported optional flags 0x%llx",
+ btrfs_super_compat_ro_flags(fs_info->super_copy) &
+ ~BTRFS_FEATURE_COMPAT_RO_SUPP);
+ ret = -EINVAL;
+ goto restore;
+ }
if (fs_info->fs_devices->rw_devices == 0) {
ret = -EACCES;
goto restore;
--
2.38.1