When f2fs skipped a gc round during victim migration, a bug caused all
upcoming gc rounds to be skipped unconditionally, because skipped_gc_rwsem
was not reinitialized. Fix this by initializing skipped_gc_rwsem inside the
gc loop, so it is reset at the start of every round.
Fixes: 3db1de0e582c ("f2fs: change the current atomic write way")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yonggil Song <yonggil.song(a)samsung.com>
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b22f49a6f128..81d326abaac1 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1786,8 +1786,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
- sbi->skipped_gc_rwsem = 0;
gc_more:
+ sbi->skipped_gc_rwsem = 0;
if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
ret = -EINVAL;
goto stop;
--
2.34.1
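For illustration only: the failure mode reduces to a per-round flag that was
cleared before the retry loop instead of inside it. A minimal user-space C
sketch of the corrected placement (run_gc_round() and the flag are made-up
stand-ins, not f2fs code):

```c
#include <stdio.h>
#include <stdbool.h>

/* Stand-in for one GC round; pretend only the first round hits the skip path. */
static bool run_gc_round(int round)
{
	return round == 0;
}

int main(void)
{
	for (int round = 0; round < 3; round++) {
		bool skipped = false;	/* reset per round, as the fix does at gc_more: */

		if (run_gc_round(round))
			skipped = true;
		printf("round %d skipped=%d\n", round, skipped);
	}
	return 0;
}
```

With the reset outside the loop, a skip in round 0 would still be visible in
rounds 1 and 2; resetting per iteration confines it to the round it belongs to.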
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 42d0c4bdf753063b6eec55415003184d3ca24f6e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678704830213151(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
42d0c4bdf753 ("filelocks: use mount idmapping for setlease permission check")
c65454a94726 ("fs: remove locks_inode")
5970e15dbcfe ("filelock: move file locking definitions to separate header file")
8e1858710d9a ("ceph: avoid use-after-free in ceph_fl_release_lock()")
461ab10ef7e6 ("ceph: switch to vfs_inode_has_locks() to fix file lock bug")
6a518afcc206 ("Merge tag 'fs.acl.rework.v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 42d0c4bdf753063b6eec55415003184d3ca24f6e Mon Sep 17 00:00:00 2001
From: Seth Forshee <sforshee(a)kernel.org>
Date: Thu, 9 Mar 2023 14:39:09 -0600
Subject: [PATCH] filelocks: use mount idmapping for setlease permission check
A user should be allowed to take out a lease via an idmapped mount if
the fsuid matches the mapped uid of the inode. generic_setlease() is
checking the unmapped inode uid, causing these operations to be denied.
Fix this by comparing against the mapped inode uid instead of the
unmapped uid.
Fixes: 9caccd41541a ("fs: introduce MOUNT_ATTR_IDMAP")
Cc: stable(a)vger.kernel.org
Signed-off-by: Seth Forshee (DigitalOcean) <sforshee(a)kernel.org>
Signed-off-by: Christian Brauner (Microsoft) <brauner(a)kernel.org>
diff --git a/fs/locks.c b/fs/locks.c
index d82c4cacdfb9..df8b26a42524 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1863,9 +1863,10 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
void **priv)
{
struct inode *inode = file_inode(filp);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode);
int error;
- if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
+ if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
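For illustration only, a user-space C sketch of why the mapped uid must be
compared: with an idmapped mount the on-disk owner is translated before the
permission check, so comparing against the raw inode uid (the buggy path)
denies a caller that should match. The idmap struct and map_uid() are made-up
stand-ins, not the VFS API:

```c
#include <stdio.h>

/* Toy model: an idmapped mount remaps on-disk uids before permission checks. */
struct idmap { unsigned int lower_first, upper_first, count; };

static unsigned int map_uid(const struct idmap *m, unsigned int disk_uid)
{
	if (disk_uid >= m->lower_first && disk_uid < m->lower_first + m->count)
		return m->upper_first + (disk_uid - m->lower_first);
	return disk_uid;
}

int main(void)
{
	struct idmap m = { .lower_first = 0, .upper_first = 10000, .count = 1000 };
	unsigned int inode_uid = 0;	/* root-owned file on disk */
	unsigned int fsuid = 10000;	/* caller inside the idmapped mount */

	/* Buggy check: the raw inode uid never matches the caller. */
	printf("raw check:    %s\n", fsuid == inode_uid ? "allowed" : "denied");
	/* Fixed check: compare against the mapped uid, as the patch does. */
	printf("mapped check: %s\n", fsuid == map_uid(&m, inode_uid) ? "allowed" : "denied");
	return 0;
}
```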
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 42d0c4bdf753063b6eec55415003184d3ca24f6e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167870483012291(a)kroah.com' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
42d0c4bdf753 ("filelocks: use mount idmapping for setlease permission check")
c65454a94726 ("fs: remove locks_inode")
5970e15dbcfe ("filelock: move file locking definitions to separate header file")
8e1858710d9a ("ceph: avoid use-after-free in ceph_fl_release_lock()")
461ab10ef7e6 ("ceph: switch to vfs_inode_has_locks() to fix file lock bug")
6a518afcc206 ("Merge tag 'fs.acl.rework.v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 42d0c4bdf753063b6eec55415003184d3ca24f6e Mon Sep 17 00:00:00 2001
From: Seth Forshee <sforshee(a)kernel.org>
Date: Thu, 9 Mar 2023 14:39:09 -0600
Subject: [PATCH] filelocks: use mount idmapping for setlease permission check
A user should be allowed to take out a lease via an idmapped mount if
the fsuid matches the mapped uid of the inode. generic_setlease() is
checking the unmapped inode uid, causing these operations to be denied.
Fix this by comparing against the mapped inode uid instead of the
unmapped uid.
Fixes: 9caccd41541a ("fs: introduce MOUNT_ATTR_IDMAP")
Cc: stable(a)vger.kernel.org
Signed-off-by: Seth Forshee (DigitalOcean) <sforshee(a)kernel.org>
Signed-off-by: Christian Brauner (Microsoft) <brauner(a)kernel.org>
diff --git a/fs/locks.c b/fs/locks.c
index d82c4cacdfb9..df8b26a42524 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1863,9 +1863,10 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
void **priv)
{
struct inode *inode = file_inode(filp);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode);
int error;
- if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
+ if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
Presently, when a guest writes 1 to PMCR_EL0.{C,P}, which is WO/RAZ,
KVM saves the register value, including these bits.
When userspace reads the register using KVM_GET_ONE_REG, KVM returns
the saved register value as it is (the saved value might have these
bits set). This could result in userspace setting these bits on the
destination during migration. Consequently, KVM may end up resetting
the vPMU counter registers (PMCCNTR_EL0 and/or PMEVCNTR<n>_EL0) to
zero on the first KVM_RUN after migration.
Fix this by not saving those bits when a guest writes 1 to those bits.
Fixes: ab9468340d2b ("arm64: KVM: Add access handler for PMCR register")
Cc: stable(a)vger.kernel.org
Reviewed-by: Marc Zyngier <maz(a)kernel.org>
Signed-off-by: Reiji Watanabe <reijiw(a)google.com>
---
arch/arm64/kvm/pmu-emul.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 24908400e190..c243b10f3e15 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -538,7 +538,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
if (!kvm_pmu_is_3p5(vcpu))
val &= ~ARMV8_PMU_PMCR_LP;
- __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ /* The reset bits don't indicate any state, and shouldn't be saved. */
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter_mask(vcpu,
--
2.40.0.rc1.284.g88254d51c5-goog
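As a rough user-space model of the fix (the bit positions follow the
architected PMCR_EL0 layout, but the function and variable names are made up,
not KVM code): a write may act on the one-shot reset bits, yet those bits must
never land in the saved register value that is later exposed to userspace:

```c
#include <stdio.h>
#include <stdint.h>

#define PMCR_E (1u << 0)	/* enable, stateful */
#define PMCR_P (1u << 1)	/* reset event counters, write-only */
#define PMCR_C (1u << 2)	/* reset cycle counter, write-only */

static uint32_t saved_pmcr;

static void emulate_pmcr_write(uint32_t val)
{
	if (val & (PMCR_C | PMCR_P)) {
		/* act on the one-shot reset request here ... */
	}
	/* ... but never store the write-only trigger bits in the saved value */
	saved_pmcr = val & ~(PMCR_C | PMCR_P);
}

int main(void)
{
	emulate_pmcr_write(PMCR_E | PMCR_C | PMCR_P);
	printf("saved PMCR = %#x (reset bits dropped)\n", saved_pmcr);
	return 0;
}
```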
The patch titled
Subject: kernel/sys.c: fix and improve control flow in __sys_setres[ug]id()
has been added to the -mm mm-nonmm-unstable branch. Its filename is
kernel-sysc-fix-and-improve-control-flow-in-__sys_setresid.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ondrej Mosnacek <omosnace(a)redhat.com>
Subject: kernel/sys.c: fix and improve control flow in __sys_setres[ug]id()
Date: Fri, 17 Feb 2023 17:21:54 +0100
Linux Security Modules (LSMs) that implement the "capable" hook will
usually emit an access denial message to the audit log whenever they
"block" the current task from using the given capability based on their
security policy.
The occurrence of a denial is used as an indication that the given task
has attempted an operation that requires the given access permission, so
the callers of functions that perform LSM permission checks must take care
to avoid calling them too early (before it is decided if the permission is
actually needed to perform the requested operation).
The __sys_setres[ug]id() functions violate this convention by first
calling ns_capable_setid() and only then checking if the operation
requires the capability or not. It means that any caller that has the
capability granted by DAC (task's capability set) but not by MAC (LSMs)
will generate a "denied" audit record, even if is doing an operation for
which the capability is not required.
Fix this by reordering the checks such that ns_capable_setid() is checked
last and -EPERM is returned immediately if it returns false.
While there, also do two small optimizations:
* move the capability check before prepare_creds() and
* bail out early in case of a no-op.
Link: https://lkml.kernel.org/r/20230217162154.837549-1-omosnace@redhat.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Ondrej Mosnacek <omosnace(a)redhat.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/kernel/sys.c~kernel-sysc-fix-and-improve-control-flow-in-__sys_setresid
+++ a/kernel/sys.c
@@ -665,6 +665,7 @@ long __sys_setresuid(uid_t ruid, uid_t e
struct cred *new;
int retval;
kuid_t kruid, keuid, ksuid;
+ bool ruid_new, euid_new, suid_new;
kruid = make_kuid(ns, ruid);
keuid = make_kuid(ns, euid);
@@ -679,25 +680,29 @@ long __sys_setresuid(uid_t ruid, uid_t e
if ((suid != (uid_t) -1) && !uid_valid(ksuid))
return -EINVAL;
+ old = current_cred();
+
+ /* check for no-op */
+ if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
+ (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
+ uid_eq(keuid, old->fsuid))) &&
+ (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
+ return 0;
+
+ ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
+ !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
+ euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
+ !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
+ suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
+ !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
+ if ((ruid_new || euid_new || suid_new) &&
+ !ns_capable_setid(old->user_ns, CAP_SETUID))
+ return -EPERM;
+
new = prepare_creds();
if (!new)
return -ENOMEM;
- old = current_cred();
-
- retval = -EPERM;
- if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
- if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
- !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
- goto error;
- if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
- !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
- goto error;
- if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
- !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
- goto error;
- }
-
if (ruid != (uid_t) -1) {
new->uid = kruid;
if (!uid_eq(kruid, old->uid)) {
@@ -762,6 +767,7 @@ long __sys_setresgid(gid_t rgid, gid_t e
struct cred *new;
int retval;
kgid_t krgid, kegid, ksgid;
+ bool rgid_new, egid_new, sgid_new;
krgid = make_kgid(ns, rgid);
kegid = make_kgid(ns, egid);
@@ -774,23 +780,28 @@ long __sys_setresgid(gid_t rgid, gid_t e
if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
return -EINVAL;
+ old = current_cred();
+
+ /* check for no-op */
+ if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
+ (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
+ gid_eq(kegid, old->fsgid))) &&
+ (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
+ return 0;
+
+ rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
+ !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
+ egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
+ !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
+ sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
+ !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
+ if ((rgid_new || egid_new || sgid_new) &&
+ !ns_capable_setid(old->user_ns, CAP_SETGID))
+ return -EPERM;
+
new = prepare_creds();
if (!new)
return -ENOMEM;
- old = current_cred();
-
- retval = -EPERM;
- if (!ns_capable_setid(old->user_ns, CAP_SETGID)) {
- if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
- !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
- goto error;
- if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
- !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
- goto error;
- if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
- !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
- goto error;
- }
if (rgid != (gid_t) -1)
new->gid = krgid;
_
Patches currently in -mm which might be from omosnace(a)redhat.com are
kernel-sysc-fix-and-improve-control-flow-in-__sys_setresid.patch
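For illustration, a self-contained user-space C model of the reordered control
flow described above (struct cred, capable_setuid and model_setresuid() are
stand-ins, not the kernel implementation): the no-op case returns early, and
the capability check, i.e. the point where an LSM could log a denial, is only
reached when the requested change actually needs it:

```c
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct cred { unsigned int uid, euid, suid, fsuid; };

static bool capable_setuid;	/* stand-in for ns_capable_setid(); false here */

/* Model of the reordered checks: bail out on a no-op, then consult the
 * capability only when the requested change actually requires it. */
static int model_setresuid(const struct cred *old, int ruid, int euid, int suid)
{
	bool ruid_new, euid_new, suid_new;

	/* no-op: nothing changes, so no capability check (and no audit record) */
	if ((ruid == -1 || (unsigned int)ruid == old->uid) &&
	    (euid == -1 || ((unsigned int)euid == old->euid &&
			    (unsigned int)euid == old->fsuid)) &&
	    (suid == -1 || (unsigned int)suid == old->suid))
		return 0;

	ruid_new = ruid != -1 && (unsigned int)ruid != old->uid &&
		   (unsigned int)ruid != old->euid && (unsigned int)ruid != old->suid;
	euid_new = euid != -1 && (unsigned int)euid != old->uid &&
		   (unsigned int)euid != old->euid && (unsigned int)euid != old->suid;
	suid_new = suid != -1 && (unsigned int)suid != old->uid &&
		   (unsigned int)suid != old->euid && (unsigned int)suid != old->suid;

	if ((ruid_new || euid_new || suid_new) && !capable_setuid)
		return -EPERM;

	/* ... commit the new credentials here ... */
	return 0;
}

int main(void)
{
	struct cred old = { .uid = 1000, .euid = 1000, .suid = 1000, .fsuid = 1000 };

	printf("no-op:  %d\n", model_setresuid(&old, 1000, -1, -1));	/* 0 */
	printf("change: %d\n", model_setresuid(&old, 0, -1, -1));	/* -EPERM */
	return 0;
}
```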
The following changes since commit fe15c26ee26efa11741a7b632e9f23b01aca4cc6:
Linux 6.3-rc1 (2023-03-05 14:52:03 -0800)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus
for you to fetch changes up to ae43c20da2a77c508715a9c77845b4e87e6a1e25:
tools/virtio: Ignore virtio-trace/trace-agent (2023-03-13 02:29:12 -0400)
----------------------------------------------------------------
virtio,vhost,vdpa: bugfixes
Some fixes accumulated so far.
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
----------------------------------------------------------------
Cindy Lu (1):
vp_vdpa: fix the crash in hot unplug with vp_vdpa
Eugenio Pérez (1):
vdpa_sim: set last_used_idx as last_avail_idx in vdpasim_queue_ready
Gautam Dawar (1):
vhost-vdpa: free iommu domain after last use during cleanup
Rong Tao (1):
tools/virtio: Ignore virtio-trace/trace-agent
Si-Wei Liu (1):
vdpa/mlx5: should not activate virtq object when suspended
drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 +
drivers/vdpa/mlx5/net/mlx5_vnet.c | 6 +++++-
drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 +++++++++++
drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +-
drivers/vhost/vdpa.c | 3 ++-
tools/virtio/.gitignore | 1 +
6 files changed, 21 insertions(+), 3 deletions(-)
debug_active_activate() expected ref->count to be zero, which is no longer
true because __i915_active_activate() calls debug_active_activate() after
incrementing the count.
v2: No need to check for "ref->count == 1" as __i915_active_activate()
already makes sure of that.
Fixes: 04240e30ed06 ("drm/i915: Skip taking acquire mutex for no ref->active callback")
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)intel.com>
Cc: Andi Shyti <andi.shyti(a)linux.intel.com>
Cc: intel-gfx(a)lists.freedesktop.org
Cc: Janusz Krzysztofik <janusz.krzysztofik(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v5.10+
Signed-off-by: Nirmoy Das <nirmoy.das(a)intel.com>
---
drivers/gpu/drm/i915/i915_active.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index a9fea115f2d2..8ef93889061a 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -92,8 +92,7 @@ static void debug_active_init(struct i915_active *ref)
static void debug_active_activate(struct i915_active *ref)
{
lockdep_assert_held(&ref->tree_lock);
- if (!atomic_read(&ref->count)) /* before the first inc */
- debug_object_activate(ref, &active_debug_desc);
+ debug_object_activate(ref, &active_debug_desc);
}
static void debug_active_deactivate(struct i915_active *ref)
--
2.39.0
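As a rough stand-alone C illustration of the ordering problem (the names are
made up, not the i915 code): once the counter is incremented before the debug
hook runs, a zero-count test inside the hook can never succeed, which is why
the check is dropped:

```c
#include <stdio.h>

static int count;

/* Old debug hook: only fires before the first increment. */
static void debug_activate_old(void)
{
	if (count == 0)
		printf("debug object activated\n");
}

static void activate(void)
{
	count++;		/* __i915_active_activate() bumps the count first ... */
	debug_activate_old();	/* ... so the zero-count test above can never pass */
}

int main(void)
{
	activate();		/* prints nothing: the debug object is never activated */
	return 0;
}
```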
We walk the userspace PTs to discover what mapping size was
used there. However, this can race against the userspace tables
being freed, and we end up in the weeds.
Thankfully, the mm code is being generous and will IPI us when
doing so. So let's implement our part of the bargain and disable
interrupts around the walk. This ensures that nothing terrible
happens during that time.
We still need to handle the removal of the page tables before
the walk. For that, allow get_user_mapping_size() to return an
error, and make sure this error can be propagated all the way
to the exit handler.
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/kvm/mmu.c | 35 ++++++++++++++++++++++++++++-------
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7113587222ff..d7b8b25942df 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -666,14 +666,23 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
CONFIG_PGTABLE_LEVELS),
.mm_ops = &kvm_user_mm_ops,
};
+ unsigned long flags;
kvm_pte_t pte = 0; /* Keep GCC quiet... */
u32 level = ~0;
int ret;
+ /*
+ * Disable IRQs so that we hazard against a concurrent
+ * teardown of the userspace page tables (which relies on
+ * IPI-ing threads).
+ */
+ local_irq_save(flags);
ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
- VM_BUG_ON(ret);
- VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
- VM_BUG_ON(!(pte & PTE_VALID));
+ local_irq_restore(flags);
+
+ /* Oops, the userspace PTs are gone... */
+ if (ret || level >= KVM_PGTABLE_MAX_LEVELS || !(pte & PTE_VALID))
+ return -EFAULT;
return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
}
@@ -1079,7 +1088,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
*
* Returns the size of the mapping.
*/
-static unsigned long
+static long
transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long hva, kvm_pfn_t *pfnp,
phys_addr_t *ipap)
@@ -1091,8 +1100,15 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
* sure that the HVA and IPA are sufficiently aligned and that the
* block map is contained within the memslot.
*/
- if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
- get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
+ if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+ int sz = get_user_mapping_size(kvm, hva);
+
+ if (sz < 0)
+ return sz;
+
+ if (sz < PMD_SIZE)
+ return PAGE_SIZE;
+
/*
* The address we faulted on is backed by a transparent huge
* page. However, because we map the compound huge page and
@@ -1203,7 +1219,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
- unsigned long vma_pagesize, fault_granule;
+ long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
@@ -1350,6 +1366,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
hva, &pfn,
&fault_ipa);
+
+ if (vma_pagesize < 0) {
+ ret = vma_pagesize;
+ goto out_unlock;
+ }
}
if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
--
2.34.1
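For illustration, a user-space C sketch of the error-propagation pattern the
patch introduces (the helpers are made-up stand-ins, not KVM code): the walk
helper returns -EFAULT when the tables have vanished, and the caller forwards
that negative value instead of asserting:

```c
#include <errno.h>
#include <stdio.h>

#define PAGE_SIZE 4096L
#define PMD_SIZE  (2L * 1024 * 1024)

/* Stand-in for get_user_mapping_size(): the walk may find the tables gone. */
static long get_mapping_size(int tables_present)
{
	if (!tables_present)
		return -EFAULT;	/* report the race instead of BUG-ing */
	return PMD_SIZE;
}

/* Stand-in for transparent_hugepage_adjust(): propagate the error upward. */
static long adjust_mapping(int tables_present)
{
	long sz = get_mapping_size(tables_present);

	if (sz < 0)
		return sz;	/* caller aborts the fault handling and unlocks */
	if (sz < PMD_SIZE)
		return PAGE_SIZE;
	return PMD_SIZE;
}

int main(void)
{
	printf("tables present: %ld\n", adjust_mapping(1));
	printf("tables gone:    %ld\n", adjust_mapping(0));
	return 0;
}
```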