On Wed, 2025-09-24 at 16:22 +0100, "Roy, Patrick" wrote:
[...]
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 55b8d739779f..b7129c4868c5 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -4,6 +4,9 @@
 #include <linux/kvm_host.h>
 #include <linux/pagemap.h>
 #include <linux/anon_inodes.h>
+#include <linux/set_memory.h>
+#include <asm/tlbflush.h>
 
 #include "kvm_mm.h"
 
@@ -42,6 +45,44 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
 	return 0;
 }
 
+#define KVM_GMEM_FOLIO_NO_DIRECT_MAP BIT(0)
+static bool kvm_gmem_folio_no_direct_map(struct folio *folio)
+{
+	return ((u64) folio->private) & KVM_GMEM_FOLIO_NO_DIRECT_MAP;
+}
+
+static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
+{
+	if (kvm_gmem_folio_no_direct_map(folio))
+		return 0;
+
+	int r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
+					     false);
+
+	if (!r) {
+		unsigned long addr = (unsigned long) folio_address(folio);
+		folio->private = (void *) ((u64) folio->private & KVM_GMEM_FOLIO_NO_DIRECT_MAP);
+		flush_tlb_kernel_range(addr, addr + folio_size(folio));
+	}
+
+	return r;
+}
No idea how I managed to mess this function up so completely, but it should be more like
static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
{
	int r = 0;
	unsigned long addr = (unsigned long) folio_address(folio);
	u64 gmem_flags = (u64) folio_inode(folio)->i_private;

	if (kvm_gmem_folio_no_direct_map(folio) ||
	    !(gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP))
		goto out;

	r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
					 false);
	if (r)
		goto out;

	folio->private = (void *) KVM_GMEM_FOLIO_NO_DIRECT_MAP;
	flush_tlb_kernel_range(addr, addr + folio_size(folio));

out:
	return r;
}
the version I sent (a) does not respect the flags passed to guest_memfd on creation, and (b) does not correctly set the bit in folio->private.
+static void kvm_gmem_folio_restore_direct_map(struct folio *folio)
+{
+	/*
+	 * Direct map restoration cannot fail, as the only error condition
+	 * for direct map manipulation is failure to allocate page tables
+	 * when splitting huge pages, but this split would have already
+	 * happened in set_direct_map_invalid_noflush() in kvm_gmem_folio_zap_direct_map().
+	 * Thus set_direct_map_valid_noflush() here only updates prot bits.
+	 */
+	if (kvm_gmem_folio_no_direct_map(folio))
+		set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
+					     true);
+}
 static inline void kvm_gmem_mark_prepared(struct folio *folio)
 {
 	folio_mark_uptodate(folio);
@@ -324,13 +365,14 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
 	struct inode *inode = file_inode(vmf->vma->vm_file);
 	struct folio *folio;
 	vm_fault_t ret = VM_FAULT_LOCKED;
+	int err;
 
 	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
 
 	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
 	if (IS_ERR(folio)) {
-		int err = PTR_ERR(folio);
+		err = PTR_ERR(folio);
 
 		if (err == -EAGAIN)
 			return VM_FAULT_RETRY;
@@ -348,6 +390,13 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
 		kvm_gmem_mark_prepared(folio);
 	}
 
+	err = kvm_gmem_folio_zap_direct_map(folio);
+	if (err) {
+		ret = vmf_error(err);
+		goto out_folio;
+	}
+
 	vmf->page = folio_file_page(folio, vmf->pgoff);
 out_folio:
@@ -435,6 +484,8 @@ static void kvm_gmem_free_folio(struct folio *folio)
 	kvm_pfn_t pfn = page_to_pfn(page);
 	int order = folio_order(folio);
 
+	kvm_gmem_folio_restore_direct_map(folio);
+
 	kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
 }
 
@@ -499,6 +550,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	/* Unmovable mappings are supposed to be marked unevictable as well. */
 	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
 
+	if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)
+		mapping_set_no_direct_map(inode->i_mapping);
+
 	kvm_get_kvm(kvm);
 	gmem->kvm = kvm;
 	xa_init(&gmem->bindings);
@@ -523,6 +577,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
 	if (kvm_arch_supports_gmem_mmap(kvm))
 		valid_flags |= GUEST_MEMFD_FLAG_MMAP;
 
+	if (kvm_arch_gmem_supports_no_direct_map())
+		valid_flags |= GUEST_MEMFD_FLAG_NO_DIRECT_MAP;
+
 	if (flags & ~valid_flags)
 		return -EINVAL;
@@ -687,6 +744,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 	if (!is_prepared)
 		r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
 
+	kvm_gmem_folio_zap_direct_map(folio);
+
 	folio_unlock(folio);
 
 	if (!r)
[...]