The patch titled
Subject: mm, thp: bail out early in collapse_file for writeback page
has been added to the -mm tree. Its filename is
mm-thp-bail-out-early-in-collapse_file-for-writeback-page.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-thp-bail-out-early-in-collapse…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-thp-bail-out-early-in-collapse…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Rongwei Wang <rongwei.wang(a)linux.alibaba.com>
Subject: mm, thp: bail out early in collapse_file for writeback page
Currently collapse_file does not explicitly check PG_writeback, instead,
page_has_private and try_to_release_page are used to filter writeback
pages. This does not work for xfs with blocksize equal to or larger than
pagesize, because in such case xfs has no page->private.
This makes collapse_file bail out early for writeback page. Otherwise,
xfs end_page_writeback will panic as follows.
page:fffffe00201bcc80 refcount:0 mapcount:0 mapping:ffff0003f88c86a8 index:0x0 pfn:0x84ef32
aops:xfs_address_space_operations [xfs] ino:30000b7 dentry name:"libtest.so"
flags: 0x57fffe0000008027(locked|referenced|uptodate|active|writeback)
raw: 57fffe0000008027 ffff80001b48bc28 ffff80001b48bc28 ffff0003f88c86a8
raw: 0000000000000000 0000000000000000 00000000ffffffff ffff0000c3e9a000
page dumped because: VM_BUG_ON_PAGE(((unsigned int) page_ref_count(page) + 127u <= 127u))
page->mem_cgroup:ffff0000c3e9a000
------------[ cut here ]------------
kernel BUG at include/linux/mm.h:1212!
Internal error: Oops - BUG: 0 [#1] SMP
Modules linked in:
BUG: Bad page state in process khugepaged pfn:84ef32
xfs(E)
page:fffffe00201bcc80 refcount:0 mapcount:0 mapping:0 index:0x0 pfn:0x84ef32
libcrc32c(E) rfkill(E) aes_ce_blk(E) crypto_simd(E) ...
CPU: 25 PID: 0 Comm: swapper/25 Kdump: loaded Tainted: ...
pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--)
pc : end_page_writeback+0x1c0/0x214
lr : end_page_writeback+0x1c0/0x214
sp : ffff800011ce3cc0
x29: ffff800011ce3cc0 x28: 0000000000000000
x27: ffff000c04608040 x26: 0000000000000000
x25: ffff000c04608040 x24: 0000000000001000
x23: ffff0003f88c8530 x22: 0000000000001000
x21: ffff0003f88c8530 x20: 0000000000000000
x19: fffffe00201bcc80 x18: 0000000000000030
x17: 0000000000000000 x16: 0000000000000000
x15: ffff000c018f9760 x14: ffffffffffffffff
x13: ffff8000119d72b0 x12: ffff8000119d6ee3
x11: ffff8000117b69b8 x10: 00000000ffff8000
x9 : ffff800010617534 x8 : 0000000000000000
x7 : ffff8000114f69b8 x6 : 000000000000000f
x5 : 0000000000000000 x4 : 0000000000000000
x3 : 0000000000000400 x2 : 0000000000000000
x1 : 0000000000000000 x0 : 0000000000000000
Call trace:
end_page_writeback+0x1c0/0x214
iomap_finish_page_writeback+0x13c/0x204
iomap_finish_ioend+0xe8/0x19c
iomap_writepage_end_bio+0x38/0x50
bio_endio+0x168/0x1ec
blk_update_request+0x278/0x3f0
blk_mq_end_request+0x34/0x15c
virtblk_request_done+0x38/0x74 [virtio_blk]
blk_done_softirq+0xc4/0x110
__do_softirq+0x128/0x38c
__irq_exit_rcu+0x118/0x150
irq_exit+0x1c/0x30
__handle_domain_irq+0x8c/0xf0
gic_handle_irq+0x84/0x108
el1_irq+0xcc/0x180
arch_cpu_idle+0x18/0x40
default_idle_call+0x4c/0x1a0
cpuidle_idle_call+0x168/0x1e0
do_idle+0xb4/0x104
cpu_startup_entry+0x30/0x9c
secondary_start_kernel+0x104/0x180
Code: d4210000 b0006161 910c8021 94013f4d (d4210000)
---[ end trace 4a88c6a074082f8c ]---
Kernel panic - not syncing: Oops - BUG: Fatal exception in interrupt
Link: https://lkml.kernel.org/r/20211022023052.33114-1-rongwei.wang@linux.alibaba…
Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS")
Signed-off-by: Rongwei Wang <rongwei.wang(a)linux.alibaba.com>
Signed-off-by: Xu Yu <xuyu(a)linux.alibaba.com>
Suggested-by: Yang Shi <shy828301(a)gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: Song Liu <song(a)kernel.org>
Cc: William Kucharski <william.kucharski(a)oracle.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/khugepaged.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/mm/khugepaged.c~mm-thp-bail-out-early-in-collapse_file-for-writeback-page
+++ a/mm/khugepaged.c
@@ -1763,6 +1763,10 @@ static void collapse_file(struct mm_stru
filemap_flush(mapping);
result = SCAN_FAIL;
goto xa_unlocked;
+ } else if (PageWriteback(page)) {
+ xas_unlock_irq(&xas);
+ result = SCAN_FAIL;
+ goto xa_unlocked;
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
@@ -1798,7 +1802,8 @@ static void collapse_file(struct mm_stru
goto out_unlock;
}
- if (!is_shmem && PageDirty(page)) {
+ if (!is_shmem && (PageDirty(page) ||
+ PageWriteback(page))) {
/*
* khugepaged only works on read-only fd, so this
* page is dirty because it hasn't been flushed
_
Patches currently in -mm which might be from rongwei.wang(a)linux.alibaba.com are
mm-thp-bail-out-early-in-collapse_file-for-writeback-page.patch
mm-damon-dbgfs-remove-unnecessary-variables.patch
The patch titled
Subject: mm/secretmem: avoid letting secretmem_users drop to zero
has been added to the -mm tree. Its filename is
mm-secretmem-avoid-letting-secretmem_users-drop-to-zero.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-secretmem-avoid-letting-secret…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-secretmem-avoid-letting-secret…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Kees Cook <keescook(a)chromium.org>
Subject: mm/secretmem: avoid letting secretmem_users drop to zero
Quoting Dmitry: "refcount_inc() needs to be done before fd_install().
After fd_install() finishes, the fd can be used by userspace and we can
have secret data in memory before the refcount_inc().
A straightforward misuse where a user will predict the returned fd in
another thread before the syscall returns and will use it to store secret
data is somewhat dubious because such a user just shoots themself in the
foot.
But a more interesting misuse would be to close the predicted fd and
decrement the refcount before the corresponding refcount_inc, this way one
can briefly drop the refcount to zero while there are other users of
secretmem."
Move fd_install() after refcount_inc().
Link: https://lkml.kernel.org/r/20211021154046.880251-1-keescook@chromium.org
Link: https://lore.kernel.org/lkml/CACT4Y+b1sW6-Hkn8HQYw_SsT7X3tp-CJNh2ci0wG3ZnQz…
Fixes: 9a436f8ff631 ("PM: hibernate: disable when there are active secretmem users")
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reported-by: Dmitry Vyukov <dvyukov(a)google.com>
Reviewed-by: Dmitry Vyukov <dvyukov(a)google.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Jordy Zomer <jordy(a)pwning.systems>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/secretmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/secretmem.c~mm-secretmem-avoid-letting-secretmem_users-drop-to-zero
+++ a/mm/secretmem.c
@@ -217,8 +217,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned i
file->f_flags |= O_LARGEFILE;
- fd_install(fd, file);
refcount_inc(&secretmem_users);
+ fd_install(fd, file);
return fd;
err_put_fd:
_
Patches currently in -mm which might be from keescook(a)chromium.org are
mm-secretmem-avoid-letting-secretmem_users-drop-to-zero.patch
kasan-test-bypass-__alloc_size-checks.patch
rapidio-avoid-bogus-__alloc_size-warning.patch
compiler-attributes-add-__alloc_size-for-better-bounds-checking.patch
slab-clean-up-function-prototypes.patch
slab-add-__alloc_size-attributes-for-better-bounds-checking.patch
mm-kvmalloc-add-__alloc_size-attributes-for-better-bounds-checking.patch
mm-vmalloc-add-__alloc_size-attributes-for-better-bounds-checking.patch
mm-page_alloc-add-__alloc_size-attributes-for-better-bounds-checking.patch
percpu-add-__alloc_size-attributes-for-better-bounds-checking.patch
kasan-test-consolidate-workarounds-for-unwanted-__alloc_size-protection.patch
maintainers-add-exec-binfmt-section-with-myself-and-eric.patch
binfmt_elf-reintroduce-using-map_fixed_noreplace.patch
The patch titled
Subject: mm: bdi: initialize bdi_min_ratio when bdi is unregistered
has been added to the -mm tree. Its filename is
mm-bdi-initialize-bdi_min_ratio-when-bdi-unregister.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-bdi-initialize-bdi_min_ratio-w…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-bdi-initialize-bdi_min_ratio-w…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Manjong Lee <mj0123.lee(a)samsung.com>
Subject: mm: bdi: initialize bdi_min_ratio when bdi is unregistered
Initialize min_ratio if it is set during bdi unregistration.
This can prevent problems that may occur a when bdi is removed without
resetting min_ratio.
For example.
1) insert external sdcard
2) set external sdcard's min_ratio 70
3) remove external sdcard without setting min_ratio 0
4) insert external sdcard
5) set external sdcard's min_ratio 70 << error occur(can't set)
Because when an sdcard is removed, the present bdi_min_ratio value will
remain. Currently, the only way to reset bdi_min_ratio is to reboot.
Link: https://lkml.kernel.org/r/20211021161942.5983-1-mj0123.lee@samsung.com
Signed-off-by: Manjong Lee <mj0123.lee(a)samsung.com>
Cc: Changheun Lee <nanich.lee(a)samsung.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Christoph Hellwig <hch(a)infradead.org>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <seunghwan.hyun(a)samsung.com>
Cc: <sookwan7.kim(a)samsung.com>
Cc: <yt0928.kim(a)samsung.com>
Cc: <junho89.kim(a)samsung.com>
Cc: <jisoo2146.oh(a)samsung.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/backing-dev.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/mm/backing-dev.c~mm-bdi-initialize-bdi_min_ratio-when-bdi-unregister
+++ a/mm/backing-dev.c
@@ -947,6 +947,11 @@ void bdi_unregister(struct backing_dev_i
wb_shutdown(&bdi->wb);
cgwb_bdi_unregister(bdi);
+ /* if min ratio doesn't 0, it has to set 0 before unregister */
+ if (bdi->min_ratio) {
+ bdi_set_min_ratio(bdi, 0);
+ }
+
if (bdi->dev) {
bdi_debug_unregister(bdi);
device_unregister(bdi->dev);
_
Patches currently in -mm which might be from mj0123.lee(a)samsung.com are
mm-bdi-initialize-bdi_min_ratio-when-bdi-unregister.patch
The patch titled
Subject: ocfs2: race between searching chunks and release journal_head from buffer_head
has been added to the -mm tree. Its filename is
ocfs2-race-between-searching-chunks-and-release-journal_head-from-buffer_head.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/ocfs2-race-between-searching-chun…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/ocfs2-race-between-searching-chun…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Subject: ocfs2: race between searching chunks and release journal_head from buffer_head
Encountered a race between ocfs2_test_bg_bit_allocatable() and
jbd2_journal_put_journal_head() resulting in the below vmcore.
PID: 106879 TASK: ffff880244ba9c00 CPU: 2 COMMAND: "loop3"
0 [ffff8802435ff1c0] panic at ffffffff816ed175
1 [ffff8802435ff240] oops_end at ffffffff8101a7c9
2 [ffff8802435ff270] no_context at ffffffff8106eccf
3 [ffff8802435ff2e0] __bad_area_nosemaphore at ffffffff8106ef9d
4 [ffff8802435ff330] bad_area_nosemaphore at ffffffff8106f143
5 [ffff8802435ff340] __do_page_fault at ffffffff8106f80b
6 [ffff8802435ff3a0] do_page_fault at ffffffff8106fc2f
7 [ffff8802435ff3e0] page_fault at ffffffff816fd667
[exception RIP: ocfs2_block_group_find_clear_bits+316]
RIP: ffffffffc11ef6fc RSP: ffff8802435ff498 RFLAGS: 00010206
RAX: 0000000000003918 RBX: 0000000000000001 RCX: 0000000000000018
RDX: 0000000000003918 RSI: 0000000000000000 RDI: ffff880060194040
RBP: ffff8802435ff4f8 R8: ffffffffff000000 R9: ffffffffffffffff
R10: ffff8802435ff730 R11: ffff8802a94e5800 R12: 0000000000000007
R13: 0000000000007e00 R14: 0000000000003918 R15: ffff88017c973a28
ORIG_RAX: ffffffffffffffff CS: e030 SS: e02b
8 [ffff8802435ff490] ocfs2_block_group_find_clear_bits at ffffffffc11ef680 [ocfs2]
9 [ffff8802435ff500] ocfs2_cluster_group_search at ffffffffc11ef916 [ocfs2]
10 [ffff8802435ff580] ocfs2_search_chain at ffffffffc11f0fb6 [ocfs2]
11 [ffff8802435ff660] ocfs2_claim_suballoc_bits at ffffffffc11f1b1b [ocfs2]
12 [ffff8802435ff6f0] __ocfs2_claim_clusters at ffffffffc11f32cb [ocfs2]
13 [ffff8802435ff770] ocfs2_claim_clusters at ffffffffc11f5caf [ocfs2]
14 [ffff8802435ff780] ocfs2_local_alloc_slide_window at ffffffffc11cc0db [ocfs2]
15 [ffff8802435ff820] ocfs2_reserve_local_alloc_bits at ffffffffc11ce53f [ocfs2]
16 [ffff8802435ff890] ocfs2_reserve_clusters_with_limit at ffffffffc11f59b5 [ocfs2]
17 [ffff8802435ff8e0] ocfs2_reserve_clusters at ffffffffc11f5c88 [ocfs2]
18 [ffff8802435ff8f0] ocfs2_lock_refcount_allocators at ffffffffc11dc169 [ocfs2]
19 [ffff8802435ff960] ocfs2_make_clusters_writable at ffffffffc11e4274 [ocfs2]
20 [ffff8802435ffa50] ocfs2_replace_cow at ffffffffc11e4df1 [ocfs2]
21 [ffff8802435ffac0] ocfs2_refcount_cow at ffffffffc11e54b1 [ocfs2]
22 [ffff8802435ffb80] ocfs2_file_write_iter at ffffffffc11bf8f4 [ocfs2]
23 [ffff8802435ffcd0] lo_rw_aio at ffffffff814a1b5d
24 [ffff8802435ffd80] loop_queue_work at ffffffff814a2802
25 [ffff8802435ffe60] kthread_worker_fn at ffffffff810a80d2
26 [ffff8802435ffec0] kthread at ffffffff810a7afb
27 [ffff8802435fff50] ret_from_fork at ffffffff816f7da1
When ocfs2_test_bg_bit_allocatable() called bh2jh(bg_bh), the
bg_bh->b_private NULL as jbd2_journal_put_journal_head() raced and
released the jounal head from the buffer head. Needed to take bit lock
for the bit 'BH_JournalHead' to fix this race.
Link: https://lkml.kernel.org/r/1634820718-6043-1-git-send-email-gautham.ananthak…
Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: <rajesh.sivaramasubramaniom(a)oracle.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/suballoc.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
--- a/fs/ocfs2/suballoc.c~ocfs2-race-between-searching-chunks-and-release-journal_head-from-buffer_head
+++ a/fs/ocfs2/suballoc.c
@@ -1251,7 +1251,7 @@ static int ocfs2_test_bg_bit_allocatable
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct journal_head *jh;
- int ret;
+ int ret = 1;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
return 0;
@@ -1259,14 +1259,18 @@ static int ocfs2_test_bg_bit_allocatable
if (!buffer_jbd(bg_bh))
return 1;
- jh = bh2jh(bg_bh);
- spin_lock(&jh->b_state_lock);
- bg = (struct ocfs2_group_desc *) jh->b_committed_data;
- if (bg)
- ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
- else
- ret = 1;
- spin_unlock(&jh->b_state_lock);
+ jbd_lock_bh_journal_head(bg_bh);
+ if (buffer_jbd(bg_bh)) {
+ jh = bh2jh(bg_bh);
+ spin_lock(&jh->b_state_lock);
+ bg = (struct ocfs2_group_desc *) jh->b_committed_data;
+ if (bg)
+ ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+ else
+ ret = 1;
+ spin_unlock(&jh->b_state_lock);
+ }
+ jbd_unlock_bh_journal_head(bg_bh);
return ret;
}
_
Patches currently in -mm which might be from gautham.ananthakrishna(a)oracle.com are
ocfs2-race-between-searching-chunks-and-release-journal_head-from-buffer_head.patch
The PIO scratch buffer is larger than a single page, and therefore
it is not possible to copy it in a single step to vcpu->arch/pio_data.
Bound each call to emulator_pio_in/out to a single page; keep
track of how many I/O operations are left in vcpu->arch.sev_pio_count,
so that the operation can be restarted in the complete_userspace_io
callback.
For OUT, this means that the previous kvm_sev_es_outs implementation
becomes an iterator of the loop, and we can consume the sev_pio_data
buffer before leaving to userspace.
For IN, instead, consuming the buffer and decreasing sev_pio_count
is always done in the complete_userspace_io callback, because that
is when the memcpy is done into sev_pio_data.
Cc: stable(a)vger.kernel.org
Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest")
Reported-by: Felix Wilhelm <fwilhelm(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/x86.c | 73 +++++++++++++++++++++++++--------
2 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6bed6c416c6c..5a0298aa56ba 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -703,6 +703,7 @@ struct kvm_vcpu_arch {
struct kvm_pio_request pio;
void *pio_data;
void *sev_pio_data;
+ unsigned sev_pio_count;
u8 event_exit_inst_len;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a485e185ad00..09c1e64495d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12378,38 +12378,76 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
- unsigned int port, unsigned int count)
+ unsigned int port);
+
+static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
{
- int ret = emulator_pio_out(vcpu, size, port,
- vcpu->arch.sev_pio_data, count);
+ vcpu->arch.pio.count = 0;
+ if (vcpu->arch.sev_pio_count)
+ return kvm_sev_es_outs(vcpu,
+ vcpu->arch.pio.size,
+ vcpu->arch.pio.port);
+ return 1;
+}
+
+static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
+ unsigned int port)
+{
+ for (;;) {
+ unsigned int count =
+ min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
+ int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
+
+ /* memcpy done already by emulator_pio_out. */
+ vcpu->arch.sev_pio_count -= count;
+ vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+ if (!ret)
+ break;
- if (ret) {
/* Emulation done by the kernel. */
- return ret;
+ vcpu->arch.pio.count = 0;
+ if (!vcpu->arch.sev_pio_count)
+ return 1;
}
- vcpu->arch.pio.count = 0;
+ vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
return 0;
}
-static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
+ unsigned int port);
+
+static void __complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
{
- memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data,
- vcpu->arch.pio.count * vcpu->arch.pio.size);
- vcpu->arch.pio.count = 0;
+ unsigned count = vcpu->arch.pio.count;
+ complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
+ vcpu->arch.sev_pio_count -= count;
+ vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+}
+static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+{
+ __complete_sev_es_emulated_ins(vcpu);
+ if (vcpu->arch.sev_pio_count)
+ return kvm_sev_es_ins(vcpu,
+ vcpu->arch.pio.size,
+ vcpu->arch.pio.port);
return 1;
}
static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
- unsigned int port, unsigned int count)
+ unsigned int port)
{
- int ret = emulator_pio_in(vcpu, size, port,
- vcpu->arch.sev_pio_data, count);
+ for (;;) {
+ unsigned int count =
+ min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
+ if (!__emulator_pio_in(vcpu, size, port, count))
+ break;
- if (ret) {
/* Emulation done by the kernel. */
- return ret;
+ __complete_sev_es_emulated_ins(vcpu);
+ if (!vcpu->arch.sev_pio_count)
+ return 1;
}
vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
@@ -12421,8 +12459,9 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
int in)
{
vcpu->arch.sev_pio_data = data;
- return in ? kvm_sev_es_ins(vcpu, size, port, count)
- : kvm_sev_es_outs(vcpu, size, port, count);
+ vcpu->arch.sev_pio_count = count;
+ return in ? kvm_sev_es_ins(vcpu, size, port)
+ : kvm_sev_es_outs(vcpu, size, port);
}
EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
--
2.27.0
emulator_pio_in handles both the case where the data is pending in
vcpu->arch.pio.count, and the case where I/O has to be done via either
an in-kernel device or a userspace exit. For SEV-ES we would like
to split these, to identify clearly the moment at which the
sev_pio_data is consumed. To this end, create two different
functions: __emulator_pio_in fills in vcpu->arch.pio.count, while
complete_emulator_pio_in clears it and releases vcpu->arch.pio.data.
While at it, remove the void* argument also from emulator_pio_in_out.
No functional change intended.
Cc: stable(a)vger.kernel.org
Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest")
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/x86.c | 42 +++++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8880dc36a2b4..07d9533b471d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6906,7 +6906,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
}
static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
- unsigned short port, void *val,
+ unsigned short port,
unsigned int count, bool in)
{
vcpu->arch.pio.port = port;
@@ -6927,26 +6927,31 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
return 0;
}
-static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
- unsigned short port, void *val, unsigned int count)
+static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, unsigned int count)
{
- int ret;
-
- if (vcpu->arch.pio.count)
- goto data_avail;
-
+ WARN_ON(vcpu->arch.pio.count);
memset(vcpu->arch.pio_data, 0, size * count);
+ return emulator_pio_in_out(vcpu, size, port, count, true);
+}
- ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
- if (ret) {
-data_avail:
- memcpy(val, vcpu->arch.pio_data, size * count);
- trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
- vcpu->arch.pio.count = 0;
- return 1;
- }
+static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, void *val)
+{
+ memcpy(val, vcpu->arch.pio_data, size * vcpu->arch.pio.count);
+ trace_kvm_pio(KVM_PIO_IN, port, size, vcpu->arch.pio.count, vcpu->arch.pio_data);
+ vcpu->arch.pio.count = 0;
+}
- return 0;
+static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, void *val, unsigned int count)
+{
+ if (!vcpu->arch.pio.count && !__emulator_pio_in(vcpu, size, port, count))
+ return 0;
+
+ WARN_ON(count != vcpu->arch.pio.count);
+ complete_emulator_pio_in(vcpu, size, port, val);
+ return 1;
}
static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
@@ -6965,12 +6970,11 @@ static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
memcpy(vcpu->arch.pio_data, val, size * count);
trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
- ret = emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
+ ret = emulator_pio_in_out(vcpu, size, port, count, false);
if (ret)
vcpu->arch.pio.count = 0;
return ret;
-
}
static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
--
2.27.0