The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:27 -0400
Subject: [PATCH] btrfs: fix potential deadlock in the search ioctl
With the conversion of the tree locks to rwsem I got the following
lockdep splat:
======================================================
WARNING: possible circular locking dependency detected
5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 Not tainted
------------------------------------------------------
compsize/11122 is trying to acquire lock:
ffff889fabca8768 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90
but task is already holding lock:
ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (btrfs-fs-00){++++}-{3:3}:
down_write_nested+0x3b/0x70
__btrfs_tree_lock+0x24/0x120
btrfs_search_slot+0x756/0x990
btrfs_lookup_inode+0x3a/0xb4
__btrfs_update_delayed_inode+0x93/0x270
btrfs_async_run_delayed_root+0x168/0x230
btrfs_work_helper+0xd4/0x570
process_one_work+0x2ad/0x5f0
worker_thread+0x3a/0x3d0
kthread+0x133/0x150
ret_from_fork+0x1f/0x30
-> #1 (&delayed_node->mutex){+.+.}-{3:3}:
__mutex_lock+0x9f/0x930
btrfs_delayed_update_inode+0x50/0x440
btrfs_update_inode+0x8a/0xf0
btrfs_dirty_inode+0x5b/0xd0
touch_atime+0xa1/0xd0
btrfs_file_mmap+0x3f/0x60
mmap_region+0x3a4/0x640
do_mmap+0x376/0x580
vm_mmap_pgoff+0xd5/0x120
ksys_mmap_pgoff+0x193/0x230
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #0 (&mm->mmap_lock#2){++++}-{3:3}:
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
__might_fault+0x68/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
other info that might help us debug this:
Chain exists of:
&mm->mmap_lock#2 --> &delayed_node->mutex --> btrfs-fs-00
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(btrfs-fs-00);
lock(&delayed_node->mutex);
lock(btrfs-fs-00);
lock(&mm->mmap_lock#2);
*** DEADLOCK ***
1 lock held by compsize/11122:
#0: ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
stack backtrace:
CPU: 17 PID: 11122 Comm: compsize Kdump: loaded Not tainted 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922
Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
Call Trace:
dump_stack+0x78/0xa0
check_noncircular+0x165/0x180
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
? __might_fault+0x3e/0x90
? find_held_lock+0x72/0x90
__might_fault+0x68/0x90
? __might_fault+0x3e/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
? btrfs_search_forward+0x2a6/0x360
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
? __do_sys_newfstat+0x5a/0x70
? ksys_ioctl+0x83/0xc0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
The problem is we're doing a copy_to_user() while holding tree locks,
which can deadlock if we have to do a page fault for the copy_to_user().
This exists even without my locking changes, so it needs to be fixed.
Rework the search ioctl to do the pre-fault and then
copy_to_user_nofault for the copying.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 617ea38e6fd7..c15ab6c1897f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5653,9 +5653,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5675,7 +5675,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 00a88f2eb5ab..30794ae58498 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -241,9 +241,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd3511c5ca81..ac45f022b495 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2086,9 +2086,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly this next time through
+ */
+ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2096,10 +2101,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+ * * sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2184,6 +2193,10 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:27 -0400
Subject: [PATCH] btrfs: fix potential deadlock in the search ioctl
With the conversion of the tree locks to rwsem I got the following
lockdep splat:
======================================================
WARNING: possible circular locking dependency detected
5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 Not tainted
------------------------------------------------------
compsize/11122 is trying to acquire lock:
ffff889fabca8768 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90
but task is already holding lock:
ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (btrfs-fs-00){++++}-{3:3}:
down_write_nested+0x3b/0x70
__btrfs_tree_lock+0x24/0x120
btrfs_search_slot+0x756/0x990
btrfs_lookup_inode+0x3a/0xb4
__btrfs_update_delayed_inode+0x93/0x270
btrfs_async_run_delayed_root+0x168/0x230
btrfs_work_helper+0xd4/0x570
process_one_work+0x2ad/0x5f0
worker_thread+0x3a/0x3d0
kthread+0x133/0x150
ret_from_fork+0x1f/0x30
-> #1 (&delayed_node->mutex){+.+.}-{3:3}:
__mutex_lock+0x9f/0x930
btrfs_delayed_update_inode+0x50/0x440
btrfs_update_inode+0x8a/0xf0
btrfs_dirty_inode+0x5b/0xd0
touch_atime+0xa1/0xd0
btrfs_file_mmap+0x3f/0x60
mmap_region+0x3a4/0x640
do_mmap+0x376/0x580
vm_mmap_pgoff+0xd5/0x120
ksys_mmap_pgoff+0x193/0x230
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #0 (&mm->mmap_lock#2){++++}-{3:3}:
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
__might_fault+0x68/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
other info that might help us debug this:
Chain exists of:
&mm->mmap_lock#2 --> &delayed_node->mutex --> btrfs-fs-00
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(btrfs-fs-00);
lock(&delayed_node->mutex);
lock(btrfs-fs-00);
lock(&mm->mmap_lock#2);
*** DEADLOCK ***
1 lock held by compsize/11122:
#0: ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
stack backtrace:
CPU: 17 PID: 11122 Comm: compsize Kdump: loaded Not tainted 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922
Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
Call Trace:
dump_stack+0x78/0xa0
check_noncircular+0x165/0x180
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
? __might_fault+0x3e/0x90
? find_held_lock+0x72/0x90
__might_fault+0x68/0x90
? __might_fault+0x3e/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
? btrfs_search_forward+0x2a6/0x360
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
? __do_sys_newfstat+0x5a/0x70
? ksys_ioctl+0x83/0xc0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
The problem is we're doing a copy_to_user() while holding tree locks,
which can deadlock if we have to do a page fault for the copy_to_user().
This exists even without my locking changes, so it needs to be fixed.
Rework the search ioctl to do the pre-fault and then
copy_to_user_nofault for the copying.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 617ea38e6fd7..c15ab6c1897f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5653,9 +5653,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5675,7 +5675,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 00a88f2eb5ab..30794ae58498 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -241,9 +241,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd3511c5ca81..ac45f022b495 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2086,9 +2086,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly this next time through
+ */
+ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2096,10 +2101,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+ * * sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2184,6 +2193,10 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:27 -0400
Subject: [PATCH] btrfs: fix potential deadlock in the search ioctl
With the conversion of the tree locks to rwsem I got the following
lockdep splat:
======================================================
WARNING: possible circular locking dependency detected
5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 Not tainted
------------------------------------------------------
compsize/11122 is trying to acquire lock:
ffff889fabca8768 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90
but task is already holding lock:
ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (btrfs-fs-00){++++}-{3:3}:
down_write_nested+0x3b/0x70
__btrfs_tree_lock+0x24/0x120
btrfs_search_slot+0x756/0x990
btrfs_lookup_inode+0x3a/0xb4
__btrfs_update_delayed_inode+0x93/0x270
btrfs_async_run_delayed_root+0x168/0x230
btrfs_work_helper+0xd4/0x570
process_one_work+0x2ad/0x5f0
worker_thread+0x3a/0x3d0
kthread+0x133/0x150
ret_from_fork+0x1f/0x30
-> #1 (&delayed_node->mutex){+.+.}-{3:3}:
__mutex_lock+0x9f/0x930
btrfs_delayed_update_inode+0x50/0x440
btrfs_update_inode+0x8a/0xf0
btrfs_dirty_inode+0x5b/0xd0
touch_atime+0xa1/0xd0
btrfs_file_mmap+0x3f/0x60
mmap_region+0x3a4/0x640
do_mmap+0x376/0x580
vm_mmap_pgoff+0xd5/0x120
ksys_mmap_pgoff+0x193/0x230
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #0 (&mm->mmap_lock#2){++++}-{3:3}:
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
__might_fault+0x68/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
other info that might help us debug this:
Chain exists of:
&mm->mmap_lock#2 --> &delayed_node->mutex --> btrfs-fs-00
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(btrfs-fs-00);
lock(&delayed_node->mutex);
lock(btrfs-fs-00);
lock(&mm->mmap_lock#2);
*** DEADLOCK ***
1 lock held by compsize/11122:
#0: ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
stack backtrace:
CPU: 17 PID: 11122 Comm: compsize Kdump: loaded Not tainted 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922
Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
Call Trace:
dump_stack+0x78/0xa0
check_noncircular+0x165/0x180
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
? __might_fault+0x3e/0x90
? find_held_lock+0x72/0x90
__might_fault+0x68/0x90
? __might_fault+0x3e/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
? btrfs_search_forward+0x2a6/0x360
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
? __do_sys_newfstat+0x5a/0x70
? ksys_ioctl+0x83/0xc0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
The problem is we're doing a copy_to_user() while holding tree locks,
which can deadlock if we have to do a page fault for the copy_to_user().
This exists even without my locking changes, so it needs to be fixed.
Rework the search ioctl to do the pre-fault and then
copy_to_user_nofault for the copying.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 617ea38e6fd7..c15ab6c1897f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5653,9 +5653,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5675,7 +5675,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 00a88f2eb5ab..30794ae58498 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -241,9 +241,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd3511c5ca81..ac45f022b495 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2086,9 +2086,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly this next time through
+ */
+ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2096,10 +2101,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+ * * sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2184,6 +2193,10 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:27 -0400
Subject: [PATCH] btrfs: fix potential deadlock in the search ioctl
With the conversion of the tree locks to rwsem I got the following
lockdep splat:
======================================================
WARNING: possible circular locking dependency detected
5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 Not tainted
------------------------------------------------------
compsize/11122 is trying to acquire lock:
ffff889fabca8768 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90
but task is already holding lock:
ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (btrfs-fs-00){++++}-{3:3}:
down_write_nested+0x3b/0x70
__btrfs_tree_lock+0x24/0x120
btrfs_search_slot+0x756/0x990
btrfs_lookup_inode+0x3a/0xb4
__btrfs_update_delayed_inode+0x93/0x270
btrfs_async_run_delayed_root+0x168/0x230
btrfs_work_helper+0xd4/0x570
process_one_work+0x2ad/0x5f0
worker_thread+0x3a/0x3d0
kthread+0x133/0x150
ret_from_fork+0x1f/0x30
-> #1 (&delayed_node->mutex){+.+.}-{3:3}:
__mutex_lock+0x9f/0x930
btrfs_delayed_update_inode+0x50/0x440
btrfs_update_inode+0x8a/0xf0
btrfs_dirty_inode+0x5b/0xd0
touch_atime+0xa1/0xd0
btrfs_file_mmap+0x3f/0x60
mmap_region+0x3a4/0x640
do_mmap+0x376/0x580
vm_mmap_pgoff+0xd5/0x120
ksys_mmap_pgoff+0x193/0x230
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #0 (&mm->mmap_lock#2){++++}-{3:3}:
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
__might_fault+0x68/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
other info that might help us debug this:
Chain exists of:
&mm->mmap_lock#2 --> &delayed_node->mutex --> btrfs-fs-00
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(btrfs-fs-00);
lock(&delayed_node->mutex);
lock(btrfs-fs-00);
lock(&mm->mmap_lock#2);
*** DEADLOCK ***
1 lock held by compsize/11122:
#0: ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
stack backtrace:
CPU: 17 PID: 11122 Comm: compsize Kdump: loaded Not tainted 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922
Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
Call Trace:
dump_stack+0x78/0xa0
check_noncircular+0x165/0x180
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
? __might_fault+0x3e/0x90
? find_held_lock+0x72/0x90
__might_fault+0x68/0x90
? __might_fault+0x3e/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
? btrfs_search_forward+0x2a6/0x360
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
? __do_sys_newfstat+0x5a/0x70
? ksys_ioctl+0x83/0xc0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
The problem is we're doing a copy_to_user() while holding tree locks,
which can deadlock if we have to do a page fault for the copy_to_user().
This exists even without my locking changes, so it needs to be fixed.
Rework the search ioctl to do the pre-fault and then
copy_to_user_nofault for the copying.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 617ea38e6fd7..c15ab6c1897f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5653,9 +5653,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5675,7 +5675,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 00a88f2eb5ab..30794ae58498 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -241,9 +241,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd3511c5ca81..ac45f022b495 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2086,9 +2086,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly this next time through
+ */
+ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2096,10 +2101,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+ * * sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2184,6 +2193,10 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:27 -0400
Subject: [PATCH] btrfs: fix potential deadlock in the search ioctl
With the conversion of the tree locks to rwsem I got the following
lockdep splat:
======================================================
WARNING: possible circular locking dependency detected
5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 Not tainted
------------------------------------------------------
compsize/11122 is trying to acquire lock:
ffff889fabca8768 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90
but task is already holding lock:
ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (btrfs-fs-00){++++}-{3:3}:
down_write_nested+0x3b/0x70
__btrfs_tree_lock+0x24/0x120
btrfs_search_slot+0x756/0x990
btrfs_lookup_inode+0x3a/0xb4
__btrfs_update_delayed_inode+0x93/0x270
btrfs_async_run_delayed_root+0x168/0x230
btrfs_work_helper+0xd4/0x570
process_one_work+0x2ad/0x5f0
worker_thread+0x3a/0x3d0
kthread+0x133/0x150
ret_from_fork+0x1f/0x30
-> #1 (&delayed_node->mutex){+.+.}-{3:3}:
__mutex_lock+0x9f/0x930
btrfs_delayed_update_inode+0x50/0x440
btrfs_update_inode+0x8a/0xf0
btrfs_dirty_inode+0x5b/0xd0
touch_atime+0xa1/0xd0
btrfs_file_mmap+0x3f/0x60
mmap_region+0x3a4/0x640
do_mmap+0x376/0x580
vm_mmap_pgoff+0xd5/0x120
ksys_mmap_pgoff+0x193/0x230
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
-> #0 (&mm->mmap_lock#2){++++}-{3:3}:
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
__might_fault+0x68/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
other info that might help us debug this:
Chain exists of:
&mm->mmap_lock#2 --> &delayed_node->mutex --> btrfs-fs-00
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(btrfs-fs-00);
lock(&delayed_node->mutex);
lock(btrfs-fs-00);
lock(&mm->mmap_lock#2);
*** DEADLOCK ***
1 lock held by compsize/11122:
#0: ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
stack backtrace:
CPU: 17 PID: 11122 Comm: compsize Kdump: loaded Not tainted 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922
Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
Call Trace:
dump_stack+0x78/0xa0
check_noncircular+0x165/0x180
__lock_acquire+0x1272/0x2310
lock_acquire+0x9e/0x360
? __might_fault+0x3e/0x90
? find_held_lock+0x72/0x90
__might_fault+0x68/0x90
? __might_fault+0x3e/0x90
_copy_to_user+0x1e/0x80
copy_to_sk.isra.32+0x121/0x300
? btrfs_search_forward+0x2a6/0x360
search_ioctl+0x106/0x200
btrfs_ioctl_tree_search_v2+0x7b/0xf0
btrfs_ioctl+0x106f/0x30a0
? __do_sys_newfstat+0x5a/0x70
? ksys_ioctl+0x83/0xc0
ksys_ioctl+0x83/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x50/0x90
entry_SYSCALL_64_after_hwframe+0x44/0xa9
The problem is we're doing a copy_to_user() while holding tree locks,
which can deadlock if we have to do a page fault for the copy_to_user().
This exists even without my locking changes, so it needs to be fixed.
Rework the search ioctl to do the pre-fault and then
copy_to_user_nofault for the copying.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 617ea38e6fd7..c15ab6c1897f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5653,9 +5653,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5675,7 +5675,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 00a88f2eb5ab..30794ae58498 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -241,9 +241,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd3511c5ca81..ac45f022b495 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2086,9 +2086,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly this next time through
+ */
+ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2096,10 +2101,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+ * * sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2184,6 +2193,10 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d3beaa253fd6fa40b8b18a216398e6e5376a9d21 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:31 -0400
Subject: [PATCH] btrfs: set the lockdep class for log tree extent buffers
These are special extent buffers that get rewound in order to lookup
the state of the tree at a specific point in time. As such they do not
go through the normal initialization paths that set their lockdep class,
so handle them appropriately when they are created and before they are
locked.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cd1cd673bc0b..cd392da69b81 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1297,6 +1297,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
+ eb_rewin, btrfs_header_level(eb_rewin));
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
@@ -1370,7 +1372,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (!eb)
return NULL;
- btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
@@ -1378,6 +1379,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
+ btrfs_header_level(eb));
+ btrfs_tree_read_lock(eb);
if (tm)
__tree_mod_log_rewind(fs_info, eb, time_seq, tm);
else
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d3beaa253fd6fa40b8b18a216398e6e5376a9d21 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:31 -0400
Subject: [PATCH] btrfs: set the lockdep class for log tree extent buffers
These are special extent buffers that get rewound in order to lookup
the state of the tree at a specific point in time. As such they do not
go through the normal initialization paths that set their lockdep class,
so handle them appropriately when they are created and before they are
locked.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cd1cd673bc0b..cd392da69b81 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1297,6 +1297,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
+ eb_rewin, btrfs_header_level(eb_rewin));
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
@@ -1370,7 +1372,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (!eb)
return NULL;
- btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
@@ -1378,6 +1379,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
+ btrfs_header_level(eb));
+ btrfs_tree_read_lock(eb);
if (tm)
__tree_mod_log_rewind(fs_info, eb, time_seq, tm);
else
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d3beaa253fd6fa40b8b18a216398e6e5376a9d21 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:31 -0400
Subject: [PATCH] btrfs: set the lockdep class for log tree extent buffers
These are special extent buffers that get rewound in order to lookup
the state of the tree at a specific point in time. As such they do not
go through the normal initialization paths that set their lockdep class,
so handle them appropriately when they are created and before they are
locked.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cd1cd673bc0b..cd392da69b81 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1297,6 +1297,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
+ eb_rewin, btrfs_header_level(eb_rewin));
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
@@ -1370,7 +1372,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (!eb)
return NULL;
- btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
@@ -1378,6 +1379,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
+ btrfs_header_level(eb));
+ btrfs_tree_read_lock(eb);
if (tm)
__tree_mod_log_rewind(fs_info, eb, time_seq, tm);
else
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d3beaa253fd6fa40b8b18a216398e6e5376a9d21 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Mon, 10 Aug 2020 11:42:31 -0400
Subject: [PATCH] btrfs: set the lockdep class for log tree extent buffers
These are special extent buffers that get rewound in order to lookup
the state of the tree at a specific point in time. As such they do not
go through the normal initialization paths that set their lockdep class,
so handle them appropriately when they are created and before they are
locked.
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cd1cd673bc0b..cd392da69b81 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1297,6 +1297,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
+ eb_rewin, btrfs_header_level(eb_rewin));
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
@@ -1370,7 +1372,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (!eb)
return NULL;
- btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
@@ -1378,6 +1379,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
+ btrfs_header_level(eb));
+ btrfs_tree_read_lock(eb);
if (tm)
__tree_mod_log_rewind(fs_info, eb, time_seq, tm);
else
This is a note to let you know that I've just added the patch titled
usb: typec: intel_pmc_mux: Do not configure SBU and HSL Orientation
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 7c6bbdf086ac7f1374bcf1ef0994b15109ecaf48 Mon Sep 17 00:00:00 2001
From: Utkarsh Patel <utkarsh.h.patel(a)intel.com>
Date: Mon, 7 Sep 2020 17:21:52 +0300
Subject: usb: typec: intel_pmc_mux: Do not configure SBU and HSL Orientation
in Alternate modes
According to the PMC Type C Subsystem (TCSS) Mux programming guide rev
0.7, bits 4 and 5 are reserved in Alternate modes.
SBU Orientation and HSL Orientation needs to be configured only during
initial cable detection in USB connect flow based on device property of
"sbu-orientation" and "hsl-orientation".
Configuring these reserved bits in the Alternate modes may result in delay
in display link training or some unexpected behaviour.
So do not configure them while issuing Alternate Mode requests.
Fixes: ff4a30d5e243 ("usb: typec: mux: intel_pmc_mux: Support for static SBU/HSL orientation")
Signed-off-by: Utkarsh Patel <utkarsh.h.patel(a)intel.com>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200907142152.35678-3-heikki.krogerus@linux.inte…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/typec/mux/intel_pmc_mux.c | 8 --------
1 file changed, 8 deletions(-)
diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index bb9cb4ec4d6f..ec7da0fa3cf8 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -61,8 +61,6 @@ enum {
#define PMC_USB_ALTMODE_ORI_SHIFT 1
#define PMC_USB_ALTMODE_UFP_SHIFT 3
-#define PMC_USB_ALTMODE_ORI_AUX_SHIFT 4
-#define PMC_USB_ALTMODE_ORI_HSL_SHIFT 5
/* DP specific Mode Data bits */
#define PMC_USB_ALTMODE_DP_MODE_SHIFT 8
@@ -178,9 +176,6 @@ pmc_usb_mux_dp(struct pmc_usb_port *port, struct typec_mux_state *state)
req.mode_data = (port->orientation - 1) << PMC_USB_ALTMODE_ORI_SHIFT;
req.mode_data |= (port->role - 1) << PMC_USB_ALTMODE_UFP_SHIFT;
- req.mode_data |= sbu_orientation(port) << PMC_USB_ALTMODE_ORI_AUX_SHIFT;
- req.mode_data |= hsl_orientation(port) << PMC_USB_ALTMODE_ORI_HSL_SHIFT;
-
req.mode_data |= (state->mode - TYPEC_STATE_MODAL) <<
PMC_USB_ALTMODE_DP_MODE_SHIFT;
@@ -208,9 +203,6 @@ pmc_usb_mux_tbt(struct pmc_usb_port *port, struct typec_mux_state *state)
req.mode_data = (port->orientation - 1) << PMC_USB_ALTMODE_ORI_SHIFT;
req.mode_data |= (port->role - 1) << PMC_USB_ALTMODE_UFP_SHIFT;
- req.mode_data |= sbu_orientation(port) << PMC_USB_ALTMODE_ORI_AUX_SHIFT;
- req.mode_data |= hsl_orientation(port) << PMC_USB_ALTMODE_ORI_HSL_SHIFT;
-
if (TBT_ADAPTER(data->device_mode) == TBT_ADAPTER_TBT3)
req.mode_data |= PMC_USB_ALTMODE_TBT_TYPE;
--
2.28.0
Errors returned by crypto_shash_update() are not checked in
ima_calc_boot_aggregate_tfm() and thus can be overwritten at the next
iteration of the loop. This patch adds a check after calling
crypto_shash_update() and returns immediately if the result is not zero.
Cc: stable(a)vger.kernel.org
Fixes: 3323eec921efd ("integrity: IMA as an integrity service provider")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
Reviewed-by: Mimi Zohar <zohar(a)linux.ibm.com>
---
security/integrity/ima/ima_crypto.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 011c3c76af86..21989fa0c107 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -829,6 +829,8 @@ static int ima_calc_boot_aggregate_tfm(char *digest, u16 alg_id,
/* now accumulate with current aggregate */
rc = crypto_shash_update(shash, d.digest,
crypto_shash_digestsize(tfm));
+ if (rc != 0)
+ return rc;
}
/*
* Extend cumulative digest over TPM registers 8-9, which contain
--
2.27.GIT
Hi
[This is an automated email]
This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all
The bot has tested the following trees: v5.8.7, v5.4.63, v4.19.143, v4.14.196, v4.9.235, v4.4.235.
v5.8.7: Build OK!
v5.4.63: Build OK!
v4.19.143: Build OK!
v4.14.196: Failed to apply! Possible dependencies:
06856938112b ("fs: ext2: Adding new return type vm_fault_t")
1b5a1cb21e0c ("dax: Inline dax_insert_mapping() into the callsite")
31a6f1a6e5a4 ("dax: Simplify arguments of dax_insert_mapping()")
5e161e4066d3 ("dax: Factor out getting of pfn out of iomap")
9a0dd4225143 ("dax: Allow dax_iomap_fault() to return pfn")
a0987ad5c576 ("dax: Create local variable for VMA in dax_iomap_pte_fault()")
aaa422c4c3f6 ("fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core")
c0b246259792 ("dax: pass detailed error code from dax_iomap_fault()")
caa51d26f85c ("dax, iomap: Add support for synchronous faults")
cec04e8c825e ("dax: Fix comment describing dax_iomap_fault()")
d2c43ef13327 ("dax: Create local variable for vmf->flags & FAULT_FLAG_WRITE test")
f5b7b74876cf ("dax: Allow tuning whether dax_insert_mapping_entry() dirties entry")
v4.9.235: Failed to apply! Possible dependencies:
024b6a63138c ("gpu: drm: gma500: Use vma_pages()")
11bac8000449 ("mm, fs: reduce fault, page_mkwrite, and pfn_mkwrite to take only vmf")
1a29d85eb0f1 ("mm: use vmf->address instead of of vmf->virtual_address")
82b0f8c39a38 ("mm: join struct fault_env and vm_fault")
v4.4.235: Failed to apply! Possible dependencies:
11bac8000449 ("mm, fs: reduce fault, page_mkwrite, and pfn_mkwrite to take only vmf")
366baf28ee3f ("KVM: PPC: Use RCU for arch.spapr_tce_tables")
462ee11e58c9 ("KVM: PPC: Replace SPAPR_TCE_SHIFT with IOMMU_PAGE_SHIFT_4K")
5ee7af18642c ("KVM: PPC: Move reusable bits of H_PUT_TCE handler to helpers")
d3695aa4f452 ("KVM: PPC: Add support for multiple-TCE hcalls")
f8626985c7c2 ("KVM: PPC: Account TCE-containing pages in locked_vm")
fcbb2ce67284 ("KVM: PPC: Rework H_PUT_TCE/H_GET_TCE handlers")
fe26e52712cc ("KVM: PPC: Add @page_shift to kvmppc_spapr_tce_table")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks
Sasha
From: Utkarsh Patel <utkarsh.h.patel(a)intel.com>
According to the PMC Type C Subsystem (TCSS) Mux programming guide rev
0.7, bit 14 is reserved in Alternate mode.
In DP Alternate Mode state, if the HPD_STATE (bit 7) field in the
status update command VDO is set to HPD_HIGH, HPD is configured via
separate HPD mode request after configuring DP Alternate mode request.
Configuring reserved bit may show unexpected behaviour.
So do not configure them while issuing the Alternate Mode request.
Fixes: 7990be48ef4d ("usb: typec: mux: intel: Handle alt mode HPD_HIGH")
Cc: stable(a)vger.kernel.org
Signed-off-by: Utkarsh Patel <utkarsh.h.patel(a)intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
---
drivers/usb/typec/mux/intel_pmc_mux.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index e4021e13af40a..802d443b367c6 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -68,7 +68,6 @@ enum {
#define PMC_USB_ALTMODE_DP_MODE_SHIFT 8
/* TBT specific Mode Data bits */
-#define PMC_USB_ALTMODE_HPD_HIGH BIT(14)
#define PMC_USB_ALTMODE_TBT_TYPE BIT(17)
#define PMC_USB_ALTMODE_CABLE_TYPE BIT(18)
#define PMC_USB_ALTMODE_ACTIVE_LINK BIT(20)
@@ -185,9 +184,6 @@ pmc_usb_mux_dp(struct pmc_usb_port *port, struct typec_mux_state *state)
req.mode_data |= (state->mode - TYPEC_STATE_MODAL) <<
PMC_USB_ALTMODE_DP_MODE_SHIFT;
- if (data->status & DP_STATUS_HPD_STATE)
- req.mode_data |= PMC_USB_ALTMODE_HPD_HIGH;
-
ret = pmc_usb_command(port, (void *)&req, sizeof(req));
if (ret)
return ret;
--
2.28.0
From: Utkarsh Patel <utkarsh.h.patel(a)intel.com>
According to the PMC Type C Subsystem (TCSS) Mux programming guide rev
0.7, bit 14 is reserved in Alternate mode.
In DP Alternate Mode state, if the HPD_STATE (bit 7) field in the
status update command VDO is set to HPD_HIGH, HPD is configured via
separate HPD mode request after configuring DP Alternate mode request.
Configuring reserved bit may show unexpected behaviour.
So do not configure them while issuing the Alternate Mode request.
Fixes: 7990be48ef4d ("usb: typec: mux: intel: Handle alt mode HPD_HIGH")
Cc: stable(a)vger.kernel.org
Signed-off-by: Utkarsh Patel <utkarsh.h.patel(a)intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
---
drivers/usb/typec/mux/intel_pmc_mux.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index e4021e13af40a..802d443b367c6 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -68,7 +68,6 @@ enum {
#define PMC_USB_ALTMODE_DP_MODE_SHIFT 8
/* TBT specific Mode Data bits */
-#define PMC_USB_ALTMODE_HPD_HIGH BIT(14)
#define PMC_USB_ALTMODE_TBT_TYPE BIT(17)
#define PMC_USB_ALTMODE_CABLE_TYPE BIT(18)
#define PMC_USB_ALTMODE_ACTIVE_LINK BIT(20)
@@ -185,9 +184,6 @@ pmc_usb_mux_dp(struct pmc_usb_port *port, struct typec_mux_state *state)
req.mode_data |= (state->mode - TYPEC_STATE_MODAL) <<
PMC_USB_ALTMODE_DP_MODE_SHIFT;
- if (data->status & DP_STATUS_HPD_STATE)
- req.mode_data |= PMC_USB_ALTMODE_HPD_HIGH;
-
ret = pmc_usb_command(port, (void *)&req, sizeof(req));
if (ret)
return ret;
--
2.28.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 71f6ddbfce21 - include/linux/log2.h: add missing () around n in roundup_pow_of_two()
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
Host 2:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ kernel-rt: rt_migrate_test
✅ kernel-rt: rteval
✅ kernel-rt: sched_deadline
✅ kernel-rt: smidetect
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
🚧 ⚡⚡⚡ Storage nvme - tcp
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
From: Alex Williamson <alex.williamson(a)redhat.com>
commit 41311242221e3482b20bfed10fa4d9db98d87016 upstream.
With conversion to follow_pfn(), DMA mapping a PFNMAP range depends on
the range being faulted into the vma. Add support to manually provide
that, in the same way as done on KVM with hva_to_pfn_remapped().
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
[Ajay: Regenerated the patch for v4.19]
Signed-off-by: Ajay Kaher <akaher(a)vmware.com>
---
drivers/vfio/vfio_iommu_type1.c | 36 +++++++++++++++++++++++++++++++++---
1 file changed, 33 insertions(+), 3 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 6dbdadb..cb8d8bb 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -343,6 +343,32 @@ static int put_pfn(unsigned long pfn, int prot)
return 0;
}
+static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
+ unsigned long vaddr, unsigned long *pfn,
+ bool write_fault)
+{
+ int ret;
+
+ ret = follow_pfn(vma, vaddr, pfn);
+ if (ret) {
+ bool unlocked = false;
+
+ ret = fixup_user_fault(NULL, mm, vaddr,
+ FAULT_FLAG_REMOTE |
+ (write_fault ? FAULT_FLAG_WRITE : 0),
+ &unlocked);
+ if (unlocked)
+ return -EAGAIN;
+
+ if (ret)
+ return ret;
+
+ ret = follow_pfn(vma, vaddr, pfn);
+ }
+
+ return ret;
+}
+
static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
int prot, unsigned long *pfn)
{
@@ -382,12 +408,16 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
down_read(&mm->mmap_sem);
+retry:
vma = find_vma_intersection(mm, vaddr, vaddr + 1);
if (vma && vma->vm_flags & VM_PFNMAP) {
- if (!follow_pfn(vma, vaddr, pfn) &&
- is_invalid_reserved_pfn(*pfn))
- ret = 0;
+ ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
+ if (ret == -EAGAIN)
+ goto retry;
+
+ if (!ret && !is_invalid_reserved_pfn(*pfn))
+ ret = -EFAULT;
}
up_read(&mm->mmap_sem);
--
2.7.4
If block_write_full_page() is called for a page that is beyond current
inode size, it will truncate page buffers for the page and return 0.
This logic has been added in 2.5.62 in commit 81eb69062588 ("fix ext3
BUG due to race with truncate") in history.git tree to fix a problem
with ext3 in data=ordered mode. This particular problem doesn't exist
anymore because ext3 is long gone and ext4 handles ordered data
differently. Also normally buffers are invalidated by truncate code and
there's no need to specially handle this in ->writepage() code.
This invalidation of page buffers in block_write_full_page() is causing
issues to filesystems (e.g. ext4 or ocfs2) when block device is shrunk
under filesystem's hands and metadata buffers get discarded while being
tracked by the journalling layer. Although it is obviously "not
supported" it can cause kernel crashes like:
[ 7986.689400] BUG: unable to handle kernel NULL pointer dereference at
+0000000000000008
[ 7986.697197] PGD 0 P4D 0
[ 7986.699724] Oops: 0002 [#1] SMP PTI
[ 7986.703200] CPU: 4 PID: 203778 Comm: jbd2/dm-3-8 Kdump: loaded Tainted: G
+O --------- - - 4.18.0-147.5.0.5.h126.eulerosv2r9.x86_64 #1
[ 7986.716438] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 1.57 08/11/2015
[ 7986.723462] RIP: 0010:jbd2_journal_grab_journal_head+0x1b/0x40 [jbd2]
...
[ 7986.810150] Call Trace:
[ 7986.812595] __jbd2_journal_insert_checkpoint+0x23/0x70 [jbd2]
[ 7986.818408] jbd2_journal_commit_transaction+0x155f/0x1b60 [jbd2]
[ 7986.836467] kjournald2+0xbd/0x270 [jbd2]
which is not great. The crash happens because bh->b_private is suddently
NULL although BH_JBD flag is still set (this is because
block_invalidatepage() cleared BH_Mapped flag and subsequent bh lookup
found buffer without BH_Mapped set, called init_page_buffers() which has
rewritten bh->b_private). So just remove the invalidation in
block_write_full_page().
Note that the buffer cache invalidation when block device changes size
is already careful to avoid similar problems by using
invalidate_mapping_pages() which skips busy buffers so it was only this
odd block_write_full_page() behavior that could tear down bdev buffers
under filesystem's hands.
Reported-by: Ye Bin <yebin10(a)huawei.com>
CC: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/buffer.c | 16 ----------------
1 file changed, 16 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 061dd202979d..163c2c0b9aa3 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2771,16 +2771,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
- /*
- * The page may have dirty, unmapped buffers. For example,
- * they may have been added in ext3_writepage(). Make them
- * freeable here, so the page does not leak.
- */
-#if 0
- /* Not really sure about this - do we need this ? */
- if (page->mapping->a_ops->invalidatepage)
- page->mapping->a_ops->invalidatepage(page, offset);
-#endif
unlock_page(page);
return 0; /* don't care */
}
@@ -2975,12 +2965,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
- /*
- * The page may have dirty, unmapped buffers. For example,
- * they may have been added in ext3_writepage(). Make them
- * freeable here, so the page does not leak.
- */
- do_invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return 0; /* don't care */
}
--
2.16.4
The USB device descriptor may get changed between two consecutive
enumerations on the same device for some reason, such as DFU or
malicius device.
In that case, we may access the changing descriptor if we don't take
the device lock here.
The issue is reported:
https://syzkaller.appspot.com/bug?id=901a0d9e6519ef8dc7acab25344bd287dd3c7b…
Cc: stable <stable(a)vger.kernel.org>
Cc: Alan Stern <stern(a)rowland.harvard.edu>
Reported-by: syzbot+256e56ddde8b8957eabd(a)syzkaller.appspotmail.com
Fixes: 217a9081d8e6 ("USB: add all configs to the "descriptors" attribute")
Signed-off-by: Zeng Tao <prime.zeng(a)hisilicon.com>
---
drivers/usb/core/sysfs.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index a2ca38e..8d13419 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -889,7 +889,11 @@ read_descriptors(struct file *filp, struct kobject *kobj,
size_t srclen, n;
int cfgno;
void *src;
+ int retval;
+ retval = usb_lock_device_interruptible(udev);
+ if (retval < 0)
+ return -EINTR;
/* The binary attribute begins with the device descriptor.
* Following that are the raw descriptor entries for all the
* configurations (config plus subsidiary descriptors).
@@ -914,6 +918,7 @@ read_descriptors(struct file *filp, struct kobject *kobj,
off -= srclen;
}
}
+ usb_unlock_device(udev);
return count - nleft;
}
--
2.8.1
Since INGENIC_GENERIC_BOARD was introduced, the JZ4740_QI_LB60 option
is no longer the default, so the symbol has to be selected by the
defconfig, otherwise the kernel built will be for a generic Ingenic
board and won't have the Device Tree blob built-in.
Cc: stable(a)vger.kernel.org # v5.7
Fixes: 62249209a772 ("MIPS: ingenic: Default to a generic board")
Signed-off-by: Paul Cercueil <paul(a)crapouillou.net>
---
Notes:
v3: New patch
arch/mips/configs/qi_lb60_defconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig
index 81bfbee72b0c..9c2c183085d1 100644
--- a/arch/mips/configs/qi_lb60_defconfig
+++ b/arch/mips/configs/qi_lb60_defconfig
@@ -8,6 +8,7 @@ CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
CONFIG_SLAB=y
CONFIG_MACH_INGENIC=y
+CONFIG_JZ4740_QI_LB60=y
CONFIG_HZ_100=y
# CONFIG_SECCOMP is not set
CONFIG_MODULES=y
--
2.28.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: b72c392fa3e5 - Linux 5.8.7
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ❌ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
Host 3:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
🚧 ❌ Storage nvme - tcp
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
🚧 ⚡⚡⚡ kdump - file-load
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ ACPI table test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ kernel-rt: rt_migrate_test
⚡⚡⚡ kernel-rt: rteval
⚡⚡⚡ kernel-rt: sched_deadline
⚡⚡⚡ kernel-rt: smidetect
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: SCSI VPD
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking firewall: basic netfilter test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ kdump - kexec_boot
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
🚧 ⚡⚡⚡ Storage nvme - tcp
Host 4:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ kernel-rt: rt_migrate_test
✅ kernel-rt: rteval
✅ kernel-rt: sched_deadline
✅ kernel-rt: smidetect
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ❌ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
LLVM implemented a recent "libcall optimization" that lowers calls to
`sprintf(dest, "%s", str)` where the return value is used to
`stpcpy(dest, str) - dest`. This generally avoids the machinery involved
in parsing format strings. `stpcpy` is just like `strcpy` except it
returns the pointer to the new tail of `dest`. This optimization was
introduced into clang-12.
Implement this so that we don't observe linkage failures due to missing
symbol definitions for `stpcpy`.
Similar to last year's fire drill with:
commit 5f074f3e192f ("lib/string.c: implement a basic bcmp")
The kernel is somewhere between a "freestanding" environment (no full libc)
and "hosted" environment (many symbols from libc exist with the same
type, function signature, and semantics).
As H. Peter Anvin notes, there's not really a great way to inform the
compiler that you're targeting a freestanding environment but would like
to opt-in to some libcall optimizations (see pr/47280 below), rather than
opt-out.
Arvind notes, -fno-builtin-* behaves slightly differently between GCC
and Clang, and Clang is missing many __builtin_* definitions, which I
consider a bug in Clang and am working on fixing.
Masahiro summarizes the subtle distinction between compilers justly:
To prevent transformation from foo() into bar(), there are two ways in
Clang to do that; -fno-builtin-foo, and -fno-builtin-bar. There is
only one in GCC; -fno-buitin-foo.
(Any difference in that behavior in Clang is likely a bug from a missing
__builtin_* definition.)
Masahiro also notes:
We want to disable optimization from foo() to bar(),
but we may still benefit from the optimization from
foo() into something else. If GCC implements the same transform, we
would run into a problem because it is not -fno-builtin-bar, but
-fno-builtin-foo that disables that optimization.
In this regard, -fno-builtin-foo would be more future-proof than
-fno-built-bar, but -fno-builtin-foo is still potentially overkill. We
may want to prevent calls from foo() being optimized into calls to
bar(), but we still may want other optimization on calls to foo().
It seems that compilers today don't quite provide the fine grain control
over which libcall optimizations pseudo-freestanding environments would
prefer.
Finally, Kees notes that this interface is unsafe, so we should not
encourage its use. As such, I've removed the declaration from any
header, but it still needs to be exported to avoid linkage errors in
modules.
Cc: stable(a)vger.kernel.org
Link: https://bugs.llvm.org/show_bug.cgi?id=47162
Link: https://bugs.llvm.org/show_bug.cgi?id=47280
Link: https://github.com/ClangBuiltLinux/linux/issues/1126
Link: https://man7.org/linux/man-pages/man3/stpcpy.3.html
Link: https://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html
Link: https://reviews.llvm.org/D85963
Suggested-by: Andy Lavr <andy.lavr(a)gmail.com>
Suggested-by: Arvind Sankar <nivedita(a)alum.mit.edu>
Suggested-by: Joe Perches <joe(a)perches.com>
Suggested-by: Masahiro Yamada <masahiroy(a)kernel.org>
Suggested-by: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Reported-by: Sami Tolvanen <samitolvanen(a)google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
Changes V3:
* Drop Sami's Tested by tag; newer patch.
* Add EXPORT_SYMBOL as per Andy.
* Rewrite commit message, rewrote part of what Masahiro said to be
generic in terms of foo() and bar().
* Prefer %NUL-terminated to NULL terminated. NUL is the ASCII character
'\0', as per Arvind and Rasmus.
Changes V2:
* Added Sami's Tested by; though the patch changed implementation, the
missing symbol at link time was the problem Sami was observing.
* Fix __restrict -> __restrict__ typo as per Joe.
* Drop note about restrict from commit message as per Arvind.
* Fix NULL -> NUL as per Arvind; NUL is ASCII '\0'. TIL
* Fix off by one error as per Arvind; I had another off by one error in
my test program that was masking this.
lib/string.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/lib/string.c b/lib/string.c
index 6012c385fb31..6bd0cf0fb009 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -272,6 +272,30 @@ ssize_t strscpy_pad(char *dest, const char *src, size_t count)
}
EXPORT_SYMBOL(strscpy_pad);
+/**
+ * stpcpy - copy a string from src to dest returning a pointer to the new end
+ * of dest, including src's %NUL-terminator. May overrun dest.
+ * @dest: pointer to end of string being copied into. Must be large enough
+ * to receive copy.
+ * @src: pointer to the beginning of string being copied from. Must not overlap
+ * dest.
+ *
+ * stpcpy differs from strcpy in a key way: the return value is the new
+ * %NUL-terminated character. (for strcpy, the return value is a pointer to
+ * src. This interface is considered unsafe as it doesn't perform bounds
+ * checking of the inputs. As such it's not recommended for usage. Instead,
+ * its definition is provided in case the compiler lowers other libcalls to
+ * stpcpy.
+ */
+char *stpcpy(char *__restrict__ dest, const char *__restrict__ src);
+char *stpcpy(char *__restrict__ dest, const char *__restrict__ src)
+{
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return --dest;
+}
+EXPORT_SYMBOL(stpcpy);
+
#ifndef __HAVE_ARCH_STRCAT
/**
* strcat - Append one %NUL-terminated string to another
--
2.28.0.297.g1956fa8f8d-goog
This is the start of the stable review cycle for the 5.4.63 release.
There are 16 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 06 Sep 2020 12:02:48 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.63-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.63-rc1
Bodo Stroesser <bstroesser(a)ts.fujitsu.com>
scsi: target: tcmu: Optimize use of flush_dcache_page
Bodo Stroesser <bstroesser(a)ts.fujitsu.com>
scsi: target: tcmu: Fix size in calls to tcmu_flush_dcache_range
Sowjanya Komatineni <skomatineni(a)nvidia.com>
sdhci: tegra: Remove SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK for Tegra186
Sowjanya Komatineni <skomatineni(a)nvidia.com>
sdhci: tegra: Remove SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK for Tegra210
Sowjanya Komatineni <skomatineni(a)nvidia.com>
arm64: tegra: Add missing timeout clock to Tegra210 SDMMC
Sowjanya Komatineni <skomatineni(a)nvidia.com>
arm64: tegra: Add missing timeout clock to Tegra186 SDMMC nodes
Sowjanya Komatineni <skomatineni(a)nvidia.com>
arm64: tegra: Add missing timeout clock to Tegra194 SDMMC nodes
Sowjanya Komatineni <skomatineni(a)nvidia.com>
dt-bindings: mmc: tegra: Add tmclk for Tegra210 and later
James Morse <james.morse(a)arm.com>
KVM: arm64: Set HCR_EL2.PTW to prevent AT taking synchronous exception
James Morse <james.morse(a)arm.com>
KVM: arm64: Survive synchronous exceptions caused by AT instructions
James Morse <james.morse(a)arm.com>
KVM: arm64: Add kvm_extable for vaxorcism code
Lucas Stach <l.stach(a)pengutronix.de>
drm/etnaviv: fix TS cache flushing on GPUs with BLT engine
Andrey Grodzovsky <andrey.grodzovsky(a)amd.com>
drm/sched: Fix passing zero to 'PTR_ERR' warning v2
Kim Phillips <kim.phillips(a)amd.com>
perf record/stat: Explicitly call out event modifiers in the documentation
Marc Zyngier <maz(a)kernel.org>
HID: core: Sanitize event code and type when mapping input
Marc Zyngier <maz(a)kernel.org>
HID: core: Correctly handle ReportSize being zero
-------------
Diffstat:
.../bindings/mmc/nvidia,tegra20-sdhci.txt | 32 ++++++++++-
Makefile | 4 +-
arch/arm64/boot/dts/nvidia/tegra186.dtsi | 20 ++++---
arch/arm64/boot/dts/nvidia/tegra194.dtsi | 15 +++--
arch/arm64/boot/dts/nvidia/tegra210.dtsi | 20 ++++---
arch/arm64/include/asm/kvm_arm.h | 3 +-
arch/arm64/include/asm/kvm_asm.h | 43 ++++++++++++++
arch/arm64/kernel/vmlinux.lds.S | 8 +++
arch/arm64/kvm/hyp/entry.S | 15 +++--
arch/arm64/kvm/hyp/hyp-entry.S | 65 ++++++++++++++--------
arch/arm64/kvm/hyp/switch.c | 39 +++++++++++--
drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 60 ++++++++++++++++++--
drivers/gpu/drm/etnaviv/state_blt.xml.h | 2 +
drivers/gpu/drm/scheduler/sched_main.c | 7 ++-
drivers/hid/hid-core.c | 15 ++++-
drivers/hid/hid-input.c | 4 ++
drivers/hid/hid-multitouch.c | 2 +
drivers/mmc/host/sdhci-tegra.c | 2 -
drivers/target/target_core_user.c | 15 +++--
include/linux/hid.h | 42 +++++++++-----
tools/perf/Documentation/perf-record.txt | 4 ++
tools/perf/Documentation/perf-stat.txt | 4 ++
22 files changed, 329 insertions(+), 92 deletions(-)
From: SeongJae Park <sjpark(a)amazon.de>
Hello,
We found below 27 commits in the 'v5.5..linus/master (upstream)' seems fixing or mentioning
commits in the 'v5.4..stable/linux-5.4.y (downstream)' but are not merged in the 'downstream' yet.
Could you please review if those need to be merged in?
A commit is considered as fix of another if the complete 'Fixed:' tag is in the
commit message. If the tag is not found but the commit message contains the
title or the hash id of the other commit, it is considered mentioning it. So,
the 'mentions' might have many false positives, but it could cover the typos (I
found such cases before).
The commits are grouped as 'fixes cleanly applicable', 'fixes not cleanly
applicable (need manual backporting to be applied)', 'mentions cleanly
applicable', and 'mentions not cleanly applicable'. Also, the commits in each
group are sorted by the commit dates (oldest first).
Both the finding of the commits and the writeup of this report is automatically
done by a little script[1]. I'm going to run the tool and post this kind of
report every couple of weeks or every month. Any comment (e.g., regarding
posting period, new features request, bug report, ...) is welcome.
Especially, if you find some commits that don't need to be merged in the
downstream, please let me know so that I can mark those as unnecessary and
don't bother you again.
[1] https://github.com/sjp38/stream-track
Thanks,
SeongJae
# v5.5: 4e3112a240ba9986cc3f67a6880da6529a955006
# linus/master: 15bc20c6af4ceee97a1f90b43c0e386643c071b4
# v5.4: 6e815efe19a99a33b16cc720c3d3a727565a4fa1
# stable/linux-5.4.y: 6576d69aac94cd8409636dfa86e0df39facdf0d2
Fixes cleanly applicable
------------------------
2fb75ceaf71a ("remoteproc: Add missing '\n' in log messages")
# commit date: 2020-04-22, author: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
# fixes 'remoteproc: Fix NULL pointer dereference in rproc_virtio_notify'
1b9ae0c92925 ("wireless: Use linux/stddef.h instead of stddef.h")
# commit date: 2020-05-27, author: Hauke Mehrtens <hauke(a)hauke-m.de>
# fixes 'wireless: Use offsetof instead of custom macro.'
e4b0e41fee94 ("ALSA: usb-audio: Use the new macro for HP Dock rename quirks")
# commit date: 2020-06-08, author: Takashi Iwai <tiwai(a)suse.de>
# fixes 'ALSA: usb-audio: Add vendor, product and profile name for HP Thunderbolt Dock'
efb94790852a ("drm/panel-simple: fix connector type for LogicPD Type28 Display")
# commit date: 2020-06-21, author: Adam Ford <aford173(a)gmail.com>
# fixes 'drm/panel: simple: Add Logic PD Type 28 display support'
2f57b8d57673 ("dmaengine: imx-sdma: Fix: Remove 'always true' comparison")
# commit date: 2020-06-24, author: Fabio Estevam <festevam(a)gmail.com>
# fixes 'dmaengine: imx-sdma: Fix the event id check to include RX event for UART6'
10de795a5add ("kprobes: Fix compiler warning for !CONFIG_KPROBES_ON_FTRACE")
# commit date: 2020-08-06, author: Muchun Song <songmuchun(a)bytedance.com>
# fixes 'kprobes: Fix NULL pointer dereference at kprobe_ftrace_handler'
Fixes not cleanly applicable
----------------------------
3907ccfaec5d ("crypto: atmel-aes - Fix CTR counter overflow when multiple fragments")
# commit date: 2019-12-20, author: Tudor Ambarus <tudor.ambarus(a)microchip.com>
# fixes 'crypto: atmel-aes - Fix counter overflow in CTR mode'
9210c075cef2 ("nvme-pci: avoid race between nvme_reap_pending_cqes() and nvme_poll()")
# commit date: 2020-05-27, author: Dongli Zhang <dongli.zhang(a)oracle.com>
# fixes 'nvme/pci: move cqe check after device shutdown'
6e2f83884c09 ("bnxt_en: Fix AER reset logic on 57500 chips.")
# commit date: 2020-06-15, author: Michael Chan <michael.chan(a)broadcom.com>
# fixes 'bnxt_en: Improve AER slot reset.'
695cf5ab401c ("ALSA: usb-audio: Fix packet size calculation")
# commit date: 2020-06-30, author: Alexander Tsoy <alexander(a)tsoy.me>
# fixes 'ALSA: usb-audio: Improve frames size computation'
2fb2799a2abb ("net: rmnet: do not allow to add multiple bridge interfaces")
# commit date: 2020-07-04, author: Taehee Yoo <ap420073(a)gmail.com>
# fixes 'net: rmnet: use upper/lower device infrastructure'
Mentions cleanly applicable
---------------------------
32ada3b9e04c ("x86/resctrl: Clean up unused function parameter in mkdir path")
# commit date: 2020-01-20, author: Xiaochen Shen <xiaochen.shen(a)intel.com>
# mentions 'x86/resctrl: Fix a deadlock due to inaccurate reference'
20f513091caf ("crypto: ccree - remove set but not used variable 'du_size'")
# commit date: 2020-02-13, author: YueHaibing <yuehaibing(a)huawei.com>
# mentions 'crypto: ccree - fix FDE descriptor sequence'
b182a66792fe ("net: ena: remove set but not used variable 'hash_key'")
# commit date: 2020-02-17, author: YueHaibing <yuehaibing(a)huawei.com>
# mentions 'net: ena: rss: do not allocate key when not supported'
cbb5494ebce5 ("Revert "thunderbolt: Prevent crash if non-active NVMem file is read"")
# commit date: 2020-04-16, author: Nicholas Johnson <nicholas.johnson-opensource(a)outlook.com.au>
# mentions 'thunderbolt: Prevent crash if non-active NVMem file is read'
1a33e10e4a95 ("net: partially revert dynamic lockdep key changes")
# commit date: 2020-05-04, author: Cong Wang <xiyou.wangcong(a)gmail.com>
# mentions 'bonding: add missing netdev_update_lockdep_key()'
7e579f3a074c ("tools arch x86 uapi: Synch asm/unistd.h with the kernel sources")
# commit date: 2020-06-09, author: Arnaldo Carvalho de Melo <acme(a)redhat.com>
# mentions 'x86/syscalls: Revert "x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long"'
c3dbe541ef77 ("blktrace: Avoid sparse warnings when assigning q->blk_trace")
# commit date: 2020-06-17, author: Jan Kara <jack(a)suse.cz>
# mentions 'blktrace: Protect q->blk_trace with RCU'
6d548b9e5d56 ("btrfs: fix reclaim_size counter leak after stealing from global reserve")
# commit date: 2020-07-02, author: Filipe Manana <fdmanana(a)suse.com>
# mentions 'btrfs: improve global reserve stealing logic'
Mentions not cleanly applicable
-------------------------------
6dbd54e4154d ("Revert "tty/serial: atmel: fix out of range clock divider handling"")
# commit date: 2019-12-18, author: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
# mentions 'tty/serial: atmel: fix out of range clock divider handling'
48d7fb181a91 ("drm/i915: Remove lite restore defines")
# commit date: 2020-02-08, author: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
# mentions 'drm/i915/gt: Detect if we miss WaIdleLiteRestore'
ff7e06a55676 ("ALSA: pcm: oss: Fix regression by buffer overflow fix (again)")
# commit date: 2020-04-03, author: Takashi Iwai <tiwai(a)suse.de>
# mentions 'ALSA: pcm: oss: Avoid plugin buffer overflow'
ac957e8c5411 ("ALSA: pcm: oss: Place the plugin buffer overflow checks correctly (for 5.7)")
# commit date: 2020-04-24, author: Takashi Iwai <tiwai(a)suse.de>
# mentions 'ALSA: pcm: oss: Avoid plugin buffer overflow'
e7511f560f54 ("bonding: remove useless stats_lock_key")
# commit date: 2020-05-04, author: Cong Wang <xiyou.wangcong(a)gmail.com>
# mentions 'bonding: fix lockdep warning in bond_get_stats()'
39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
# commit date: 2020-05-19, author: Vincent Guittot <vincent.guittot(a)linaro.org>
# mentions 'sched/fair: Fix enqueue_task_fair warning'
8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding")
# commit date: 2020-06-16, author: Chris Wilson <chris(a)chris-wilson.co.uk>
# mentions 'drm/i915/execlists: Always force a context reload when rewinding RING_TAIL'
5e548b32018d ("btrfs: do not set the full sync flag on the inode during page release")
# commit date: 2020-07-27, author: Filipe Manana <fdmanana(a)suse.com>
# mentions 'btrfs: fix race between page release and a fast fsync'
Hi
[This is an automated email]
This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all
The bot has tested the following trees: v5.8.6, v5.4.62, v4.19.143, v4.14.196, v4.9.235, v4.4.235.
v5.8.6: Build OK!
v5.4.62: Build OK!
v4.19.143: Failed to apply! Possible dependencies:
458a3bf82df4 ("lib/string: Add strscpy_pad() function")
f51a03c82ca3 ("lib/string.c: implement stpcpy")
v4.14.196: Failed to apply! Possible dependencies:
458a3bf82df4 ("lib/string: Add strscpy_pad() function")
f51a03c82ca3 ("lib/string.c: implement stpcpy")
v4.9.235: Failed to apply! Possible dependencies:
458a3bf82df4 ("lib/string: Add strscpy_pad() function")
f51a03c82ca3 ("lib/string.c: implement stpcpy")
v4.4.235: Failed to apply! Possible dependencies:
458a3bf82df4 ("lib/string: Add strscpy_pad() function")
f51a03c82ca3 ("lib/string.c: implement stpcpy")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks
Sasha
Hi
[This is an automated email]
This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all
The bot has tested the following trees: v5.8.6, v5.4.62, v4.19.143, v4.14.196, v4.9.235, v4.4.235.
v5.8.6: Failed to apply! Possible dependencies:
fb46d057db82 ("x86: Add support for ZSTD compressed kernel")
v5.4.62: Failed to apply! Possible dependencies:
003602ad5516 ("x86/*/Makefile: Use -fno-asynchronous-unwind-tables to suppress .eh_frame sections")
9e2276fa6eb3 ("arch/x86/boot: Use prefix map to avoid embedded paths")
fb46d057db82 ("x86: Add support for ZSTD compressed kernel")
v4.19.143: Failed to apply! Possible dependencies:
003602ad5516 ("x86/*/Makefile: Use -fno-asynchronous-unwind-tables to suppress .eh_frame sections")
30cc0b6c1220 ("doc: add boot protocol 2.13 description to Documentation/x86/boot.txt")
384184044981 ("x86/boot: Mostly revert commit ae7e1238e68f2a ("Add ACPI RSDP address to setup_header")")
9e2276fa6eb3 ("arch/x86/boot: Use prefix map to avoid embedded paths")
ae7e1238e68f ("x86/boot: Add ACPI RSDP address to setup_header")
c5ed311b4e31 ("x86, boot: documentation whitespace fixup")
f1f238a9f1ca ("Documentation: x86: convert boot.txt to reST")
fb46d057db82 ("x86: Add support for ZSTD compressed kernel")
v4.14.196: Failed to apply! Possible dependencies:
003602ad5516 ("x86/*/Makefile: Use -fno-asynchronous-unwind-tables to suppress .eh_frame sections")
0b3e336601b8 ("arm64: Add support for STACKLEAK gcc plugin")
1fc5dce78ad1 ("arm64/sve: Low-level SVE architectural state manipulation functions")
2d2123bc7c7f ("arm64/sve: Add prctl controls for userspace vector length management")
33b6d03469b2 ("efi: call get_event_log before ExitBootServices")
36b649760e94 ("efi: Use string literals for efi_char16_t variable initializers")
3db5e0ba8b8f ("efi/libstub: Disable some warnings for x86{,_64}")
6c3b56b19730 ("x86/boot: Disable Clang warnings about GNU extensions")
7582e22038a2 ("arm64/sve: Backend logic for setting the vector length")
9e2276fa6eb3 ("arch/x86/boot: Use prefix map to avoid embedded paths")
9e8084d3f761 ("arm64: Implement thread_struct whitelist for hardened usercopy")
bc0ee4760364 ("arm64/sve: Core task context handling")
ce279d374ff3 ("efi/libstub: Only disable stackleak plugin for arm64")
dca5203e3fe2 ("x86/boot: Add -Wno-pointer-sign to KBUILD_CFLAGS")
f779ca740f25 ("efi: Make const array 'apple' static")
v4.9.235: Failed to apply! Possible dependencies:
003602ad5516 ("x86/*/Makefile: Use -fno-asynchronous-unwind-tables to suppress .eh_frame sections")
06cbbac0f57d ("x86/build: Don't use $(LINUXINCLUDE) twice")
0acba3f91823 ("x86/boot/build: Remove always empty $(USERINCLUDE)")
2959c95d510c ("efi/capsule: Add support for Quark security header")
36b649760e94 ("efi: Use string literals for efi_char16_t variable initializers")
3db5e0ba8b8f ("efi/libstub: Disable some warnings for x86{,_64}")
46cd4b75cd0e ("efi: Add device path parser")
5520b7e7d2d2 ("x86/boot/e820: Remove spurious asm/e820/api.h inclusions")
568bc4e87033 ("efi/arm*/libstub: Invoke EFI_RNG_PROTOCOL to seed the UEFI RNG table")
58c5475aba67 ("x86/efi: Retrieve and assign Apple device properties")
66441bd3cfdc ("x86/boot/e820: Move asm/e820.h to asm/e820/api.h")
6c3b56b19730 ("x86/boot: Disable Clang warnings about GNU extensions")
70a9d8184cce ("x86/boot/e820: Introduce arch/x86/include/asm/e820/types.h")
846221cfb8f6 ("Remove references to dead make variable LINUX_INCLUDE")
9d2f86c6cad5 ("x86: Make E820_X_MAX unconditionally larger than E820MAX")
9e2276fa6eb3 ("arch/x86/boot: Use prefix map to avoid embedded paths")
dca5203e3fe2 ("x86/boot: Add -Wno-pointer-sign to KBUILD_CFLAGS")
f779ca740f25 ("efi: Make const array 'apple' static")
v4.4.235: Failed to apply! Possible dependencies:
0acba3f91823 ("x86/boot/build: Remove always empty $(USERINCLUDE)")
21266be9ed54 ("arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug")
9e2276fa6eb3 ("arch/x86/boot: Use prefix map to avoid embedded paths")
c6d308534aef ("UBSAN: run-time undefined behavior sanity checker")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks
Sasha
Hi
[This is an automated email]
This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all
The bot has tested the following trees: v5.8.6, v5.4.62, v4.19.143, v4.14.196, v4.9.235, v4.4.235.
v5.8.6: Build OK!
v5.4.62: Build OK!
v4.19.143: Failed to apply! Possible dependencies:
458a3bf82df4 ("lib/string: Add strscpy_pad() function")
v4.14.196: Failed to apply! Possible dependencies:
458a3bf82df4 ("lib/string: Add strscpy_pad() function")
v4.9.235: Failed to apply! Possible dependencies:
458a3bf82df4 ("lib/string: Add strscpy_pad() function")
v4.4.235: Failed to apply! Possible dependencies:
458a3bf82df4 ("lib/string: Add strscpy_pad() function")
NOTE: The patch will not be queued to stable trees until it is upstream.
How should we proceed with this patch?
--
Thanks
Sasha
A migrating transparent huge page has to already be unmapped. Otherwise,
the page could be modified while it is being copied to a new page and
data could be lost. The function __split_huge_pmd() checks for a PMD
migration entry before calling __split_huge_pmd_locked() leading one to
think that __split_huge_pmd_locked() can handle splitting a migrating PMD.
However, the code always increments the page->_mapcount and adjusts the
memory control group accounting assuming the page is mapped.
Also, if the PMD entry is a migration PMD entry, the call to
is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead
of migration_entry_to_pfn(pmd_to_swp_entry(pmd)).
Fix these problems by checking for a PMD migration entry.
Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
cc: stable(a)vger.kernel.org # 4.14+
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
---
No changes in v3 to this patch, just added reviewed-by and fixes to the
change log and sending this as a separate patch from the rest of the
series ("mm/hmm/nouveau: add THP migration to migrate_vma_*").
I'll hold off resending the series without this patch unless there are
changes needed.
mm/huge_memory.c | 42 +++++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 19 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a468a4acb0a..606d712d9505 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2023,7 +2023,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
put_page(page);
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return;
- } else if (is_huge_zero_pmd(*pmd)) {
+ } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
@@ -2117,30 +2117,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
pte = pte_offset_map(&_pmd, addr);
BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, entry);
- atomic_inc(&page[i]._mapcount);
- pte_unmap(pte);
- }
-
- /*
- * Set PG_double_map before dropping compound_mapcount to avoid
- * false-negative page_mapped().
- */
- if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
- for (i = 0; i < HPAGE_PMD_NR; i++)
+ if (!pmd_migration)
atomic_inc(&page[i]._mapcount);
+ pte_unmap(pte);
}
- lock_page_memcg(page);
- if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
- /* Last compound_mapcount is gone. */
- __dec_lruvec_page_state(page, NR_ANON_THPS);
- if (TestClearPageDoubleMap(page)) {
- /* No need in mapcount reference anymore */
+ if (!pmd_migration) {
+ /*
+ * Set PG_double_map before dropping compound_mapcount to avoid
+ * false-negative page_mapped().
+ */
+ if (compound_mapcount(page) > 1 &&
+ !TestSetPageDoubleMap(page)) {
for (i = 0; i < HPAGE_PMD_NR; i++)
- atomic_dec(&page[i]._mapcount);
+ atomic_inc(&page[i]._mapcount);
+ }
+
+ lock_page_memcg(page);
+ if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
+ /* Last compound_mapcount is gone. */
+ __dec_lruvec_page_state(page, NR_ANON_THPS);
+ if (TestClearPageDoubleMap(page)) {
+ /* No need in mapcount reference anymore */
+ for (i = 0; i < HPAGE_PMD_NR; i++)
+ atomic_dec(&page[i]._mapcount);
+ }
}
+ unlock_page_memcg(page);
}
- unlock_page_memcg(page);
smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
--
2.20.1
With the current implementation the following race can happen:
* blk_pre_runtime_suspend() calls blk_freeze_queue_start() and
blk_mq_unfreeze_queue().
* blk_queue_enter() calls blk_queue_pm_only() and that function returns
true.
* blk_queue_enter() calls blk_pm_request_resume() and that function does
not call pm_request_resume() because the queue runtime status is
RPM_ACTIVE.
* blk_pre_runtime_suspend() changes the queue status into RPM_SUSPENDING.
Fix this race by changing the queue runtime status into RPM_SUSPENDING
before switching q_usage_counter to atomic mode.
Acked-by: Alan Stern <stern(a)rowland.harvard.edu>
Acked-by: Stanley Chu <stanley.chu(a)mediatek.com>
Cc: Ming Lei <ming.lei(a)redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Cc: stable <stable(a)vger.kernel.org>
Fixes: 986d413b7c15 ("blk-mq: Enable support for runtime power management")
Signed-off-by: Can Guo <cang(a)codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
block/blk-pm.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/block/blk-pm.c b/block/blk-pm.c
index b85234d758f7..17bd020268d4 100644
--- a/block/blk-pm.c
+++ b/block/blk-pm.c
@@ -67,6 +67,10 @@ int blk_pre_runtime_suspend(struct request_queue *q)
WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE);
+ spin_lock_irq(&q->queue_lock);
+ q->rpm_status = RPM_SUSPENDING;
+ spin_unlock_irq(&q->queue_lock);
+
/*
* Increase the pm_only counter before checking whether any
* non-PM blk_queue_enter() calls are in progress to avoid that any
@@ -89,15 +93,14 @@ int blk_pre_runtime_suspend(struct request_queue *q)
/* Switch q_usage_counter back to per-cpu mode. */
blk_mq_unfreeze_queue(q);
- spin_lock_irq(&q->queue_lock);
- if (ret < 0)
+ if (ret < 0) {
+ spin_lock_irq(&q->queue_lock);
+ q->rpm_status = RPM_ACTIVE;
pm_runtime_mark_last_busy(q->dev);
- else
- q->rpm_status = RPM_SUSPENDING;
- spin_unlock_irq(&q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
- if (ret)
blk_clear_pm_only(q);
+ }
return ret;
}
Good day,
Im Becky Su the Purchase Manager of Yushin Exports Trading Co.Ltd. Also Trading as Uniqa Dental A general merchandise company.
We are interested in your products, can you supply so I will forward our Bulk purchase proposal immediately For your Quotes .
Thanks & regards,
Export Department of Yushin Group
Becky Su
Purchase Manager
This is the start of the stable review cycle for the 5.8.7 release.
There are 17 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 06 Sep 2020 12:02:48 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.8.7-rc1.…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.8.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.8.7-rc1
Bodo Stroesser <bstroesser(a)ts.fujitsu.com>
scsi: target: tcmu: Optimize use of flush_dcache_page
Johannes Berg <johannes.berg(a)intel.com>
nl80211: fix NL80211_ATTR_HE_6GHZ_CAPABILITY usage
Sowjanya Komatineni <skomatineni(a)nvidia.com>
sdhci: tegra: Remove SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK for Tegra186
Sowjanya Komatineni <skomatineni(a)nvidia.com>
sdhci: tegra: Remove SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK for Tegra210
Sowjanya Komatineni <skomatineni(a)nvidia.com>
arm64: tegra: Add missing timeout clock to Tegra210 SDMMC
Sowjanya Komatineni <skomatineni(a)nvidia.com>
arm64: tegra: Add missing timeout clock to Tegra186 SDMMC nodes
Sowjanya Komatineni <skomatineni(a)nvidia.com>
arm64: tegra: Add missing timeout clock to Tegra194 SDMMC nodes
Sowjanya Komatineni <skomatineni(a)nvidia.com>
dt-bindings: mmc: tegra: Add tmclk for Tegra210 and later
James Morse <james.morse(a)arm.com>
KVM: arm64: Survive synchronous exceptions caused by AT instructions
James Morse <james.morse(a)arm.com>
KVM: arm64: Add kvm_extable for vaxorcism code
Peilin Ye <yepeilin.cs(a)gmail.com>
media: media/v4l2-core: Fix kernel-infoleak in video_put_user()
Kim Phillips <kim.phillips(a)amd.com>
perf record/stat: Explicitly call out event modifiers in the documentation
Andy Lutomirski <luto(a)kernel.org>
selftests/x86/test_vsyscall: Improve the process_vm_readv() test
Dave Hansen <dave.hansen(a)linux.intel.com>
mm: fix pin vs. gup mismatch with gate pages
Stefano Brivio <sbrivio(a)redhat.com>
netfilter: nft_set_rbtree: Handle outcomes of tree rotations in overlap detection
Marc Zyngier <maz(a)kernel.org>
HID: core: Sanitize event code and type when mapping input
Marc Zyngier <maz(a)kernel.org>
HID: core: Correctly handle ReportSize being zero
-------------
Diffstat:
.../bindings/mmc/nvidia,tegra20-sdhci.txt | 32 ++++++++++-
Makefile | 4 +-
arch/arm64/boot/dts/nvidia/tegra186.dtsi | 20 ++++---
arch/arm64/boot/dts/nvidia/tegra194.dtsi | 15 +++--
arch/arm64/boot/dts/nvidia/tegra210.dtsi | 20 ++++---
arch/arm64/include/asm/kvm_asm.h | 43 ++++++++++++++
arch/arm64/kernel/vmlinux.lds.S | 8 +++
arch/arm64/kvm/hyp/entry.S | 15 +++--
arch/arm64/kvm/hyp/hyp-entry.S | 65 ++++++++++++++--------
arch/arm64/kvm/hyp/switch.c | 39 +++++++++++--
drivers/hid/hid-core.c | 15 ++++-
drivers/hid/hid-input.c | 4 ++
drivers/hid/hid-multitouch.c | 2 +
drivers/media/v4l2-core/v4l2-ioctl.c | 50 +++++++++--------
drivers/mmc/host/sdhci-tegra.c | 2 -
drivers/target/target_core_user.c | 11 ++--
include/linux/hid.h | 42 +++++++++-----
mm/gup.c | 2 +-
net/netfilter/nft_set_rbtree.c | 23 +++++---
net/wireless/nl80211.c | 2 +-
tools/perf/Documentation/perf-record.txt | 4 ++
tools/perf/Documentation/perf-stat.txt | 4 ++
tools/testing/selftests/x86/test_vsyscall.c | 22 +++++++-
23 files changed, 326 insertions(+), 118 deletions(-)
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: c04031f6147d - scsi: target: tcmu: Optimize use of flush_dcache_page
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 4:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
🚧 ❌ Storage nvme - tcp
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ❌ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
🚧 ⚡⚡⚡ Storage nvme - tcp
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 3:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ kernel-rt: rt_migrate_test
✅ kernel-rt: rteval
✅ kernel-rt: sched_deadline
✅ kernel-rt: smidetect
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ❌ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
From: Alistair Popple <alistair(a)popple.id.au>
Subject: mm/rmap: fixup copying of soft dirty and uffd ptes
During memory migration a pte is temporarily replaced with a migration
swap pte. Some pte bits from the existing mapping such as the soft-dirty
and uffd write-protect bits are preserved by copying these to the
temporary migration swap pte.
However these bits are not stored at the same location for swap and
non-swap ptes. Therefore testing these bits requires using the
appropriate helper function for the given pte type.
Unfortunately several code locations were found where the wrong helper
function is being used to test soft_dirty and uffd_wp bits which leads to
them getting incorrectly set or cleared during page-migration.
Fix these by using the correct tests based on pte type.
Link: https://lkml.kernel.org/r/20200825064232.10023-2-alistair@popple.id.au
Fixes: a5430dda8a3a ("mm/migrate: support un-addressable ZONE_DEVICE page in migration")
Fixes: 8c3328f1f36a ("mm/migrate: migrate_vma() unmap page from vma while collecting pages")
Fixes: f45ec5ff16a7 ("userfaultfd: wp: support swap and page migration")
Signed-off-by: Alistair Popple <alistair(a)popple.id.au>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Cc: Jérôme Glisse <jglisse(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Alistair Popple <alistair(a)popple.id.au>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 15 +++++++++++----
mm/rmap.c | 9 +++++++--
2 files changed, 18 insertions(+), 6 deletions(-)
--- a/mm/migrate.c~mm-rmap-fixup-copying-of-soft-dirty-and-uffd-ptes
+++ a/mm/migrate.c
@@ -2427,10 +2427,17 @@ again:
entry = make_migration_entry(page, mpfn &
MIGRATE_PFN_WRITE);
swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pte))
- swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pte))
- swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ if (pte_present(pte)) {
+ if (pte_soft_dirty(pte))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pte))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ } else {
+ if (pte_swp_soft_dirty(pte))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_swp_uffd_wp(pte))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
+ }
set_pte_at(mm, addr, ptep, swp_pte);
/*
--- a/mm/rmap.c~mm-rmap-fixup-copying-of-soft-dirty-and-uffd-ptes
+++ a/mm/rmap.c
@@ -1511,9 +1511,14 @@ static bool try_to_unmap_one(struct page
*/
entry = make_migration_entry(page, 0);
swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pteval))
+
+ /*
+ * pteval maps a zone device page and is therefore
+ * a swap pte.
+ */
+ if (pte_swp_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
- if (pte_uffd_wp(pteval))
+ if (pte_swp_uffd_wp(pteval))
swp_pte = pte_swp_mkuffd_wp(swp_pte);
set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
/*
_
Later revisions of PPRs that post-date the original Family 17h events
submission patch add these events.
Specifically, they were not in this 2017 revision of the F17h PPR:
Processor Programming Reference (PPR) for AMD Family 17h Model 01h, Revision B1 Processors Rev 1.14 - April 15, 2017
But e.g., are included in this 2019 version of the PPR:
Processor Programming Reference (PPR) for AMD Family 17h Model 18h, Revision B1 Processors Rev. 3.14 - Sep 26, 2019
Signed-off-by: Kim Phillips <kim.phillips(a)amd.com>
Fixes: 98c07a8f74f8 ("perf vendor events amd: perf PMU events for AMD Family 17h")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Arnaldo Carvalho de Melo <acme(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Cc: Jiri Olsa <jolsa(a)redhat.com>
Cc: Namhyung Kim <namhyung(a)kernel.org>
Cc: Vijay Thakkar <vijaythakkar(a)me.com>
Cc: Andi Kleen <ak(a)linux.intel.com>
Cc: John Garry <john.garry(a)huawei.com>
Cc: Kan Liang <kan.liang(a)linux.intel.com>
Cc: Yunfeng Ye <yeyunfeng(a)huawei.com>
Cc: Jin Yao <yao.jin(a)linux.intel.com>
Cc: "Martin Liška" <mliska(a)suse.cz>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Jon Grimm <jon.grimm(a)amd.com>
Cc: Martin Jambor <mjambor(a)suse.cz>
Cc: Michael Petlan <mpetlan(a)redhat.com>
Cc: William Cohen <wcohen(a)redhat.com>
Cc: Stephane Eranian <eranian(a)google.com>
Cc: Ian Rogers <irogers(a)google.com>
Cc: linux-perf-users(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
.../pmu-events/arch/x86/amdzen1/cache.json | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
index 404d4c569c01..695ed3ffa3a6 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
@@ -249,6 +249,24 @@
"BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
"UMask": "0x1"
},
+ {
+ "EventName": "l2_pf_hit_l2",
+ "EventCode": "0x70",
+ "BriefDescription": "L2 prefetch hit in L2.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_hit_l3",
+ "EventCode": "0x71",
+ "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_l3",
+ "EventCode": "0x72",
+ "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.",
+ "UMask": "0xff"
+ },
{
"EventName": "l3_request_g1.caching_l3_cache_accesses",
"EventCode": "0x01",
--
2.27.0
This is a note to let you know that I've just added the patch titled
debugfs: Fix module state check condition
to my driver-core git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
in the driver-core-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From e3b9fc7eec55e6fdc8beeed18f2ed207086341e2 Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov(a)redhat.com>
Date: Tue, 11 Aug 2020 17:01:29 +0200
Subject: debugfs: Fix module state check condition
The '#ifdef MODULE' check in the original commit does not work as intended.
The code under the check is not built at all if CONFIG_DEBUG_FS=y. Fix this
by using a correct check.
Fixes: 275678e7a9be ("debugfs: Check module state before warning in {full/open}_proxy_open()")
Signed-off-by: Vladis Dronov <vdronov(a)redhat.com>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200811150129.53343-1-vdronov@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
fs/debugfs/file.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index b167d2d02148..a768a09430c3 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -177,7 +177,7 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
goto out;
if (!fops_get(real_fops)) {
-#ifdef MODULE
+#ifdef CONFIG_MODULES
if (real_fops->owner &&
real_fops->owner->state == MODULE_STATE_GOING)
goto out;
@@ -312,7 +312,7 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
goto out;
if (!fops_get(real_fops)) {
-#ifdef MODULE
+#ifdef CONFIG_MODULES
if (real_fops->owner &&
real_fops->owner->state == MODULE_STATE_GOING)
goto out;
--
2.28.0
This is a note to let you know that I've just added the patch titled
video: fbdev: fix OOB read in vga_8planes_imageblit()
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From bd018a6a75cebb511bb55a0e7690024be975fe93 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel(a)i-love.sakura.ne.jp>
Date: Mon, 31 Aug 2020 19:37:00 +0900
Subject: video: fbdev: fix OOB read in vga_8planes_imageblit()
syzbot is reporting OOB read at vga_8planes_imageblit() [1], for
"cdat[y] >> 4" can become a negative value due to "const char *cdat".
[1] https://syzkaller.appspot.com/bug?id=0d7a0da1557dcd1989e00cb3692b26d4173b41…
Reported-by: syzbot <syzbot+69fbd3e01470f169c8c4(a)syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/90b55ec3-d5b0-3307-9f7c-7ff5c5fd6ad3@i-love.sakur…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/video/fbdev/vga16fb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/video/fbdev/vga16fb.c b/drivers/video/fbdev/vga16fb.c
index a20eeb8308ff..578d3541e3d6 100644
--- a/drivers/video/fbdev/vga16fb.c
+++ b/drivers/video/fbdev/vga16fb.c
@@ -1121,7 +1121,7 @@ static void vga_8planes_imageblit(struct fb_info *info, const struct fb_image *i
char oldop = setop(0);
char oldsr = setsr(0);
char oldmask = selectmask();
- const char *cdat = image->data;
+ const unsigned char *cdat = image->data;
u32 dx = image->dx;
char __iomem *where;
int y;
--
2.28.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 96b51cca34dd - KVM: arm64: Survive synchronous exceptions caused by AT instructions
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
🚧 ✅ Storage nvme - tcp
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
🚧 ⚡⚡⚡ kdump - sysrq-c
Host 5:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
🚧 ❌ Storage nvme - tcp
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
🚧 ⚡⚡⚡ Storage nvme - tcp
Host 2:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 3:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ kernel-rt: rt_migrate_test
✅ kernel-rt: rteval
✅ kernel-rt: sched_deadline
✅ kernel-rt: smidetect
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
This is a note to let you know that I've just added the patch titled
usb: core: fix slab-out-of-bounds Read in read_descriptors
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From a18cd6c9b6bc73dc17e8b7e9bd07decaa8833c97 Mon Sep 17 00:00:00 2001
From: Zeng Tao <prime.zeng(a)hisilicon.com>
Date: Fri, 4 Sep 2020 14:37:44 +0800
Subject: usb: core: fix slab-out-of-bounds Read in read_descriptors
The USB device descriptor may get changed between two consecutive
enumerations on the same device for some reason, such as DFU or
malicius device.
In that case, we may access the changing descriptor if we don't take
the device lock here.
The issue is reported:
https://syzkaller.appspot.com/bug?id=901a0d9e6519ef8dc7acab25344bd287dd3c7b…
Cc: stable <stable(a)vger.kernel.org>
Cc: Alan Stern <stern(a)rowland.harvard.edu>
Reported-by: syzbot+256e56ddde8b8957eabd(a)syzkaller.appspotmail.com
Fixes: 217a9081d8e6 ("USB: add all configs to the "descriptors" attribute")
Signed-off-by: Zeng Tao <prime.zeng(a)hisilicon.com>
Link: https://lore.kernel.org/r/1599201467-11000-1-git-send-email-prime.zeng@hisi…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/core/sysfs.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index a2ca38e25e0c..8d134193fa0c 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -889,7 +889,11 @@ read_descriptors(struct file *filp, struct kobject *kobj,
size_t srclen, n;
int cfgno;
void *src;
+ int retval;
+ retval = usb_lock_device_interruptible(udev);
+ if (retval < 0)
+ return -EINTR;
/* The binary attribute begins with the device descriptor.
* Following that are the raw descriptor entries for all the
* configurations (config plus subsidiary descriptors).
@@ -914,6 +918,7 @@ read_descriptors(struct file *filp, struct kobject *kobj,
off -= srclen;
}
}
+ usb_unlock_device(udev);
return count - nleft;
}
--
2.28.0
This is a note to let you know that I've just added the patch titled
Revert "usb: dwc3: meson-g12a: fix shared reset control use"
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From a6498d51821edf9615b42b968fb419a40197a982 Mon Sep 17 00:00:00 2001
From: Amjad Ouled-Ameur <aouledameur(a)baylibre.com>
Date: Thu, 27 Aug 2020 16:48:10 +0200
Subject: Revert "usb: dwc3: meson-g12a: fix shared reset control use"
This reverts commit 7a410953d1fb4dbe91ffcfdee9cbbf889d19b0d7.
This commit breaks USB on meson-gxl-s905x-libretech-cc. Reverting
the change solves the issue.
In fact, according to the reset framework code, consumers must not use
reset_control_(de)assert() on shared reset lines when reset_control_reset
has been used, and vice-versa.
Moreover, with this commit, usb is not guaranted to be reset since the
reset is likely to be initially deasserted.
Reverting the commit will bring back the suspend warning mentioned in the
commit description. Nevertheless, a warning is much less critical than
breaking dwc3-meson-g12a USB completely. We will address the warning
issue in another way as a 2nd step.
Fixes: 7a410953d1fb ("usb: dwc3: meson-g12a: fix shared reset control use")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Amjad Ouled-Ameur <aouledameur(a)baylibre.com>
Reported-by: Jerome Brunet <jbrunet(a)baylibre.com>
Acked-by: Neil Armstrong <narmstrong(a)baylibre.com>
Acked-by: Philipp Zabel <p.zabel(a)pengutronix.de>
Link: https://lore.kernel.org/r/20200827144810.26657-1-aouledameur@baylibre.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/dwc3/dwc3-meson-g12a.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c
index 88b75b5a039c..1f7f4d88ed9d 100644
--- a/drivers/usb/dwc3/dwc3-meson-g12a.c
+++ b/drivers/usb/dwc3/dwc3-meson-g12a.c
@@ -737,13 +737,13 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
goto err_disable_clks;
}
- ret = reset_control_deassert(priv->reset);
+ ret = reset_control_reset(priv->reset);
if (ret)
- goto err_assert_reset;
+ goto err_disable_clks;
ret = dwc3_meson_g12a_get_phys(priv);
if (ret)
- goto err_assert_reset;
+ goto err_disable_clks;
ret = priv->drvdata->setup_regmaps(priv, base);
if (ret)
@@ -752,7 +752,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
if (priv->vbus) {
ret = regulator_enable(priv->vbus);
if (ret)
- goto err_assert_reset;
+ goto err_disable_clks;
}
/* Get dr_mode */
@@ -765,13 +765,13 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
ret = priv->drvdata->usb_init(priv);
if (ret)
- goto err_assert_reset;
+ goto err_disable_clks;
/* Init PHYs */
for (i = 0 ; i < PHY_COUNT ; ++i) {
ret = phy_init(priv->phys[i]);
if (ret)
- goto err_assert_reset;
+ goto err_disable_clks;
}
/* Set PHY Power */
@@ -809,9 +809,6 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
for (i = 0 ; i < PHY_COUNT ; ++i)
phy_exit(priv->phys[i]);
-err_assert_reset:
- reset_control_assert(priv->reset);
-
err_disable_clks:
clk_bulk_disable_unprepare(priv->drvdata->num_clks,
priv->drvdata->clks);
--
2.28.0
This is a note to let you know that I've just added the patch titled
usb: typec: intel_pmc_mux: Un-register the USB role switch
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From 290a405ce318d036666c4155d5899eb8cd6e0d97 Mon Sep 17 00:00:00 2001
From: Madhusudanarao Amara <madhusudanarao.amara(a)intel.com>
Date: Wed, 26 Aug 2020 00:08:11 +0530
Subject: usb: typec: intel_pmc_mux: Un-register the USB role switch
Added missing code for un-register USB role switch in the remove and
error path.
Cc: Stable <stable(a)vger.kernel.org> # v5.8
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Fixes: 6701adfa9693b ("usb: typec: driver for Intel PMC mux control")
Signed-off-by: Madhusudanarao Amara <madhusudanarao.amara(a)intel.com>
Link: https://lore.kernel.org/r/20200825183811.7262-1-madhusudanarao.amara@intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/typec/mux/intel_pmc_mux.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index e4021e13af40..fd9008f19208 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -497,6 +497,7 @@ static int pmc_usb_probe(struct platform_device *pdev)
for (i = 0; i < pmc->num_ports; i++) {
typec_switch_unregister(pmc->port[i].typec_sw);
typec_mux_unregister(pmc->port[i].typec_mux);
+ usb_role_switch_unregister(pmc->port[i].usb_sw);
}
return ret;
@@ -510,6 +511,7 @@ static int pmc_usb_remove(struct platform_device *pdev)
for (i = 0; i < pmc->num_ports; i++) {
typec_switch_unregister(pmc->port[i].typec_sw);
typec_mux_unregister(pmc->port[i].typec_mux);
+ usb_role_switch_unregister(pmc->port[i].usb_sw);
}
return 0;
--
2.28.0
This is a note to let you know that I've just added the patch titled
usb: Fix out of sync data toggle if a configured device is
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From cfd54fa83a5068b61b7eb28d3c117d8354c74c7a Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Date: Tue, 1 Sep 2020 11:25:28 +0300
Subject: usb: Fix out of sync data toggle if a configured device is
reconfigured
Userspace drivers that use a SetConfiguration() request to "lightweight"
reset an already configured usb device might cause data toggles to get out
of sync between the device and host, and the device becomes unusable.
The xHCI host requires endpoints to be dropped and added back to reset the
toggle. If USB core notices the new configuration is the same as the
current active configuration it will avoid these extra steps by calling
usb_reset_configuration() instead of usb_set_configuration().
A SetConfiguration() request will reset the device side data toggles.
Make sure usb_reset_configuration() function also drops and adds back the
endpoints to ensure data toggles are in sync.
To avoid code duplication split the current usb_disable_device() function
and reuse the endpoint specific part.
Cc: stable <stable(a)vger.kernel.org>
Tested-by: Martin Thierer <mthierer(a)gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20200901082528.12557-1-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/core/message.c | 91 ++++++++++++++++++--------------------
1 file changed, 42 insertions(+), 49 deletions(-)
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 6197938dcc2d..ae1de9cc4b09 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1205,6 +1205,34 @@ void usb_disable_interface(struct usb_device *dev, struct usb_interface *intf,
}
}
+/*
+ * usb_disable_device_endpoints -- Disable all endpoints for a device
+ * @dev: the device whose endpoints are being disabled
+ * @skip_ep0: 0 to disable endpoint 0, 1 to skip it.
+ */
+static void usb_disable_device_endpoints(struct usb_device *dev, int skip_ep0)
+{
+ struct usb_hcd *hcd = bus_to_hcd(dev->bus);
+ int i;
+
+ if (hcd->driver->check_bandwidth) {
+ /* First pass: Cancel URBs, leave endpoint pointers intact. */
+ for (i = skip_ep0; i < 16; ++i) {
+ usb_disable_endpoint(dev, i, false);
+ usb_disable_endpoint(dev, i + USB_DIR_IN, false);
+ }
+ /* Remove endpoints from the host controller internal state */
+ mutex_lock(hcd->bandwidth_mutex);
+ usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL);
+ mutex_unlock(hcd->bandwidth_mutex);
+ }
+ /* Second pass: remove endpoint pointers */
+ for (i = skip_ep0; i < 16; ++i) {
+ usb_disable_endpoint(dev, i, true);
+ usb_disable_endpoint(dev, i + USB_DIR_IN, true);
+ }
+}
+
/**
* usb_disable_device - Disable all the endpoints for a USB device
* @dev: the device whose endpoints are being disabled
@@ -1218,7 +1246,6 @@ void usb_disable_interface(struct usb_device *dev, struct usb_interface *intf,
void usb_disable_device(struct usb_device *dev, int skip_ep0)
{
int i;
- struct usb_hcd *hcd = bus_to_hcd(dev->bus);
/* getting rid of interfaces will disconnect
* any drivers bound to them (a key side effect)
@@ -1264,22 +1291,8 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0)
dev_dbg(&dev->dev, "%s nuking %s URBs\n", __func__,
skip_ep0 ? "non-ep0" : "all");
- if (hcd->driver->check_bandwidth) {
- /* First pass: Cancel URBs, leave endpoint pointers intact. */
- for (i = skip_ep0; i < 16; ++i) {
- usb_disable_endpoint(dev, i, false);
- usb_disable_endpoint(dev, i + USB_DIR_IN, false);
- }
- /* Remove endpoints from the host controller internal state */
- mutex_lock(hcd->bandwidth_mutex);
- usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL);
- mutex_unlock(hcd->bandwidth_mutex);
- /* Second pass: remove endpoint pointers */
- }
- for (i = skip_ep0; i < 16; ++i) {
- usb_disable_endpoint(dev, i, true);
- usb_disable_endpoint(dev, i + USB_DIR_IN, true);
- }
+
+ usb_disable_device_endpoints(dev, skip_ep0);
}
/**
@@ -1522,6 +1535,9 @@ EXPORT_SYMBOL_GPL(usb_set_interface);
* The caller must own the device lock.
*
* Return: Zero on success, else a negative error code.
+ *
+ * If this routine fails the device will probably be in an unusable state
+ * with endpoints disabled, and interfaces only partially enabled.
*/
int usb_reset_configuration(struct usb_device *dev)
{
@@ -1537,10 +1553,7 @@ int usb_reset_configuration(struct usb_device *dev)
* calls during probe() are fine
*/
- for (i = 1; i < 16; ++i) {
- usb_disable_endpoint(dev, i, true);
- usb_disable_endpoint(dev, i + USB_DIR_IN, true);
- }
+ usb_disable_device_endpoints(dev, 1); /* skip ep0*/
config = dev->actconfig;
retval = 0;
@@ -1553,34 +1566,10 @@ int usb_reset_configuration(struct usb_device *dev)
mutex_unlock(hcd->bandwidth_mutex);
return -ENOMEM;
}
- /* Make sure we have enough bandwidth for each alternate setting 0 */
- for (i = 0; i < config->desc.bNumInterfaces; i++) {
- struct usb_interface *intf = config->interface[i];
- struct usb_host_interface *alt;
- alt = usb_altnum_to_altsetting(intf, 0);
- if (!alt)
- alt = &intf->altsetting[0];
- if (alt != intf->cur_altsetting)
- retval = usb_hcd_alloc_bandwidth(dev, NULL,
- intf->cur_altsetting, alt);
- if (retval < 0)
- break;
- }
- /* If not, reinstate the old alternate settings */
+ /* xHCI adds all endpoints in usb_hcd_alloc_bandwidth */
+ retval = usb_hcd_alloc_bandwidth(dev, config, NULL, NULL);
if (retval < 0) {
-reset_old_alts:
- for (i--; i >= 0; i--) {
- struct usb_interface *intf = config->interface[i];
- struct usb_host_interface *alt;
-
- alt = usb_altnum_to_altsetting(intf, 0);
- if (!alt)
- alt = &intf->altsetting[0];
- if (alt != intf->cur_altsetting)
- usb_hcd_alloc_bandwidth(dev, NULL,
- alt, intf->cur_altsetting);
- }
usb_enable_lpm(dev);
mutex_unlock(hcd->bandwidth_mutex);
return retval;
@@ -1589,8 +1578,12 @@ int usb_reset_configuration(struct usb_device *dev)
USB_REQ_SET_CONFIGURATION, 0,
config->desc.bConfigurationValue, 0,
NULL, 0, USB_CTRL_SET_TIMEOUT);
- if (retval < 0)
- goto reset_old_alts;
+ if (retval < 0) {
+ usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL);
+ usb_enable_lpm(dev);
+ mutex_unlock(hcd->bandwidth_mutex);
+ return retval;
+ }
mutex_unlock(hcd->bandwidth_mutex);
/* re-init hc/hcd interface/endpoint state */
--
2.28.0
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 662a0221893a3d58aa72719671844264306f6e4b
Gitweb: https://git.kernel.org/tip/662a0221893a3d58aa72719671844264306f6e4b
Author: Peter Zijlstra <peterz(a)infradead.org>
AuthorDate: Wed, 02 Sep 2020 15:25:50 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Fri, 04 Sep 2020 15:09:29 +02:00
x86/entry: Fix AC assertion
The WARN added in commit 3c73b81a9164 ("x86/entry, selftests: Further
improve user entry sanity checks") unconditionally triggers on a IVB
machine because it does not support SMAP.
For !SMAP hardware the CLAC/STAC instructions are patched out and thus if
userspace sets AC, it is still have set after entry.
Fixes: 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks")
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Daniel Thompson <daniel.thompson(a)linaro.org>
Acked-by: Andy Lutomirski <luto(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20200902133200.666781610@infradead.org
---
arch/x86/include/asm/entry-common.h | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index a8f9315..6fe54b2 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -18,8 +18,16 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
* state, not the interrupt state as imagined by Xen.
*/
unsigned long flags = native_save_fl();
- WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
- X86_EFLAGS_NT));
+ unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
+
+ /*
+ * For !SMAP hardware we patch out CLAC on entry.
+ */
+ if (boot_cpu_has(X86_FEATURE_SMAP) ||
+ (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV)))
+ mask |= X86_EFLAGS_AC;
+
+ WARN_ON_ONCE(flags & mask);
/* We think we came from user mode. Make sure pt_regs agrees. */
WARN_ON_ONCE(!user_mode(regs));
Trying to clear DR7 around a #DB from usermode malfunctions if we
schedule when delivering SIGTRAP. Rather than trying to define a
special no-recursion region, just allow a single level of recursion.
We do the same thing for NMI, and it hasn't caused any problems yet.
Reported-by: Kyle Huey <me(a)kylehuey.com>
Debugged-by: Josh Poimboeuf <jpoimboe(a)redhat.com>
Fixes: 9f58fdde95c9 ("x86/db: Split out dr6/7 handling")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
---
arch/x86/kernel/traps.c | 69 +++++++++++++++++++++++------------------
1 file changed, 38 insertions(+), 31 deletions(-)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1f66d2d1e998..bf852b72f15c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+static __always_inline unsigned long debug_read_clear_dr6(void)
{
- /*
- * Disable breakpoints during exception handling; recursive exceptions
- * are exceedingly 'fun'.
- *
- * Since this function is NOKPROBE, and that also applies to
- * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
- * HW_BREAKPOINT_W on our stack)
- *
- * Entry text is excluded for HW_BP_X and cpu_entry_area, which
- * includes the entry stack is excluded for everything.
- */
- *dr7 = local_db_save();
+ unsigned long dr6;
/*
* The Intel SDM says:
@@ -755,15 +744,21 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
*
* Keep it simple: clear DR6 immediately.
*/
- get_debugreg(*dr6, 6);
+ get_debugreg(dr6, 6);
set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
- *dr6 &= ~DR6_RESERVED;
+ dr6 &= ~DR6_RESERVED;
+
+ return dr6;
+}
+
+static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+{
+ *dr6 = debug_read_clear_dr6();
}
static __always_inline void debug_exit(unsigned long dr7)
{
- local_db_restore(dr7);
}
/*
@@ -863,6 +858,19 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
+ /*
+ * Disable breakpoints during exception handling; recursive exceptions
+ * are exceedingly 'fun'.
+ *
+ * Since this function is NOKPROBE, and that also applies to
+ * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+ * HW_BREAKPOINT_W on our stack)
+ *
+ * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+ * includes the entry stack is excluded for everything.
+ */
+ unsigned long dr7 = local_db_save();
+
bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
@@ -883,6 +891,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
instrumentation_end();
idtentry_exit_nmi(regs, irq_state);
+
+ local_db_restore(dr7);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -894,6 +904,15 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
*/
WARN_ON_ONCE(!user_mode(regs));
+ /*
+ * NB: We can't easily clear DR7 here because
+ * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
+ * user memory, etc. This means that a recursive #DB is possible. If
+ * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+ * Since we're not on the IST stack right now, everything will be
+ * fine.
+ */
+
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
@@ -907,36 +926,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_kernel(regs, dr6);
- debug_exit(dr7);
+ exc_debug_kernel(regs, debug_read_clear_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_user(regs, dr6);
- debug_exit(dr7);
+ exc_debug_user(regs, debug_read_clear_dr6());
}
#else
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
+ unsigned long dr6 = debug_read_clear_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
else
exc_debug_kernel(regs, dr6);
-
- debug_exit(dr7);
}
#endif
--
2.25.4
Hi,
I'm adding stable(a)vger.kernel.org
Once again, this time really adding stable.
On 2020-06-03 04:31, Martin K. Petersen wrote:
> On Thu, 28 May 2020 21:31:08 +0200, Bodo Stroesser wrote:
>
>> 1) If remaining ring space before the end of the ring is
>> smaller then the next cmd to write, tcmu writes a padding
>> entry which fills the remaining space at the end of the
>> ring.
>> Then tcmu calls tcmu_flush_dcache_range() with the size
>> of struct tcmu_cmd_entry as data length to flush.
>> If the space filled by the padding was smaller then
>> tcmu_cmd_entry, tcmu_flush_dcache_range() is called for
>> an address range reaching behind the end of the vmalloc'ed
>> ring.
>> tcmu_flush_dcache_range() in a loop calls
>> flush_dcache_page(virt_to_page(start));
>> for every page being part of the range. On x86 the line is
>> optimized out by the compiler, as flush_dcache_page() is
>> empty on x86.
>> But I assume the above can cause trouble on other
>> architectures that really have a flush_dcache_page().
>> For paddings only the header part of an entry is relevant
>> Due to alignment rules the header always fits in the
>> remaining space, if padding is needed.
>> So tcmu_flush_dcache_range() can safely be called with
>> sizeof(entry->hdr) as the length here.
>>
>> [...]
>
> Applied to 5.8/scsi-queue, thanks!
>
> [1/1] scsi: target: tcmu: Fix size in calls to tcmu_flush_dcache_range
> https://git.kernel.org/mkp/scsi/c/8c4e0f212398
>
The full commit of this patch is:
8c4e0f212398cdd1eb4310a5981d06a723cdd24f
This patch is the first of four patches that are necessary to run tcmu
on ARM without crash. For details please see
https://bugzilla.kernel.org/show_bug.cgi?id=208045
Upsteam commits of patches 2,3, and 4 are:
2: 3c58f737231e "scsi: target: tcmu: Optimize use of flush_dcache_page"
3: 3145550a7f8b "scsi: target: tcmu: Fix crash in
tcmu_flush_dcache_range on ARM"
4: 5a0c256d96f0 "scsi: target: tcmu: Fix crash on ARM during cmd
completion"
Since patches 3 and 4 already were accepted for 5.8, 5.4, and 4.19, and
I sent a request to add patch 2 about 1 hour ago, please consider adding
this patch to 5.4 and 4.19, because without it tcmu on ARM will still
crash.
Thank you,
Bodo
From: Mel Gorman <mgorman(a)techsingularity.net>
[ Upstream commit e5b31ac2ca2cd0cf6bf2fcbb708ed01466c89aaa ]
The original count is never reused so it can be removed.
Signed-off-by: Mel Gorman <mgorman(a)techsingularity.net>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Jesper Dangaard Brouer <brouer(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
[dwagner: update context]
Signed-off-by: Daniel Wagner <dwagner(a)suse.de>
---
Hi,
The backport commit 0c9ce43da97d ("mm, page_alloc: fix core hung in
free_pcppages_bulk()") has the depency to this commit. It went in v4.6
so only v4.4 is effected.
Thanks,
Daniel
mm/page_alloc.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 14bab5fa1b65..3570aaf2a620 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -835,7 +835,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
{
int migratetype = 0;
int batch_free = 0;
- int to_free = count;
unsigned long nr_scanned;
spin_lock(&zone->lock);
@@ -848,7 +847,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
* below while (list_empty(list)) loop.
*/
count = min(pcp->count, count);
- while (to_free) {
+ while (count) {
struct page *page;
struct list_head *list;
@@ -868,7 +867,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
/* This is the only non-empty list. Free them all. */
if (batch_free == MIGRATE_PCPTYPES)
- batch_free = to_free;
+ batch_free = count;
do {
int mt; /* migratetype of the to-be-freed page */
@@ -886,7 +885,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
- } while (--to_free && --batch_free && !list_empty(list));
+ } while (--count && --batch_free && !list_empty(list));
}
spin_unlock(&zone->lock);
}
--
2.28.0
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f7f86e8ac0ad7cd6792a80137f5a550924966916 Mon Sep 17 00:00:00 2001
From: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Date: Thu, 27 Aug 2020 10:20:57 -0700
Subject: [PATCH] dt-bindings: mmc: tegra: Add tmclk for Tegra210 and later
commit b5a84ecf025a ("mmc: tegra: Add Tegra210 support")
Tegra210 and later uses separate SDMMC_LEGACY_TM clock for data
timeout.
So, this patch adds "tmclk" to Tegra sdhci clock property in the
device tree binding.
Fixes: b5a84ecf025a ("mmc: tegra: Add Tegra210 support")
Cc: stable <stable(a)vger.kernel.org> # 5.4
Reviewed-by: Jon Hunter <jonathanh(a)nvidia.com>
Signed-off-by: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Link: https://lore.kernel.org/r/1598548861-32373-4-git-send-email-skomatineni@nvi…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
index 2cf3affa1be7..96c0b1440c9c 100644
--- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
@@ -15,8 +15,15 @@ Required properties:
- "nvidia,tegra210-sdhci": for Tegra210
- "nvidia,tegra186-sdhci": for Tegra186
- "nvidia,tegra194-sdhci": for Tegra194
-- clocks : Must contain one entry, for the module clock.
- See ../clocks/clock-bindings.txt for details.
+- clocks: For Tegra210, Tegra186 and Tegra194 must contain two entries.
+ One for the module clock and one for the timeout clock.
+ For all other Tegra devices, must contain a single entry for
+ the module clock. See ../clocks/clock-bindings.txt for details.
+- clock-names: For Tegra210, Tegra186 and Tegra194 must contain the
+ strings 'sdhci' and 'tmclk' to represent the module and
+ the timeout clocks, respectively.
+ For all other Tegra devices must contain the string 'sdhci'
+ to represent the module clock.
- resets : Must contain an entry for each entry in reset-names.
See ../reset/reset.txt for details.
- reset-names : Must include the following entries:
@@ -99,7 +106,7 @@ Optional properties for Tegra210, Tegra186 and Tegra194:
Example:
sdhci@700b0000 {
- compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+ compatible = "nvidia,tegra124-sdhci";
reg = <0x0 0x700b0000 0x0 0x200>;
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&tegra_car TEGRA210_CLK_SDMMC1>;
@@ -115,3 +122,22 @@ sdhci@700b0000 {
nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>;
status = "disabled";
};
+
+sdhci@700b0000 {
+ compatible = "nvidia,tegra210-sdhci";
+ reg = <0x0 0x700b0000 0x0 0x200>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SDMMC1>,
+ <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>;
+ clock-names = "sdhci", "tmclk";
+ resets = <&tegra_car 14>;
+ reset-names = "sdhci";
+ pinctrl-names = "sdmmc-3v3", "sdmmc-1v8";
+ pinctrl-0 = <&sdmmc1_3v3>;
+ pinctrl-1 = <&sdmmc1_1v8>;
+ nvidia,pad-autocal-pull-up-offset-3v3 = <0x00>;
+ nvidia,pad-autocal-pull-down-offset-3v3 = <0x7d>;
+ nvidia,pad-autocal-pull-up-offset-1v8 = <0x7b>;
+ nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>;
+ status = "disabled";
+};
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f7f86e8ac0ad7cd6792a80137f5a550924966916 Mon Sep 17 00:00:00 2001
From: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Date: Thu, 27 Aug 2020 10:20:57 -0700
Subject: [PATCH] dt-bindings: mmc: tegra: Add tmclk for Tegra210 and later
commit b5a84ecf025a ("mmc: tegra: Add Tegra210 support")
Tegra210 and later uses separate SDMMC_LEGACY_TM clock for data
timeout.
So, this patch adds "tmclk" to Tegra sdhci clock property in the
device tree binding.
Fixes: b5a84ecf025a ("mmc: tegra: Add Tegra210 support")
Cc: stable <stable(a)vger.kernel.org> # 5.4
Reviewed-by: Jon Hunter <jonathanh(a)nvidia.com>
Signed-off-by: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Link: https://lore.kernel.org/r/1598548861-32373-4-git-send-email-skomatineni@nvi…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
index 2cf3affa1be7..96c0b1440c9c 100644
--- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
@@ -15,8 +15,15 @@ Required properties:
- "nvidia,tegra210-sdhci": for Tegra210
- "nvidia,tegra186-sdhci": for Tegra186
- "nvidia,tegra194-sdhci": for Tegra194
-- clocks : Must contain one entry, for the module clock.
- See ../clocks/clock-bindings.txt for details.
+- clocks: For Tegra210, Tegra186 and Tegra194 must contain two entries.
+ One for the module clock and one for the timeout clock.
+ For all other Tegra devices, must contain a single entry for
+ the module clock. See ../clocks/clock-bindings.txt for details.
+- clock-names: For Tegra210, Tegra186 and Tegra194 must contain the
+ strings 'sdhci' and 'tmclk' to represent the module and
+ the timeout clocks, respectively.
+ For all other Tegra devices must contain the string 'sdhci'
+ to represent the module clock.
- resets : Must contain an entry for each entry in reset-names.
See ../reset/reset.txt for details.
- reset-names : Must include the following entries:
@@ -99,7 +106,7 @@ Optional properties for Tegra210, Tegra186 and Tegra194:
Example:
sdhci@700b0000 {
- compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+ compatible = "nvidia,tegra124-sdhci";
reg = <0x0 0x700b0000 0x0 0x200>;
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&tegra_car TEGRA210_CLK_SDMMC1>;
@@ -115,3 +122,22 @@ sdhci@700b0000 {
nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>;
status = "disabled";
};
+
+sdhci@700b0000 {
+ compatible = "nvidia,tegra210-sdhci";
+ reg = <0x0 0x700b0000 0x0 0x200>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SDMMC1>,
+ <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>;
+ clock-names = "sdhci", "tmclk";
+ resets = <&tegra_car 14>;
+ reset-names = "sdhci";
+ pinctrl-names = "sdmmc-3v3", "sdmmc-1v8";
+ pinctrl-0 = <&sdmmc1_3v3>;
+ pinctrl-1 = <&sdmmc1_1v8>;
+ nvidia,pad-autocal-pull-up-offset-3v3 = <0x00>;
+ nvidia,pad-autocal-pull-down-offset-3v3 = <0x7d>;
+ nvidia,pad-autocal-pull-up-offset-1v8 = <0x7b>;
+ nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>;
+ status = "disabled";
+};
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f7f86e8ac0ad7cd6792a80137f5a550924966916 Mon Sep 17 00:00:00 2001
From: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Date: Thu, 27 Aug 2020 10:20:57 -0700
Subject: [PATCH] dt-bindings: mmc: tegra: Add tmclk for Tegra210 and later
commit b5a84ecf025a ("mmc: tegra: Add Tegra210 support")
Tegra210 and later uses separate SDMMC_LEGACY_TM clock for data
timeout.
So, this patch adds "tmclk" to Tegra sdhci clock property in the
device tree binding.
Fixes: b5a84ecf025a ("mmc: tegra: Add Tegra210 support")
Cc: stable <stable(a)vger.kernel.org> # 5.4
Reviewed-by: Jon Hunter <jonathanh(a)nvidia.com>
Signed-off-by: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Link: https://lore.kernel.org/r/1598548861-32373-4-git-send-email-skomatineni@nvi…
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
index 2cf3affa1be7..96c0b1440c9c 100644
--- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
@@ -15,8 +15,15 @@ Required properties:
- "nvidia,tegra210-sdhci": for Tegra210
- "nvidia,tegra186-sdhci": for Tegra186
- "nvidia,tegra194-sdhci": for Tegra194
-- clocks : Must contain one entry, for the module clock.
- See ../clocks/clock-bindings.txt for details.
+- clocks: For Tegra210, Tegra186 and Tegra194 must contain two entries.
+ One for the module clock and one for the timeout clock.
+ For all other Tegra devices, must contain a single entry for
+ the module clock. See ../clocks/clock-bindings.txt for details.
+- clock-names: For Tegra210, Tegra186 and Tegra194 must contain the
+ strings 'sdhci' and 'tmclk' to represent the module and
+ the timeout clocks, respectively.
+ For all other Tegra devices must contain the string 'sdhci'
+ to represent the module clock.
- resets : Must contain an entry for each entry in reset-names.
See ../reset/reset.txt for details.
- reset-names : Must include the following entries:
@@ -99,7 +106,7 @@ Optional properties for Tegra210, Tegra186 and Tegra194:
Example:
sdhci@700b0000 {
- compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+ compatible = "nvidia,tegra124-sdhci";
reg = <0x0 0x700b0000 0x0 0x200>;
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&tegra_car TEGRA210_CLK_SDMMC1>;
@@ -115,3 +122,22 @@ sdhci@700b0000 {
nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>;
status = "disabled";
};
+
+sdhci@700b0000 {
+ compatible = "nvidia,tegra210-sdhci";
+ reg = <0x0 0x700b0000 0x0 0x200>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SDMMC1>,
+ <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>;
+ clock-names = "sdhci", "tmclk";
+ resets = <&tegra_car 14>;
+ reset-names = "sdhci";
+ pinctrl-names = "sdmmc-3v3", "sdmmc-1v8";
+ pinctrl-0 = <&sdmmc1_3v3>;
+ pinctrl-1 = <&sdmmc1_1v8>;
+ nvidia,pad-autocal-pull-up-offset-3v3 = <0x00>;
+ nvidia,pad-autocal-pull-down-offset-3v3 = <0x7d>;
+ nvidia,pad-autocal-pull-up-offset-1v8 = <0x7b>;
+ nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>;
+ status = "disabled";
+};
The below error is seen in dmesg, while formatting the disks discovered on host.
dmesg:
[ 636.733374] blk_update_request: I/O error, dev nvme4n1, sector 0 op 0x3:(DISCARD) flags 0x800 phys_seg 1 prio class 0
Patch 6 fixes it and there are 5 other dependent patches that also need to be
pulled from upstream to stable, 5.4 and 4.19 branches.
Patch 1 dependent patch
Patch 2 dependent patch
Patch 3 dependent patch
Patch 4 dependent patch
Patch 5 dependent patch
Patch 6 fix patch
Thanks,
Dakshaja
Christoph Hellwig (5):
nvmet: Cleanup discovery execute handlers
nvmet: Introduce common execute function for get_log_page and identify
nvmet: Introduce nvmet_dsm_len() helper
nvmet: Remove the data_len field from the nvmet_req struct
nvmet: Open code nvmet_req_execute()
Sagi Grimberg (1):
nvmet: fix dsm failure when payload does not match sgl descriptor
drivers/nvme/target/admin-cmd.c | 128 +++++++++++++++++-------------
drivers/nvme/target/core.c | 23 ++++--
drivers/nvme/target/discovery.c | 62 +++++++--------
drivers/nvme/target/fabrics-cmd.c | 15 +++-
drivers/nvme/target/fc.c | 4 +-
drivers/nvme/target/io-cmd-bdev.c | 19 +++--
drivers/nvme/target/io-cmd-file.c | 20 +++--
drivers/nvme/target/loop.c | 2 +-
drivers/nvme/target/nvmet.h | 11 ++-
drivers/nvme/target/rdma.c | 4 +-
drivers/nvme/target/tcp.c | 6 +-
11 files changed, 176 insertions(+), 118 deletions(-)
--
2.18.0.232.gb7bd9486b.dirty
Changes from v1:
- (Re-)adding Marc's review tags from upstream. Differences to the
original patches are trivial for 2/3 and 3/3, and straight-forward
for 1/3.
- Fix spelling of vaxorcism (hope my soul gets spared for the sacrilege)
----------
In some architectural corner cases, AT instructions can generate an
exception, which KVM is not really ready to handle properly.
Teach the code to handle this situation gracefully.
This is a backport of the respective upstream patches to v5.4(.61).
James prepared and tested these already, but we were lacking the upstream
commit IDs so far.
I am sending this on his behalf, since he is off this week.
The last two of the patches were tagged Cc: stable already, but did
not apply cleanly, hence this specific backport.
Cheers,
Andre.
James Morse (3):
KVM: arm64: Add kvm_extable for vaxorcism code
KVM: arm64: Survive synchronous exceptions caused by AT instructions
KVM: arm64: Set HCR_EL2.PTW to prevent AT taking synchronous exception
arch/arm64/include/asm/kvm_arm.h | 3 +-
arch/arm64/include/asm/kvm_asm.h | 43 +++++++++++++++++++++
arch/arm64/kernel/vmlinux.lds.S | 8 ++++
arch/arm64/kvm/hyp/entry.S | 15 +++++---
arch/arm64/kvm/hyp/hyp-entry.S | 65 ++++++++++++++++++++------------
arch/arm64/kvm/hyp/switch.c | 39 +++++++++++++++++--
6 files changed, 138 insertions(+), 35 deletions(-)
--
2.17.1
In some architectural corner cases, AT instructions can generate an
exception, which KVM is not really ready to handle properly.
Teach the code to handle this situation gracefully.
This is a backport of the respective upstream patches to v4.14(.196).
James prepared and tested these already, but we were lacking the upstream
commit IDs so far.
I am sending this on his behalf, since he is off this week.
The original patches contained stable tags, but with a prerequisite
patch in v5.3. Patch 2/4 is a backport of this one (removing ARMv8.2 RAS
barriers, which are not supported in v4.14), patches 1/4 and 3/4
needed some massaging to apply and work on 4.14.
Cheers,
Andre.
James Morse (4):
KVM: arm64: Add kvm_extable for vaxorcism code
KVM: arm64: Defer guest entry when an asynchronous exception is pending
KVM: arm64: Survive synchronous exceptions caused by AT instructions
KVM: arm64: Set HCR_EL2.PTW to prevent AT taking synchronous exception
arch/arm64/include/asm/kvm_arm.h | 3 +-
arch/arm64/include/asm/kvm_asm.h | 43 ++++++++++++++++++++++
arch/arm64/kernel/vmlinux.lds.S | 8 ++++
arch/arm64/kvm/hyp/entry.S | 26 ++++++++++---
arch/arm64/kvm/hyp/hyp-entry.S | 63 +++++++++++++++++++++-----------
arch/arm64/kvm/hyp/switch.c | 39 ++++++++++++++++++--
6 files changed, 150 insertions(+), 32 deletions(-)
--
2.17.1
Failing probe with -EPROBE_DEFER until all dependencies
listed in the _DEP (Operation Region Dependencies) object
have been met.
This will fix an issue where on some platforms UCSI ACPI
driver fails to probe because the address space handler for
the operation region that the UCSI ACPI interface uses has
not been loaded yet.
Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
---
drivers/usb/typec/ucsi/ucsi_acpi.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 9fc4f338e8700..c0aca2f0f23f0 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -112,11 +112,15 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data)
static int ucsi_acpi_probe(struct platform_device *pdev)
{
+ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
struct ucsi_acpi *ua;
struct resource *res;
acpi_status status;
int ret;
+ if (adev->dep_unmet)
+ return -EPROBE_DEFER;
+
ua = devm_kzalloc(&pdev->dev, sizeof(*ua), GFP_KERNEL);
if (!ua)
return -ENOMEM;
--
2.28.0
EVM_ALLOW_METADATA_WRITES is an EVM initialization flag that can be set to
temporarily disable metadata verification until all xattrs/attrs necessary
to verify an EVM portable signature are copied to the file. This flag is
cleared when EVM is initialized with an HMAC key, to avoid that the HMAC is
calculated on unverified xattrs/attrs.
Currently EVM unnecessarily denies setting this flag if EVM is initialized
with public key, which is not a concern as it cannot be used to trust
xattrs/attrs updates. This patch removes this limitation.
Cc: stable(a)vger.kernel.org # 4.16.x
Fixes: ae1ba1676b88e ("EVM: Allow userland to permit modification of EVM-protected metadata")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
Documentation/ABI/testing/evm | 6 ++++--
security/integrity/evm/evm_secfs.c | 2 +-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm
index 201d10319fa1..cbb50ab09c78 100644
--- a/Documentation/ABI/testing/evm
+++ b/Documentation/ABI/testing/evm
@@ -42,8 +42,10 @@ Description:
modification of EVM-protected metadata and
disable all further modification of policy
- Note that once a key has been loaded, it will no longer be
- possible to enable metadata modification.
+ Note that once HMAC validation and creation is enabled,
+ it will no longer be possible to enable metadata modification
+ and if metadata modification is already enabled, it will be
+ disabled.
Until key loading has been signaled EVM can not create
or validate the 'security.evm' xattr, but returns
diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index cfc3075769bb..92fe26ace797 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -84,7 +84,7 @@ static ssize_t evm_write_key(struct file *file, const char __user *buf,
* keys are loaded.
*/
if ((i & EVM_ALLOW_METADATA_WRITES) &&
- ((evm_initialized & EVM_KEY_MASK) != 0) &&
+ ((evm_initialized & EVM_INIT_HMAC) != 0) &&
!(evm_initialized & EVM_ALLOW_METADATA_WRITES))
return -EPERM;
--
2.27.GIT
Could you please cherry-pick these ones to Linux 5.4
Commit: f232d9ec029ce3e2543b05213e2979e01e503408
Author: Lucas Stach <l.stach@xxxxxxxxxxxxxxxxx>
Date: Wed, 26 Feb 2020 16:27:08 +0100
Commit: d7c5782acd354bdb5ed0fa10e1e397eaed558390
Author: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxxxxxxxxxxxx>
Date: Tue, 29 Oct 2019 11:03:05 -0400
These patches fixes GPU hangs using etnaviv driver.
Regards,
Walter
Changes from v1:
- (Re-)adding Marc's review tags from upstream. Differences to the
original patches are trivial for 2/2, and straight-forward for 1/2.
- Fix spelling of vaxorcism
-------------
In some architectural corner cases, AT instructions can generate an
exception, which KVM is not really ready to handle properly.
Teach the code to handle this situation gracefully.
This is a backport of the respective upstream patches to v5.8(.5).
James prepared and tested these already, but we were lacking the upstream
commit ID so far.
I am sending this on his behalf, since he is off this week.
The last two of the originally three patches were tagged Cc: stable
already, but 2/3 did not apply cleanly, hence this specific backport.
3/3 has already been added to stable-queue, so I am dropping it from
this post.
Cheers,
Andre.
James Morse (2):
KVM: arm64: Add kvm_extable for vaxorcism code
KVM: arm64: Survive synchronous exceptions caused by AT instructions
arch/arm64/include/asm/kvm_asm.h | 43 +++++++++++++++++++++
arch/arm64/kernel/vmlinux.lds.S | 8 ++++
arch/arm64/kvm/hyp/entry.S | 15 +++++---
arch/arm64/kvm/hyp/hyp-entry.S | 65 ++++++++++++++++++++------------
arch/arm64/kvm/hyp/switch.c | 39 +++++++++++++++++--
5 files changed, 136 insertions(+), 34 deletions(-)
--
2.17.1
The patch titled
Subject: mm/memory_hotplug: drain per-cpu pages again during memory offline
has been added to the -mm tree. Its filename is
mm-memory_hotplug-drain-per-cpu-pages-again-during-memory-offline.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-memory_hotplug-drain-per-cpu-p…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-memory_hotplug-drain-per-cpu-p…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Subject: mm/memory_hotplug: drain per-cpu pages again during memory offline
There is a race during page offline that can lead to infinite loop:
a page never ends up on a buddy list and __offline_pages() keeps
retrying infinitely or until a termination signal is received.
Thread#1 - a new process:
load_elf_binary
begin_new_exec
exec_mmap
mmput
exit_mmap
tlb_finish_mmu
tlb_flush_mmu
release_pages
free_unref_page_list
free_unref_page_prepare
set_pcppage_migratetype(page, migratetype);
// Set page->index migration type below MIGRATE_PCPTYPES
Thread#2 - hot-removes memory
__offline_pages
start_isolate_page_range
set_migratetype_isolate
set_pageblock_migratetype(page, MIGRATE_ISOLATE);
Set migration type to MIGRATE_ISOLATE-> set
drain_all_pages(zone);
// drain per-cpu page lists to buddy allocator.
Thread#1 - continue
free_unref_page_commit
migratetype = get_pcppage_migratetype(page);
// get old migration type
list_add(&page->lru, &pcp->lists[migratetype]);
// add new page to already drained pcp list
Thread#2
Never drains pcp again, and therefore gets stuck in the loop.
The fix is to try to drain per-cpu lists again after
check_pages_isolated_cb() fails.
Link: https://lkml.kernel.org/r/20200903140032.380431-1-pasha.tatashin@soleen.com
Fixes: c52e75935f8d ("mm: remove extra drain pages on pcp list")
Signed-off-by: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Acked-by: David Rientjes <rientjes(a)google.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Wei Yang <richard.weiyang(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 14 ++++++++++++++
mm/page_isolation.c | 8 ++++++++
2 files changed, 22 insertions(+)
--- a/mm/memory_hotplug.c~mm-memory_hotplug-drain-per-cpu-pages-again-during-memory-offline
+++ a/mm/memory_hotplug.c
@@ -1575,6 +1575,20 @@ static int __ref __offline_pages(unsigne
/* check again */
ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
NULL, check_pages_isolated_cb);
+ /*
+ * per-cpu pages are drained in start_isolate_page_range, but if
+ * there are still pages that are not free, make sure that we
+ * drain again, because when we isolated range we might
+ * have raced with another thread that was adding pages to pcp
+ * list.
+ *
+ * Forward progress should be still guaranteed because
+ * pages on the pcp list can only belong to MOVABLE_ZONE
+ * because has_unmovable_pages explicitly checks for
+ * PageBuddy on freed pages on other zones.
+ */
+ if (ret)
+ drain_all_pages(zone);
} while (ret);
/* Ok, all of our target is isolated.
--- a/mm/page_isolation.c~mm-memory_hotplug-drain-per-cpu-pages-again-during-memory-offline
+++ a/mm/page_isolation.c
@@ -170,6 +170,14 @@ __first_valid_page(unsigned long pfn, un
* pageblocks we may have modified and return -EBUSY to caller. This
* prevents two threads from simultaneously working on overlapping ranges.
*
+ * Please note that there is no strong synchronization with the page allocator
+ * either. Pages might be freed while their page blocks are marked ISOLATED.
+ * In some cases pages might still end up on pcp lists and that would allow
+ * for their allocation even when they are in fact isolated already. Depending
+ * on how strong of a guarantee the caller needs drain_all_pages might be needed
+ * (e.g. __offline_pages will need to call it after check for isolated range for
+ * a next retry).
+ *
* Return: the number of isolated pageblocks on success and -EBUSY if any part
* of range cannot be isolated.
*/
_
Patches currently in -mm which might be from pasha.tatashin(a)soleen.com are
mm-memory_hotplug-drain-per-cpu-pages-again-during-memory-offline.patch
The patch titled
Subject: mm/thp: fix __split_huge_pmd_locked() for migration PMD
has been added to the -mm tree. Its filename is
mm-thp-fix-__split_huge_pmd_locked-for-migration-pmd.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/mm-thp-fix-__split_huge_pmd_locke…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/mm-thp-fix-__split_huge_pmd_locke…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/thp: fix __split_huge_pmd_locked() for migration PMD
A migrating transparent huge page has to already be unmapped. Otherwise,
the page could be modified while it is being copied to a new page and data
could be lost. The function __split_huge_pmd() checks for a PMD migration
entry before calling __split_huge_pmd_locked() leading one to think that
__split_huge_pmd_locked() can handle splitting a migrating PMD.
However, the code always increments the page->_mapcount and adjusts the
memory control group accounting assuming the page is mapped.
Also, if the PMD entry is a migration PMD entry, the call to
is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead
of migration_entry_to_pfn(pmd_to_swp_entry(pmd)). Fix these problems by
checking for a PMD migration entry.
Link: https://lkml.kernel.org/r/20200903183140.19055-1-rcampbell@nvidia.com
Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: Bharata B Rao <bharata(a)linux.ibm.com>
Cc: Ben Skeggs <bskeggs(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [4.14+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 42 +++++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 19 deletions(-)
--- a/mm/huge_memory.c~mm-thp-fix-__split_huge_pmd_locked-for-migration-pmd
+++ a/mm/huge_memory.c
@@ -2021,7 +2021,7 @@ static void __split_huge_pmd_locked(stru
put_page(page);
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return;
- } else if (is_huge_zero_pmd(*pmd)) {
+ } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
@@ -2115,30 +2115,34 @@ static void __split_huge_pmd_locked(stru
pte = pte_offset_map(&_pmd, addr);
BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, entry);
- atomic_inc(&page[i]._mapcount);
- pte_unmap(pte);
- }
-
- /*
- * Set PG_double_map before dropping compound_mapcount to avoid
- * false-negative page_mapped().
- */
- if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
- for (i = 0; i < HPAGE_PMD_NR; i++)
+ if (!pmd_migration)
atomic_inc(&page[i]._mapcount);
+ pte_unmap(pte);
}
- lock_page_memcg(page);
- if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
- /* Last compound_mapcount is gone. */
- __dec_lruvec_page_state(page, NR_ANON_THPS);
- if (TestClearPageDoubleMap(page)) {
- /* No need in mapcount reference anymore */
+ if (!pmd_migration) {
+ /*
+ * Set PG_double_map before dropping compound_mapcount to avoid
+ * false-negative page_mapped().
+ */
+ if (compound_mapcount(page) > 1 &&
+ !TestSetPageDoubleMap(page)) {
for (i = 0; i < HPAGE_PMD_NR; i++)
- atomic_dec(&page[i]._mapcount);
+ atomic_inc(&page[i]._mapcount);
+ }
+
+ lock_page_memcg(page);
+ if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
+ /* Last compound_mapcount is gone. */
+ __dec_lruvec_page_state(page, NR_ANON_THPS);
+ if (TestClearPageDoubleMap(page)) {
+ /* No need in mapcount reference anymore */
+ for (i = 0; i < HPAGE_PMD_NR; i++)
+ atomic_dec(&page[i]._mapcount);
+ }
}
+ unlock_page_memcg(page);
}
- unlock_page_memcg(page);
smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
_
Patches currently in -mm which might be from rcampbell(a)nvidia.com are
mm-thp-fix-__split_huge_pmd_locked-for-migration-pmd.patch
mm-test-use-the-new-skip-macro.patch
mm-migrate-remove-cpages-in-migrate_vma_finalize.patch
mm-migrate-remove-obsolete-comment-about-device-public.patch
Event modifiers are not mentioned in the perf record or perf stat
manpages. Add them to orient new users more effectively by pointing
them to the perf list manpage for details.
Signed-off-by: Kim Phillips <kim.phillips(a)amd.com>
Fixes: 2055fdaf8703 ("perf list: Document precise event sampling for AMD IBS")
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Arnaldo Carvalho de Melo <acme(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Cc: Jiri Olsa <jolsa(a)redhat.com>
Cc: Namhyung Kim <namhyung(a)kernel.org>
Cc: Adrian Hunter <adrian.hunter(a)intel.com>
Cc: Stephane Eranian <eranian(a)google.com>
Cc: Alexey Budankov <alexey.budankov(a)linux.intel.com>
Cc: Tony Jones <tonyj(a)suse.de>
Cc: Jin Yao <yao.jin(a)linux.intel.com>
Cc: Ian Rogers <irogers(a)google.com>
Cc: "Paul A. Clarke" <pc(a)us.ibm.com>
Cc: linux-perf-users(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
tools/perf/Documentation/perf-record.txt | 4 ++++
tools/perf/Documentation/perf-stat.txt | 4 ++++
2 files changed, 8 insertions(+)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 3f72d8e261f3..bd50cdff08a8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -33,6 +33,10 @@ OPTIONS
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
+ - a symbolic or raw PMU event followed by an optional colon
+ and a list of event modifiers, e.g., cpu-cycles:p. See the
+ linkperf:perf-list[1] man page for details on event modifiers.
+
- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
'param1', 'param2', etc are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index c9bfefc051fb..a4b1d11fefc8 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -39,6 +39,10 @@ report::
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
+ - a symbolic or raw PMU event followed by an optional colon
+ and a list of event modifiers, e.g., cpu-cycles:p. See the
+ linkperf:perf-list[1] man page for details on event modifiers.
+
- a symbolically formed event like 'pmu/param1=0x3,param2/' where
param1 and param2 are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*
--
2.27.0
Guys,
There is a convoluted deadlock that I just root caused, and that is
fixed by this work (at least based on my code inspection it appears to
be fixed); but the deadlock exists in older and stable kernels, and I
am not sure whether to create a separate patch for it, or backport
this whole thing.
Thread #1: Hot-removes memory
device_offline
memory_subsys_offline
offline_pages
__offline_pages
mem_hotplug_lock <- write access
waits for Thread #3 refcnt for pfn 9e5113 to get to 1 so it can
migrate it.
Thread #2: ccs killer kthread
css_killed_work_fn
cgroup_mutex <- Grab this Mutex
mem_cgroup_css_offline
memcg_offline_kmem.part
memcg_deactivate_kmem_caches
get_online_mems
mem_hotplug_lock <- waits for Thread#1 to get read access
Thread #3: crashing userland program
do_coredump
elf_core_dump
get_dump_page() -> get page with pfn#9e5113, and increment refcnt
dump_emit
__kernel_write
__vfs_write
new_sync_write
pipe_write
pipe_wait -> waits for Thread #4 systemd-coredump to
read the pipe
Thread #4: systemd-coredump
ksys_read
vfs_read
__vfs_read
seq_read
proc_single_show
proc_cgroup_show
cgroup_mutex -> waits from Thread #2 for this lock.
In Summary:
Thread#1 waits for Thread#3 for refcnt, Thread#3 waits for Thread#4 to
read pipe. Thread#4 waits for Thread#2 for cgroup_mutex lock; Thread#2
waits for Thread#1 for mem_hotplug_lock rwlock.
This work appears to fix this deadlock because cgroup_mutex is not
called anymore before mem_hotplug_lock (unless I am missing it), as it
removes memcg_deactivate_kmem_caches.
Thank you,
Pasha
On Wed, Jan 29, 2020 at 9:42 PM Roman Gushchin <guro(a)fb.com> wrote:
>
> On Thu, Jan 30, 2020 at 07:36:26AM +0530, Bharata B Rao wrote:
> > On Mon, Jan 27, 2020 at 09:34:25AM -0800, Roman Gushchin wrote:
> > > The existing cgroup slab memory controller is based on the idea of
> > > replicating slab allocator internals for each memory cgroup.
> > > This approach promises a low memory overhead (one pointer per page),
> > > and isn't adding too much code on hot allocation and release paths.
> > > But is has a very serious flaw: it leads to a low slab utilization.
> > >
> > > Using a drgn* script I've got an estimation of slab utilization on
> > > a number of machines running different production workloads. In most
> > > cases it was between 45% and 65%, and the best number I've seen was
> > > around 85%. Turning kmem accounting off brings it to high 90s. Also
> > > it brings back 30-50% of slab memory. It means that the real price
> > > of the existing slab memory controller is way bigger than a pointer
> > > per page.
> > >
> > > The real reason why the existing design leads to a low slab utilization
> > > is simple: slab pages are used exclusively by one memory cgroup.
> > > If there are only few allocations of certain size made by a cgroup,
> > > or if some active objects (e.g. dentries) are left after the cgroup is
> > > deleted, or the cgroup contains a single-threaded application which is
> > > barely allocating any kernel objects, but does it every time on a new CPU:
> > > in all these cases the resulting slab utilization is very low.
> > > If kmem accounting is off, the kernel is able to use free space
> > > on slab pages for other allocations.
> > >
> > > Arguably it wasn't an issue back to days when the kmem controller was
> > > introduced and was an opt-in feature, which had to be turned on
> > > individually for each memory cgroup. But now it's turned on by default
> > > on both cgroup v1 and v2. And modern systemd-based systems tend to
> > > create a large number of cgroups.
> > >
> > > This patchset provides a new implementation of the slab memory controller,
> > > which aims to reach a much better slab utilization by sharing slab pages
> > > between multiple memory cgroups. Below is the short description of the new
> > > design (more details in commit messages).
> > >
> > > Accounting is performed per-object instead of per-page. Slab-related
> > > vmstat counters are converted to bytes. Charging is performed on page-basis,
> > > with rounding up and remembering leftovers.
> > >
> > > Memcg ownership data is stored in a per-slab-page vector: for each slab page
> > > a vector of corresponding size is allocated. To keep slab memory reparenting
> > > working, instead of saving a pointer to the memory cgroup directly an
> > > intermediate object is used. It's simply a pointer to a memcg (which can be
> > > easily changed to the parent) with a built-in reference counter. This scheme
> > > allows to reparent all allocated objects without walking them over and
> > > changing memcg pointer to the parent.
> > >
> > > Instead of creating an individual set of kmem_caches for each memory cgroup,
> > > two global sets are used: the root set for non-accounted and root-cgroup
> > > allocations and the second set for all other allocations. This allows to
> > > simplify the lifetime management of individual kmem_caches: they are
> > > destroyed with root counterparts. It allows to remove a good amount of code
> > > and make things generally simpler.
> > >
> > > The patchset* has been tested on a number of different workloads in our
> > > production. In all cases it saved significant amount of memory, measured
> > > from high hundreds of MBs to single GBs per host. On average, the size
> > > of slab memory has been reduced by 35-45%.
> >
> > Here are some numbers from multiple runs of sysbench and kernel compilation
> > with this patchset on a 10 core POWER8 host:
> >
> > ==========================================================================
> > Peak usage of memory.kmem.usage_in_bytes, memory.usage_in_bytes and
> > meminfo:Slab for Sysbench oltp_read_write with mysqld running as part
> > of a mem cgroup (Sampling every 5s)
> > --------------------------------------------------------------------------
> > 5.5.0-rc7-mm1 +slab patch %reduction
> > --------------------------------------------------------------------------
> > memory.kmem.usage_in_bytes 15859712 4456448 72
> > memory.usage_in_bytes 337510400 335806464 .5
> > Slab: (kB) 814336 607296 25
> >
> > memory.kmem.usage_in_bytes 16187392 4653056 71
> > memory.usage_in_bytes 318832640 300154880 5
> > Slab: (kB) 789888 559744 29
> > --------------------------------------------------------------------------
> >
> >
> > Peak usage of memory.kmem.usage_in_bytes, memory.usage_in_bytes and
> > meminfo:Slab for kernel compilation (make -s -j64) Compilation was
> > done from bash that is in a memory cgroup. (Sampling every 5s)
> > --------------------------------------------------------------------------
> > 5.5.0-rc7-mm1 +slab patch %reduction
> > --------------------------------------------------------------------------
> > memory.kmem.usage_in_bytes 338493440 231931904 31
> > memory.usage_in_bytes 7368015872 6275923968 15
> > Slab: (kB) 1139072 785408 31
> >
> > memory.kmem.usage_in_bytes 341835776 236453888 30
> > memory.usage_in_bytes 6540427264 6072893440 7
> > Slab: (kB) 1074304 761280 29
> >
> > memory.kmem.usage_in_bytes 340525056 233570304 31
> > memory.usage_in_bytes 6406209536 6177357824 3
> > Slab: (kB) 1244288 739712 40
> > --------------------------------------------------------------------------
> >
> > Slab consumption right after boot
> > --------------------------------------------------------------------------
> > 5.5.0-rc7-mm1 +slab patch %reduction
> > --------------------------------------------------------------------------
> > Slab: (kB) 821888 583424 29
> > ==========================================================================
> >
> > Summary:
> >
> > With sysbench and kernel compilation, memory.kmem.usage_in_bytes shows
> > around 70% and 30% reduction consistently.
> >
> > Didn't see consistent reduction of memory.usage_in_bytes with sysbench and
> > kernel compilation.
> >
> > Slab usage (from /proc/meminfo) shows consistent 30% reduction and the
> > same is seen right after boot too.
>
> That's just perfect!
>
> memory.usage_in_bytes was most likely the same because the freed space
> was taken by pagecache.
>
> Thank you very much for testing!
>
> Roman
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 137f3178a407 - ALSA: usb-audio: Update documentation comment for MS2109 quirk
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ xfstests - btrfs
🚧 ❌ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ ACPI table test
✅ ACPI enabled test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
🚧 ✅ xfstests - btrfs
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
s390x:
Host 1:
✅ Boot test
✅ selinux-policy: serge-testsuite
✅ stress: stress-ng
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
x86_64:
Host 1:
✅ Boot test
✅ ACPI table test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ kernel-rt: rt_migrate_test
✅ kernel-rt: rteval
✅ kernel-rt: sched_deadline
✅ kernel-rt: smidetect
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - jcstress tests
🚧 ❌ Memory function: kaslr
🚧 ✅ Networking firewall: basic netfilter test
🚧 ✅ audit: audit testsuite test
🚧 ✅ trace: ftrace/tracer
🚧 ✅ kdump - kexec_boot
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ stress: stress-ng
🚧 ⚡⚡⚡ CPU: Frequency Driver Test
🚧 ⚡⚡⚡ CPU: Idle Test
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 3:
✅ Boot test
🚧 ✅ kdump - sysrq-c
🚧 ✅ kdump - file-load
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ CPU: Frequency Driver Test
🚧 ✅ CPU: Idle Test
🚧 ✅ xfstests - btrfs
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
From: Andy Lutomirski <luto(a)kernel.org>
Trying to clear DR7 around a #DB from usermode malfunctions if we
schedule when delivering SIGTRAP. Rather than trying to define a
special no-recursion region, just allow a single level of recursion.
We do the same thing for NMI, and it hasn't caused any problems yet.
Fixes: 9f58fdde95c9 ("x86/db: Split out dr6/7 handling")
Reported-by: Kyle Huey <me(a)kylehuey.com>
Debugged-by: Josh Poimboeuf <jpoimboe(a)redhat.com>
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/8b9bd05f187231df008d48cf818a6a311cbd5c98.15978823…
---
arch/x86/kernel/traps.c | 65 ++++++++++++++++++++++--------------------------
1 file changed, 31 insertions(+), 34 deletions(-)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struc
#endif
}
-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+static __always_inline unsigned long debug_read_clear_dr6(void)
{
- /*
- * Disable breakpoints during exception handling; recursive exceptions
- * are exceedingly 'fun'.
- *
- * Since this function is NOKPROBE, and that also applies to
- * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
- * HW_BREAKPOINT_W on our stack)
- *
- * Entry text is excluded for HW_BP_X and cpu_entry_area, which
- * includes the entry stack is excluded for everything.
- */
- *dr7 = local_db_save();
+ unsigned long dr6;
/*
* The Intel SDM says:
@@ -755,15 +744,12 @@ static __always_inline void debug_enter(
*
* Keep it simple: clear DR6 immediately.
*/
- get_debugreg(*dr6, 6);
+ get_debugreg(dr6, 6);
set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
- *dr6 &= ~DR6_RESERVED;
-}
+ dr6 &= ~DR6_RESERVED;
-static __always_inline void debug_exit(unsigned long dr7)
-{
- local_db_restore(dr7);
+ return dr6;
}
/*
@@ -863,6 +849,18 @@ static void handle_debug(struct pt_regs
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
+ /*
+ * Disable breakpoints during exception handling; recursive exceptions
+ * are exceedingly 'fun'.
+ *
+ * Since this function is NOKPROBE, and that also applies to
+ * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+ * HW_BREAKPOINT_W on our stack)
+ *
+ * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+ * includes the entry stack is excluded for everything.
+ */
+ unsigned long dr7 = local_db_save();
bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
@@ -883,6 +881,8 @@ static __always_inline void exc_debug_ke
instrumentation_end();
idtentry_exit_nmi(regs, irq_state);
+
+ local_db_restore(dr7);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -894,6 +894,15 @@ static __always_inline void exc_debug_us
*/
WARN_ON_ONCE(!user_mode(regs));
+ /*
+ * NB: We can't easily clear DR7 here because
+ * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
+ * user memory, etc. This means that a recursive #DB is possible. If
+ * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+ * Since we're not on the IST stack right now, everything will be
+ * fine.
+ */
+
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
@@ -907,36 +916,24 @@ static __always_inline void exc_debug_us
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_kernel(regs, dr6);
- debug_exit(dr7);
+ exc_debug_kernel(regs, debug_read_clear_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_user(regs, dr6);
- debug_exit(dr7);
+ exc_debug_user(regs, debug_read_clear_dr6());
}
#else
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
+ unsigned long dr6 = debug_read_clear_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
else
exc_debug_kernel(regs, dr6);
-
- debug_exit(dr7);
}
#endif
From: Joerg Roedel <jroedel(a)suse.de>
One can not simply remove vmalloc faulting on x86-32. Upstream
commit: 7f0a002b5a21 ("x86/mm: remove vmalloc faulting")
removed it on x86 alltogether because previously the
arch_sync_kernel_mappings() interface was introduced. This interface
added synchronization of vmalloc/ioremap page-table updates to all
page-tables in the system at creation time and was thought to make
vmalloc faulting obsolete.
But that assumption was incredibly naive.
It turned out that there is a race window between the time the vmalloc
or ioremap code establishes a mapping and the time it synchronizes
this change to other page-tables in the system.
During this race window another CPU or thread can establish a vmalloc
mapping which uses the same intermediate page-table entries (e.g. PMD
or PUD) and does no synchronization in the end, because it found all
necessary mappings already present in the kernel reference page-table.
But when these intermediate page-table entries are not yet
synchronized, the other CPU or thread will continue with a vmalloc
address that is not yet mapped in the page-table it currently uses,
causing an unhandled page fault and oops like below:
BUG: unable to handle page fault for address: fe80c000
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
*pde = 33183067 *pte = a8648163
Oops: 0002 [#1] SMP
CPU: 1 PID: 13514 Comm: cve-2017-17053 Tainted: G
0-next-20200811 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
4/01/2014
EIP: memcpy+0xf/0x20
Code: 68 d0 7d ee d6 e8 11 1c c7 ff 0f 31 31 c3 59 58 cc cc cc cc cc cc 55 89 e5 57 89 c7 56 89 d6 53 89 cb a5 89 d9 83 e1 03 74 02 f3 a4 5b 5e 5f 5d c3 90 55 89 e5
EAX: fe80c000 EBX: 00010000 ECX: 00004000 EDX: fbfbd000
ESI: fbfbd000 EDI: fe80c000 EBP: f11f1e2c ESP: f11f1e20
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010216
CR0: 80050033 CR2: fe80c000 CR3: 314c0000 CR4: 003506d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff4ff0 DR7: 00000400
Call Trace:
ldt_dup_context+0x66/0x80
dup_mm+0x2b3/0x480
copy_process+0x133b/0x15c0
_do_fork+0x94/0x3e0
__ia32_sys_clone+0x67/0x80
__do_fast_syscall_32+0x3f/0x70
do_fast_syscall_32+0x29/0x60
do_SYSENTER_32+0x15/0x20
entry_SYSENTER_32+0x9f/0xf2
EIP: 0xb7eef549
So the arch_sync_kernel_mappings() interface is racy, but removing it
would mean to re-introduce the vmalloc_sync_all() interface, which is
even more awful. Keep arch_sync_kernel_mappings() in place and catch
the race condition in the page-fault handler instead.
Do a partial revert of above commit to get vmalloc faulting on x86-32
back in place.
Reported-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Fixes: 7f0a002b5a21 ("x86/mm: remove vmalloc faulting")
Cc: stable(a)vger.kernel.org # v5.8+
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
arch/x86/mm/fault.c | 78 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..6e3e8a124903 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This is needed because there is a race condition between the time
+ * when the vmalloc mapping code updates the PMD to the point in time
+ * where it synchronizes this update with the other page-tables in the
+ * system.
+ *
+ * In this race window another thread/CPU can map an area on the same
+ * PMD, finds it already present and does not synchronize it with the
+ * rest of the system yet. As a result v[mz]alloc might return areas
+ * which are not mapped in every page-table in the system, causing an
+ * unhandled page-fault when they are accessed.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3_pa();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+
+ if (pmd_large(*pmd_k))
+ return 0;
+
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+
+ return 0;
+}
+NOKPROBE_SYMBOL(vmalloc_fault);
+
void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
*/
WARN_ON_ONCE(hw_error_code & X86_PF_PK);
+#ifdef CONFIG_X86_32
+ /*
+ * We can fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ *
+ * Before doing this on-demand faulting, ensure that the
+ * fault is not any of the following:
+ * 1. A fault on a PTE with a reserved bit set.
+ * 2. A fault caused by a user-mode access. (Do not demand-
+ * fault kernel memory due to user-mode accesses).
+ * 3. A fault caused by a page-level protection violation.
+ * (A demand fault would be on a non-present page which
+ * would have X86_PF_PROT==0).
+ *
+ * This is only needed to close a race condition on x86-32 in
+ * the vmalloc mapping/unmapping code. See the comment above
+ * vmalloc_fault() for details. On x86-64 the race does not
+ * exist as the vmalloc mappings don't need to be synchronized
+ * there.
+ */
+ if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+ }
+#endif
+
/* Was the fault spurious, caused by lazy TLB invalidation? */
if (spurious_kernel_fault(hw_error_code, address))
return;
--
2.28.0
Currently __set_oom_adj loops through all processes in the system to
keep oom_score_adj and oom_score_adj_min in sync between processes
sharing their mm. This is done for any task with more that one mm_users,
which includes processes with multiple threads (sharing mm and signals).
However for such processes the loop is unnecessary because their signal
structure is shared as well.
Android updates oom_score_adj whenever a tasks changes its role
(background/foreground/...) or binds to/unbinds from a service, making
it more/less important. Such operation can happen frequently.
We noticed that updates to oom_score_adj became more expensive and after
further investigation found out that the patch mentioned in "Fixes"
introduced a regression. Using Pixel 4 with a typical Android workload,
write time to oom_score_adj increased from ~3.57us to ~362us. Moreover
this regression linearly depends on the number of multi-threaded
processes running on the system.
Mark the mm with a new MMF_MULTIPROCESS flag bit when task is created with
(CLONE_VM && !CLONE_THREAD && !CLONE_VFORK). Change __set_oom_adj to use
MMF_MULTIPROCESS instead of mm_users to decide whether oom_score_adj
update should be synchronized between multiple processes. To prevent
races between clone() and __set_oom_adj(), when oom_score_adj of the
process being cloned might be modified from userspace, we use
oom_adj_mutex. Its scope is changed to global. The combination of
(CLONE_VM && !CLONE_THREAD) is rarely used except for the case of vfork().
To prevent performance regressions of vfork(), we skip taking oom_adj_mutex
and setting MMF_MULTIPROCESS when CLONE_VFORK is specified. Clearing the
MMF_MULTIPROCESS flag (when the last process sharing the mm exits) is left
out of this patch to keep it simple and because it is believed that this
threading model is rare. Should there ever be a need for optimizing that
case as well, it can be done by hooking into the exit path, likely
following the mm_update_next_owner pattern.
With the combination of (CLONE_VM && !CLONE_THREAD && !CLONE_VFORK) being
quite rare, the regression is gone after the change is applied.
Fixes: 44a70adec910 ("mm, oom_adj: make sure processes sharing mm have same view of oom_score_adj")
Reported-by: Tim Murray <timmurray(a)google.com>
Debugged-by: Minchan Kim <minchan(a)kernel.org>
Suggested-by: Michal Hocko <mhocko(a)kernel.org>
Signed-off-by: Suren Baghdasaryan <surenb(a)google.com>
---
v3:
- Addressed Eric Biederman's comments from:
https://lore.kernel.org/linux-mm/87imd6n0qk.fsf@x220.int.ebiederm.org/
-- renabled oom_adj_lock back to oom_adj_mutex
-- renamed MMF_PROC_SHARED into MMF_MULTIPROCESS and fixed its comment
- Updated description to reflect the change
v2:
- https://lore.kernel.org/linux-mm/20200824153036.3201505-1-surenb@google.com/
- Implemented proposal from Michal Hocko in:
https://lore.kernel.org/linux-fsdevel/20200820124109.GI5033@dhcp22.suse.cz/
- Updated description to reflect the change
v1:
- https://lore.kernel.org/linux-mm/20200820002053.1424000-1-surenb@google.com/
fs/proc/base.c | 3 +--
include/linux/oom.h | 1 +
include/linux/sched/coredump.h | 1 +
kernel/fork.c | 21 +++++++++++++++++++++
mm/oom_kill.c | 2 ++
5 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 617db4e0faa0..aa69c35d904c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1055,7 +1055,6 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
{
- static DEFINE_MUTEX(oom_adj_mutex);
struct mm_struct *mm = NULL;
struct task_struct *task;
int err = 0;
@@ -1095,7 +1094,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
struct task_struct *p = find_lock_task_mm(task);
if (p) {
- if (atomic_read(&p->mm->mm_users) > 1) {
+ if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
mm = p->mm;
mmgrab(mm);
}
diff --git a/include/linux/oom.h b/include/linux/oom.h
index f022f581ac29..2db9a1432511 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -55,6 +55,7 @@ struct oom_control {
};
extern struct mutex oom_lock;
+extern struct mutex oom_adj_mutex;
static inline void set_current_oom_origin(void)
{
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index ecdc6542070f..dfd82eab2902 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -72,6 +72,7 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
#define MMF_OOM_VICTIM 25 /* mm is the oom victim */
#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */
+#define MMF_MULTIPROCESS 27 /* mm is shared between processes */
#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d32190861bd..6129a88c19ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1809,6 +1809,25 @@ static __always_inline void delayed_free_task(struct task_struct *tsk)
free_task(tsk);
}
+static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk)
+{
+ /* Skip if kernel thread */
+ if (!tsk->mm)
+ return;
+
+ /* Skip if spawning a thread or using vfork */
+ if ((clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK)) != CLONE_VM)
+ return;
+
+ /* We need to synchronize with __set_oom_adj */
+ mutex_lock(&oom_adj_mutex);
+ set_bit(MMF_MULTIPROCESS, &tsk->mm->flags);
+ /* Update the values in case they were changed after copy_signal */
+ tsk->signal->oom_score_adj = current->signal->oom_score_adj;
+ tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min;
+ mutex_unlock(&oom_adj_mutex);
+}
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -2281,6 +2300,8 @@ static __latent_entropy struct task_struct *copy_process(
trace_task_newtask(p, clone_flags);
uprobe_copy_process(p, clone_flags);
+ copy_oom_score_adj(clone_flags, p);
+
return p;
bad_fork_cancel_cgroup:
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index e90f25d6385d..8b84661a6410 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -64,6 +64,8 @@ int sysctl_oom_dump_tasks = 1;
* and mark_oom_victim
*/
DEFINE_MUTEX(oom_lock);
+/* Serializes oom_score_adj and oom_score_adj_min updates */
+DEFINE_MUTEX(oom_adj_mutex);
static inline bool is_memcg_oom(struct oom_control *oc)
{
--
2.28.0.526.ge36021eeef-goog