Instruct the compiler to read the next element in the list iteration
once, and that it is not allowed to reload the value from the stale
element later. This is important as during the course of the safe
iteration, the stale element may be poisoned (unbeknownst to the
compiler).
This helps prevent kcsan warnings over 'unsafe' conduct in releasing the
list elements during list_for_each_entry_safe() and friends.
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck(a)kernel.org>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: stable(a)vger.kernel.org
---
include/linux/list.h | 50 +++++++++++++++++++++++++++++++-------------
1 file changed, 36 insertions(+), 14 deletions(-)
diff --git a/include/linux/list.h b/include/linux/list.h
index 884216db3246..c4d215d02259 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -536,6 +536,17 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_next_entry(pos, member) \
list_entry((pos)->member.next, typeof(*(pos)), member)
+/**
+ * list_next_entry_safe - get the next element in list [once]
+ * @pos: the type * to cursor
+ * @member: the name of the list_head within the struct.
+ *
+ * Like list_next_entry() but prevents the compiler from reloading the
+ * next element.
+ */
+#define list_next_entry_safe(pos, member) \
+ list_entry(READ_ONCE((pos)->member.next), typeof(*(pos)), member)
+
/**
* list_prev_entry - get the prev element in list
* @pos: the type * to cursor
@@ -544,6 +555,17 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_prev_entry(pos, member) \
list_entry((pos)->member.prev, typeof(*(pos)), member)
+/**
+ * list_prev_entry_safe - get the prev element in list [once]
+ * @pos: the type * to cursor
+ * @member: the name of the list_head within the struct.
+ *
+ * Like list_prev_entry() but prevents the compiler from reloading the
+ * previous element.
+ */
+#define list_prev_entry_safe(pos, member) \
+ list_entry(READ_ONCE((pos)->member.prev), typeof(*(pos)), member)
+
/**
* list_for_each - iterate over a list
* @pos: the &struct list_head to use as a loop cursor.
@@ -686,9 +708,9 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_safe(pos, n, head, member) \
for (pos = list_first_entry(head, typeof(*pos), member), \
- n = list_next_entry(pos, member); \
+ n = list_next_entry_safe(pos, member); \
&pos->member != (head); \
- pos = n, n = list_next_entry(n, member))
+ pos = n, n = list_next_entry_safe(n, member))
/**
* list_for_each_entry_safe_continue - continue list iteration safe against removal
@@ -700,11 +722,11 @@ static inline void list_splice_tail_init(struct list_head *list,
* Iterate over list of given type, continuing after current point,
* safe against removal of list entry.
*/
-#define list_for_each_entry_safe_continue(pos, n, head, member) \
- for (pos = list_next_entry(pos, member), \
- n = list_next_entry(pos, member); \
- &pos->member != (head); \
- pos = n, n = list_next_entry(n, member))
+#define list_for_each_entry_safe_continue(pos, n, head, member) \
+ for (pos = list_next_entry(pos, member), \
+ n = list_next_entry_safe(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_next_entry_safe(n, member))
/**
* list_for_each_entry_safe_from - iterate over list from current point safe against removal
@@ -716,10 +738,10 @@ static inline void list_splice_tail_init(struct list_head *list,
* Iterate over list of given type from current point, safe against
* removal of list entry.
*/
-#define list_for_each_entry_safe_from(pos, n, head, member) \
- for (n = list_next_entry(pos, member); \
- &pos->member != (head); \
- pos = n, n = list_next_entry(n, member))
+#define list_for_each_entry_safe_from(pos, n, head, member) \
+ for (n = list_next_entry_safe(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_next_entry_safe(n, member))
/**
* list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
@@ -733,9 +755,9 @@ static inline void list_splice_tail_init(struct list_head *list,
*/
#define list_for_each_entry_safe_reverse(pos, n, head, member) \
for (pos = list_last_entry(head, typeof(*pos), member), \
- n = list_prev_entry(pos, member); \
+ n = list_prev_entry_safe(pos, member); \
&pos->member != (head); \
- pos = n, n = list_prev_entry(n, member))
+ pos = n, n = list_prev_entry_safe(n, member))
/**
* list_safe_reset_next - reset a stale list_for_each_entry_safe loop
@@ -750,7 +772,7 @@ static inline void list_splice_tail_init(struct list_head *list,
* completing the current iteration of the loop body.
*/
#define list_safe_reset_next(pos, n, member) \
- n = list_next_entry(pos, member)
+ n = list_next_entry_safe(pos, member)
/*
* Double linked lists with a single pointer list head.
--
2.20.1
The patch titled
Subject: page-flags: fix a crash at SetPageError(THP_SWAP)
has been added to the -mm tree. Its filename is
page-flags-fix-a-crash-at-setpageerrorthp_swap.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/page-flags-fix-a-crash-at-setpagee…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/page-flags-fix-a-crash-at-setpagee…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Qian Cai <cai(a)lca.pw>
Subject: page-flags: fix a crash at SetPageError(THP_SWAP)
The commit bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped
out") supported writing THP to a swap device but forgot to upgrade an
older commit df8c94d13c7e ("page-flags: define behavior of FS/IO-related
flags on compound pages") which could trigger a crash during THP swapping
out with DEBUG_VM_PGFLAGS=y,
kernel BUG at include/linux/page-flags.h:317!
page dumped because: VM_BUG_ON_PAGE(1 && PageCompound(page))
page:fffff3b2ec3a8000 refcount:512 mapcount:0 mapping:000000009eb0338c
index:0x7f6e58200 head:fffff3b2ec3a8000 order:9 compound_mapcount:0
compound_pincount:0
anon flags:
0x45fffe0000d8454(uptodate|lru|workingset|owner_priv_1|writeback|head|reclaim|swapbacked)
end_swap_bio_write()
SetPageError(page)
VM_BUG_ON_PAGE(1 && PageCompound(page))
<IRQ>
bio_endio+0x297/0x560
dec_pending+0x218/0x430 [dm_mod]
clone_endio+0xe4/0x2c0 [dm_mod]
bio_endio+0x297/0x560
blk_update_request+0x201/0x920
scsi_end_request+0x6b/0x4b0
scsi_io_completion+0x509/0x7e0
scsi_finish_command+0x1ed/0x2a0
scsi_softirq_done+0x1c9/0x1d0
__blk_mqnterrupt+0xf/0x20
</IRQ>
Fix by checking PF_NO_TAIL in those places instead.
Link: http://lkml.kernel.org/r/20200310235846.1319-1-cai@lca.pw
Fixes: bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out")
Signed-off-by: Qian Cai <cai(a)lca.pw>
Acked-by: "Huang, Ying" <ying.huang(a)intel.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/page-flags.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/include/linux/page-flags.h~page-flags-fix-a-crash-at-setpageerrorthp_swap
+++ a/include/linux/page-flags.h
@@ -311,7 +311,7 @@ static inline int TestClearPage##uname(s
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
-PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
+PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
PAGEFLAG(Referenced, referenced, PF_HEAD)
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
__SETPAGEFLAG(Referenced, referenced, PF_HEAD)
_
Patches currently in -mm which might be from cai(a)lca.pw are
page-flags-fix-a-crash-at-setpageerrorthp_swap.patch
mm-disable-kcsan-for-kmemleak.patch
mm-swapfile-fix-data-races-in-try_to_unuse.patch
kasan-detect-negative-size-in-memory-operation-function-fix.patch
mm-vmscan-fix-data-races-at-kswapd_classzone_idx.patch
percpu_counter-fix-a-data-race-at-vm_committed_as.patch
mm-frontswap-mark-various-intentional-data-races.patch
mm-page_io-mark-various-intentional-data-races.patch
mm-page_io-mark-various-intentional-data-races-v2.patch
mm-swap_state-mark-various-intentional-data-races.patch
mm-swapfile-fix-and-annotate-various-data-races.patch
mm-swapfile-fix-and-annotate-various-data-races-v2.patch
mm-page_counter-fix-various-data-races-at-memsw.patch
mm-memcontrol-fix-a-data-race-in-scan-count.patch
mm-list_lru-fix-a-data-race-in-list_lru_count_one.patch
mm-mempool-fix-a-data-race-in-mempool_free.patch
mm-util-annotate-an-data-race-at-vm_committed_as.patch
mm-rmap-annotate-a-data-race-at-tlb_flush_batched.patch
mm-annotate-a-data-race-in-page_zonenum.patch
mm-swap-annotate-data-races-for-lru_rotate_pvecs.patch
The busy timeout for the CMD5 to put the eMMC into sleep state, is specific
to the card. Potentially the timeout may exceed the host->max_busy_timeout.
If that becomes the case, mmc_sleep() converts from using an R1B response
to an R1 response, as to prevent the host from doing HW busy detection.
However, it has turned out that some hosts requires an R1B response no
matter what, so let's respect that via checking MMC_CAP_NEED_RSP_BUSY. Note
that, if the R1B gets enforced, the host becomes fully responsible of
managing the needed busy timeout, in one way or the other.
Suggested-by: Sowjanya Komatineni <skomatineni(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
---
drivers/mmc/core/mmc.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index f6912ded652d..de14b5845f52 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1910,9 +1910,12 @@ static int mmc_sleep(struct mmc_host *host)
* If the max_busy_timeout of the host is specified, validate it against
* the sleep cmd timeout. A failure means we need to prevent the host
* from doing hw busy detection, which is done by converting to a R1
- * response instead of a R1B.
+ * response instead of a R1B. Note, some hosts requires R1B, which also
+ * means they are on their own when it comes to deal with the busy
+ * timeout.
*/
- if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout)) {
+ if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout &&
+ (timeout_ms > host->max_busy_timeout)) {
cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
} else {
cmd.flags = MMC_RSP_R1B | MMC_CMD_AC;
--
2.20.1
From: Eric Biggers <ebiggers(a)google.com>
It's long been possible to disable kernel module autoloading completely
by setting /proc/sys/kernel/modprobe to the empty string. This can be
preferable to setting it to a nonexistent file since it avoids the
overhead of an attempted execve(), avoids potential deadlocks, and
avoids the call to security_kernel_module_request() and thus on
SELinux-based systems eliminates the need to write SELinux rules to
dontaudit module_request.
However, when module autoloading is disabled in this way,
request_module() returns 0. This is broken because callers expect 0 to
mean that the module was successfully loaded.
Apparently this was never noticed because this method of disabling
module autoloading isn't used much, and also most callers don't use the
return value of request_module() since it's always necessary to check
whether the module registered its functionality or not anyway. But
improperly returning 0 can indeed confuse a few callers, for example
get_fs_type() in fs/filesystems.c where it causes a WARNING to be hit:
if (!fs && (request_module("fs-%.*s", len, name) == 0)) {
fs = __get_fs_type(name, len);
WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name);
}
This is easily reproduced with:
echo > /proc/sys/kernel/modprobe
mount -t NONEXISTENT none /
It causes:
request_module fs-NONEXISTENT succeeded, but still no fs?
WARNING: CPU: 1 PID: 1106 at fs/filesystems.c:275 get_fs_type+0xd6/0xf0
[...]
Arguably this warning is broken and should be removed, since the module
could have been unloaded already. However, request_module() should also
correctly return an error when it fails. So let's make it return
-ENOENT, which matches the error when the modprobe binary doesn't exist.
Cc: stable(a)vger.kernel.org
Cc: Alexei Starovoitov <ast(a)kernel.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Jeff Vander Stoep <jeffv(a)google.com>
Cc: Jessica Yu <jeyu(a)kernel.org>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Luis Chamberlain <mcgrof(a)kernel.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
kernel/kmod.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/kmod.c b/kernel/kmod.c
index bc6addd9152b..a2de58de6ab6 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -120,7 +120,7 @@ static int call_modprobe(char *module_name, int wait)
* invoke it.
*
* If module auto-loading support is disabled then this function
- * becomes a no-operation.
+ * simply returns -ENOENT.
*/
int __request_module(bool wait, const char *fmt, ...)
{
@@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...)
WARN_ON_ONCE(wait && current_is_async());
if (!modprobe_path[0])
- return 0;
+ return -ENOENT;
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
--
2.25.1.481.gfbce0eb801-goog