From: Mark Rutland mark.rutland@arm.com
[ Upstream commit dc1b4df09acdca7a89806b28f235cd6d8dcd3d24 ]
Arnd reports that on 32-bit architectures, the fallbacks for atomic64_read_acquire() and atomic64_set_release() are broken as they use smp_load_acquire() and smp_store_release() respectively, which do not work on types larger than the native word size.
Since those contain compiletime_assert_atomic_type(), any attempt to use those fallbacks will result in a build-time error. e.g. with the following added to arch/arm/kernel/setup.c:
| void test_atomic64(atomic64_t *v)
| {
|     atomic64_set_release(v, 5);
|     atomic64_read_acquire(v);
| }
The compiler will complain as follows:
| In file included from <command-line>:
| In function 'arch_atomic64_set_release',
|     inlined from 'test_atomic64' at ./include/linux/atomic/atomic-instrumented.h:669:2:
| ././include/linux/compiler_types.h:346:38: error: call to '__compiletime_assert_9' declared with attribute error: Need native word sized stores/loads for atomicity.
|   346 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
|       |                                             ^
| ././include/linux/compiler_types.h:327:4: note: in definition of macro '__compiletime_assert'
|   327 |    prefix ## suffix();                             \
|       |    ^~~~~~
| ././include/linux/compiler_types.h:346:2: note: in expansion of macro '_compiletime_assert'
|   346 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
|       |  ^~~~~~~~~~~~~~~~~~~
| ././include/linux/compiler_types.h:349:2: note: in expansion of macro 'compiletime_assert'
|   349 |  compiletime_assert(__native_word(t),                           \
|       |  ^~~~~~~~~~~~~~~~~~
| ./include/asm-generic/barrier.h:133:2: note: in expansion of macro 'compiletime_assert_atomic_type'
|   133 |  compiletime_assert_atomic_type(*p);                            \
|       |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| ./include/asm-generic/barrier.h:164:55: note: in expansion of macro '__smp_store_release'
|   164 | #define smp_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0)
|       |                                                       ^~~~~~~~~~~~~~~~~~~
| ./include/linux/atomic/atomic-arch-fallback.h:1270:2: note: in expansion of macro 'smp_store_release'
|  1270 |  smp_store_release(&(v)->counter, i);
|       |  ^~~~~~~~~~~~~~~~~
| make[2]: *** [scripts/Makefile.build:288: arch/arm/kernel/setup.o] Error 1
| make[1]: *** [scripts/Makefile.build:550: arch/arm/kernel] Error 2
| make: *** [Makefile:1831: arch/arm] Error 2
Fix this by only using smp_load_acquire() and smp_store_release() for native atomic types, and otherwise falling back to the regular barriers necessary for acquire/release semantics, as we do in the more generic acquire and release fallbacks.
Since the fallback templates are used to generate the atomic64_*() and atomic_*() operations, the __native_word() check is added to both. For the atomic_*() operations, which are always 32-bit, the __native_word() check is redundant but not harmful, as it is always true.
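For reference, __native_word() is essentially a sizeof() comparison against the native integer types (see include/linux/compiler_types.h), so the check is a compile-time constant and the unused branch is discarded by the compiler. A rough, userspace-compilable sketch of why atomic64_t fails the check on a 32-bit kernel (the typedefs below are illustrative stand-ins, not the kernel's own):

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative stand-ins for the kernel's atomic_t / atomic64_t. */
    typedef struct { int32_t counter; } my_atomic_t;
    typedef struct { int64_t counter; } my_atomic64_t;

    /* Mirrors the spirit of __native_word() in compiler_types.h. */
    #define native_word(t) \
            (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
             sizeof(t) == sizeof(int)  || sizeof(t) == sizeof(long))

    int main(void)
    {
        /* On an ILP32 target (e.g. 32-bit arm) sizeof(long) == 4, so the
         * 8-byte atomic64 counter is not a native word and the fallback
         * must use explicit fences instead of smp_load_acquire(). */
        printf("atomic_t   native: %d\n", (int)native_word(my_atomic_t));
        printf("atomic64_t native: %d\n", (int)native_word(my_atomic64_t));
        return 0;
    }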
For the example above this works as expected on 32-bit, e.g. for arm multi_v7_defconfig:
| <test_atomic64>:
|     push    {r4, r5}
|     dmb     ish
|     pldw    [r0]
|     mov     r2, #5
|     mov     r3, #0
|     ldrexd  r4, [r0]
|     strexd  r4, r2, [r0]
|     teq     r4, #0
|     bne     484 <test_atomic64+0x14>
|     ldrexd  r2, [r0]
|     dmb     ish
|     pop     {r4, r5}
|     bx      lr
... and also on 64-bit, e.g. for arm64 defconfig:
| <test_atomic64>:
|     bti     c
|     paciasp
|     mov     x1, #0x5
|     stlr    x1, [x0]
|     ldar    x0, [x0]
|     autiasp
|     ret
Reported-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Mark Rutland mark.rutland@arm.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Reviewed-by: Ard Biesheuvel ardb@kernel.org Reviewed-by: Boqun Feng boqun.feng@gmail.com Link: https://lore.kernel.org/r/20220207101943.439825-1-mark.rutland@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/atomic/atomic-arch-fallback.h | 38 ++++++++++++++++++--- scripts/atomic/fallbacks/read_acquire | 11 +++++- scripts/atomic/fallbacks/set_release | 7 +++- 3 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index a3dba31df01e..6db58d180866 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -151,7 +151,16 @@ static __always_inline int arch_atomic_read_acquire(const atomic_t *v) { - return smp_load_acquire(&(v)->counter); + int ret; + + if (__native_word(atomic_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic_read_acquire arch_atomic_read_acquire #endif @@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v) static __always_inline void arch_atomic_set_release(atomic_t *v, int i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic_set(v, i); + } } #define arch_atomic_set_release arch_atomic_set_release #endif @@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v) static __always_inline s64 arch_atomic64_read_acquire(const atomic64_t *v) { - return smp_load_acquire(&(v)->counter); + s64 ret; + + if (__native_word(atomic64_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic64_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic64_read_acquire arch_atomic64_read_acquire #endif @@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v) static __always_inline void arch_atomic64_set_release(atomic64_t *v, s64 i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic64_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic64_set(v, i); + } } #define arch_atomic64_set_release arch_atomic64_set_release #endif @@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif
#endif /* _LINUX_ATOMIC_FALLBACK_H */ -// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 +// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae diff --git a/scripts/atomic/fallbacks/read_acquire b/scripts/atomic/fallbacks/read_acquire index 803ba7561076..a0ea1d26e6b2 100755 --- a/scripts/atomic/fallbacks/read_acquire +++ b/scripts/atomic/fallbacks/read_acquire @@ -2,6 +2,15 @@ cat <<EOF static __always_inline ${ret} arch_${atomic}_read_acquire(const ${atomic}_t *v) { - return smp_load_acquire(&(v)->counter); + ${int} ret; + + if (__native_word(${atomic}_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_${atomic}_read(v); + __atomic_acquire_fence(); + } + + return ret; } EOF diff --git a/scripts/atomic/fallbacks/set_release b/scripts/atomic/fallbacks/set_release index 86ede759f24e..05cdb7f42477 100755 --- a/scripts/atomic/fallbacks/set_release +++ b/scripts/atomic/fallbacks/set_release @@ -2,6 +2,11 @@ cat <<EOF static __always_inline void arch_${atomic}_set_release(${atomic}_t *v, ${int} i) { - smp_store_release(&(v)->counter, i); + if (__native_word(${atomic}_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_${atomic}_set(v, i); + } } EOF
From: Waiman Long longman@redhat.com
[ Upstream commit fb7275acd6fb988313dddd8d3d19efa70d9015ad ]
When dumping lock_classes information via /proc/lockdep, we can't take the lockdep lock as the lock hold time is indeterminate. Iterating over all_lock_classes without holding the lock can be dangerous as there is a slight chance that the iteration may branch off to other lists, leading to an infinite loop or even to invalid memory accesses if the all_lock_classes list is changed in parallel.
To avoid this problem, iteration of lock classes is now done directly on the lock_classes array itself. The lock_classes_in_use bitmap is checked to see if the lock class is being used. To avoid iterating the full array every time, a new max_lock_class_idx value is added to track the maximum lock_class index that is currently being used.
We can theoretically take the lockdep lock for iterating all_lock_classes when other lockdep files (lockdep_stats and lock_stat) are accessed as the lock hold time will be shorter for them. For consistency, they are also modified to iterate the lock_classes array directly.
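As a rough sketch of the array-walk pattern described above (simplified, hypothetical types; the in-tree code wraps this in the iterate_lock_classes() macro and uses the real lock_classes[] and lock_classes_in_use bitmap):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_CLASSES 8192

    struct class_sketch { const char *name; };

    static struct class_sketch classes[MAX_CLASSES];
    static bool class_in_use[MAX_CLASSES];   /* stands in for lock_classes_in_use */
    static unsigned long max_class_idx;      /* stands in for max_lock_class_idx */

    /* Bounded array walk: no list pointers are followed, so a racing
     * register/zap of a class cannot redirect the iteration into
     * free_lock_classes or freed memory. */
    static void dump_classes(void)
    {
        for (unsigned long idx = 0; idx <= max_class_idx; idx++) {
            if (!class_in_use[idx])
                continue;
            printf("%lu: %s\n", idx, classes[idx].name);
        }
    }

    int main(void)
    {
        classes[0].name = "example_lock";
        class_in_use[0] = true;
        max_class_idx = 0;
        dump_classes();
        return 0;
    }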
Signed-off-by: Waiman Long longman@redhat.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/20220211035526.1329503-2-longman@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/locking/lockdep.c | 14 +++++--- kernel/locking/lockdep_internals.h | 6 ++-- kernel/locking/lockdep_proc.c | 51 +++++++++++++++++++++++++----- 3 files changed, 56 insertions(+), 15 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 92127296cf2b..70dfccaa1d8d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -183,11 +183,9 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES); static struct hlist_head lock_keys_hash[KEYHASH_SIZE]; unsigned long nr_lock_classes; unsigned long nr_zapped_classes; -#ifndef CONFIG_DEBUG_LOCKDEP -static -#endif +unsigned long max_lock_class_idx; struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; -static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); +DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
static inline struct lock_class *hlock_class(struct held_lock *hlock) { @@ -338,7 +336,7 @@ static inline void lock_release_holdtime(struct held_lock *hlock) * elements. These elements are linked together by the lock_entry member in * struct lock_class. */ -LIST_HEAD(all_lock_classes); +static LIST_HEAD(all_lock_classes); static LIST_HEAD(free_lock_classes);
/** @@ -1240,6 +1238,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct hlist_head *hash_head; struct lock_class *class; + int idx;
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
@@ -1305,6 +1304,9 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) * of classes. */ list_move_tail(&class->lock_entry, &all_lock_classes); + idx = class - lock_classes; + if (idx > max_lock_class_idx) + max_lock_class_idx = idx;
if (verbose(class)) { graph_unlock(); @@ -5986,6 +5988,8 @@ static void zap_class(struct pending_free *pf, struct lock_class *class) WRITE_ONCE(class->name, NULL); nr_lock_classes--; __clear_bit(class - lock_classes, lock_classes_in_use); + if (class - lock_classes == max_lock_class_idx) + max_lock_class_idx--; } else { WARN_ONCE(true, "%s() failed for class %s\n", __func__, class->name); diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index ecb8662e7a4e..bbe9000260d0 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -121,7 +121,6 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
-extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[];
#define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1) @@ -151,6 +150,10 @@ extern unsigned int nr_large_chain_blocks;
extern unsigned int max_lockdep_depth; extern unsigned int max_bfs_queue_depth; +extern unsigned long max_lock_class_idx; + +extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; +extern unsigned long lock_classes_in_use[];
#ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); @@ -205,7 +208,6 @@ struct lockdep_stats { };
DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); -extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
#define __debug_atomic_inc(ptr) \ this_cpu_inc(lockdep_stats.ptr); diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index b8d9a050c337..15fdc7fa5c68 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -24,14 +24,33 @@
#include "lockdep_internals.h"
+/* + * Since iteration of lock_classes is done without holding the lockdep lock, + * it is not safe to iterate all_lock_classes list directly as the iteration + * may branch off to free_lock_classes or the zapped list. Iteration is done + * directly on the lock_classes array by checking the lock_classes_in_use + * bitmap and max_lock_class_idx. + */ +#define iterate_lock_classes(idx, class) \ + for (idx = 0, class = lock_classes; idx <= max_lock_class_idx; \ + idx++, class++) + static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &all_lock_classes, pos); + struct lock_class *class = v; + + ++class; + *pos = class - lock_classes; + return (*pos > max_lock_class_idx) ? NULL : class; }
static void *l_start(struct seq_file *m, loff_t *pos) { - return seq_list_start_head(&all_lock_classes, *pos); + unsigned long idx = *pos; + + if (idx > max_lock_class_idx) + return NULL; + return lock_classes + idx; }
static void l_stop(struct seq_file *m, void *v) @@ -57,14 +76,16 @@ static void print_name(struct seq_file *m, struct lock_class *class)
static int l_show(struct seq_file *m, void *v) { - struct lock_class *class = list_entry(v, struct lock_class, lock_entry); + struct lock_class *class = v; struct lock_list *entry; char usage[LOCK_USAGE_CHARS]; + int idx = class - lock_classes;
- if (v == &all_lock_classes) { + if (v == lock_classes) seq_printf(m, "all lock classes:\n"); + + if (!test_bit(idx, lock_classes_in_use)) return 0; - }
seq_printf(m, "%p", class->key); #ifdef CONFIG_DEBUG_LOCKDEP @@ -220,8 +241,11 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#ifdef CONFIG_PROVE_LOCKING struct lock_class *class; + unsigned long idx;
- list_for_each_entry(class, &all_lock_classes, lock_entry) { + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue;
if (class->usage_mask == 0) nr_unused++; @@ -254,6 +278,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
sum_forward_deps += lockdep_count_forward_deps(class); } + #ifdef CONFIG_DEBUG_LOCKDEP DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused); #endif @@ -345,6 +370,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v) seq_printf(m, " max bfs queue depth: %11u\n", max_bfs_queue_depth); #endif + seq_printf(m, " max lock class index: %11lu\n", + max_lock_class_idx); lockdep_stats_debug_show(m); seq_printf(m, " debug_locks: %11u\n", debug_locks); @@ -622,12 +649,16 @@ static int lock_stat_open(struct inode *inode, struct file *file) if (!res) { struct lock_stat_data *iter = data->stats; struct seq_file *m = file->private_data; + unsigned long idx;
- list_for_each_entry(class, &all_lock_classes, lock_entry) { + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; iter->class = class; iter->stats = lock_stats(class); iter++; } + data->iter_end = iter;
sort(data->stats, data->iter_end - data->stats, @@ -645,6 +676,7 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct lock_class *class; + unsigned long idx; char c;
if (count) { @@ -654,8 +686,11 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf, if (c != '0') return count;
- list_for_each_entry(class, &all_lock_classes, lock_entry) + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; clear_lock_stats(class); + } } return count; }
From: Ritesh Harjani riteshh@linux.ibm.com
[ Upstream commit a5c0e2fdf7cea535ba03259894dc184e5a4c2800 ]
ext4_mb_mark_bb() currently wrongly calculates cluster len (clen) and flex_group->free_clusters. This patch fixes that.
Identified based on code review of ext4_mb_mark_bb() function.
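To illustrate the miscalculation (a hedged sketch: the macros below approximate EXT4_B2C()/EXT4_NUM_B2C(), whose real definitions live in fs/ext4/ext4.h), with a bigalloc cluster of 16 blocks a 20-block range touches 2 clusters, but a round-down conversion reports only 1:

    #include <stdio.h>

    /* Simplified stand-ins for the ext4 cluster conversion helpers. */
    #define CLUSTER_BITS    4                   /* bigalloc: 16 blocks per cluster */
    #define CLUSTER_RATIO   (1u << CLUSTER_BITS)

    #define B2C(blk)        ((blk) >> CLUSTER_BITS)                          /* rounds down */
    #define NUM_B2C(blks)   (((blks) + CLUSTER_RATIO - 1) >> CLUSTER_BITS)   /* rounds up */

    int main(void)
    {
        unsigned int len = 20;  /* blocks in the extent being marked */

        /* A 20-block range needs two 16-block clusters, so the
         * round-down conversion undercounts the clusters to mark. */
        printf("B2C(%u)     = %u\n", len, B2C(len));      /* 1 */
        printf("NUM_B2C(%u) = %u\n", len, NUM_B2C(len));  /* 2 */
        return 0;
    }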
Signed-off-by: Ritesh Harjani riteshh@linux.ibm.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/a0b035d536bafa88110b74456853774b64c8ac40.164499260... Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/mballoc.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 74e3286d0e26..9a749327336f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3899,10 +3899,11 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; - int i, clen, err; + int i, err; int already; + unsigned int clen, clen_changed;
- clen = EXT4_B2C(sbi, len); + clen = EXT4_NUM_B2C(sbi, len);
ext4_get_group_no_and_offset(sb, block, &group, &blkoff); bitmap_bh = ext4_read_block_bitmap(sb, group); @@ -3923,6 +3924,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) already++;
+ clen_changed = clen - already; if (state) ext4_set_bits(bitmap_bh->b_data, blkoff, clen); else @@ -3935,9 +3937,9 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, group, gdp)); } if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen + already; + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; else - clen = ext4_free_group_clusters(sb, gdp) + clen - already; + clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
ext4_free_group_clusters_set(sb, gdp, clen); ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); @@ -3947,10 +3949,13 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group);
- atomic64_sub(len, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); + if (state) + atomic64_sub(clen_changed, &fg->free_clusters); + else + atomic64_add(clen_changed, &fg->free_clusters); }
err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
From: Ritesh Harjani riteshh@linux.ibm.com
[ Upstream commit bfdc502a4a4c058bf4cbb1df0c297761d528f54d ]
In case of the flex_bg feature (which is enabled by default), extents for any given inode might span across blocks from two different block groups. ext4_mb_mark_bb() only reads the buffer_head of the block bitmap once for the starting block group, but it fails to read it again when the extent crosses into another block group. Then, in the loop below, it accesses memory beyond the block group bitmap buffer_head and triggers a data abort.
	for (i = 0; i < clen; i++)
		if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
			already++;
This patch adds a check for the block group boundary in ext4_mb_mark_bb() and updates the buffer_head (bitmap_bh) for every block group the range touches.
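A userspace sketch of the per-group chunking described above (hypothetical numbers; the in-tree loop derives the same bounds from EXT4_BLOCKS_PER_GROUP() and the group offset returned by ext4_get_group_no_and_offset()):

    #include <stdio.h>

    #define BLOCKS_PER_GROUP 32768u   /* typical for 4k blocks */

    int main(void)
    {
        unsigned long long block = 32760;  /* start of the extent */
        unsigned int len = 16;             /* extent length in blocks */

        while (len > 0) {
            unsigned int group = block / BLOCKS_PER_GROUP;
            unsigned int blkoff = block % BLOCKS_PER_GROUP;
            /* Clamp this chunk to the end of the current group so the
             * bitmap of the next group is (re)read before it is used. */
            unsigned int thisgrp_len = len;
            if (thisgrp_len > BLOCKS_PER_GROUP - blkoff)
                thisgrp_len = BLOCKS_PER_GROUP - blkoff;

            printf("group %u: mark %u blocks at offset %u\n",
                   group, thisgrp_len, blkoff);

            block += thisgrp_len;
            len -= thisgrp_len;
        }
        return 0;
    }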
Without this patch, I was easily able to hit a data access abort on the Power platform.
<...>
[ 74.327662] EXT4-fs error (device loop3): ext4_mb_generate_buddy:1141: group 11, block bitmap and bg descriptor inconsistent: 21248 vs 23294 free clusters
[ 74.533214] EXT4-fs (loop3): shut down requested (2)
[ 74.536705] Aborting journal on device loop3-8.
[ 74.702705] BUG: Unable to handle kernel data access on read at 0xc00000005e980000
[ 74.703727] Faulting instruction address: 0xc0000000007bffb8
cpu 0xd: Vector: 300 (Data Access) at [c000000015db7060]
    pc: c0000000007bffb8: ext4_mb_mark_bb+0x198/0x5a0
    lr: c0000000007bfeec: ext4_mb_mark_bb+0xcc/0x5a0
    sp: c000000015db7300
   msr: 800000000280b033
   dar: c00000005e980000
 dsisr: 40000000
  current = 0xc000000027af6880
  paca    = 0xc00000003ffd5200   irqmask: 0x03   irq_happened: 0x01
    pid   = 5167, comm = mount
<...>
enter ? for help
[c000000015db7380] c000000000782708 ext4_ext_clear_bb+0x378/0x410
[c000000015db7400] c000000000813f14 ext4_fc_replay+0x1794/0x2000
[c000000015db7580] c000000000833f7c do_one_pass+0xe9c/0x12a0
[c000000015db7710] c000000000834504 jbd2_journal_recover+0x184/0x2d0
[c000000015db77c0] c000000000841398 jbd2_journal_load+0x188/0x4a0
[c000000015db7880] c000000000804de8 ext4_fill_super+0x2638/0x3e10
[c000000015db7a40] c0000000005f8404 get_tree_bdev+0x2b4/0x350
[c000000015db7ae0] c0000000007ef058 ext4_get_tree+0x28/0x40
[c000000015db7b00] c0000000005f6344 vfs_get_tree+0x44/0x100
[c000000015db7b70] c00000000063c408 path_mount+0xdd8/0xe70
[c000000015db7c40] c00000000063c8f0 sys_mount+0x450/0x550
[c000000015db7d50] c000000000035770 system_call_exception+0x4a0/0x4e0
[c000000015db7e10] c00000000000c74c system_call_common+0xec/0x250
Signed-off-by: Ritesh Harjani riteshh@linux.ibm.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/2609bc8f66fc15870616ee416a18a3d392a209c4.164499260... Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/mballoc.c | 131 +++++++++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 55 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9a749327336f..9b3fad3235b8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3901,72 +3901,93 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, ext4_grpblk_t blkoff; int i, err; int already; - unsigned int clen, clen_changed; + unsigned int clen, clen_changed, thisgrp_len;
- clen = EXT4_NUM_B2C(sbi, len); - - ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - goto out_err; - } - - err = -EIO; - gdp = ext4_get_group_desc(sb, group, &gdp_bh); - if (!gdp) - goto out_err; + while (len > 0) { + ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
- ext4_lock_group(sb, group); - already = 0; - for (i = 0; i < clen; i++) - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) - already++; - - clen_changed = clen - already; - if (state) - ext4_set_bits(bitmap_bh->b_data, blkoff, clen); - else - mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - ext4_free_group_clusters_set(sb, gdp, - ext4_free_clusters_after_init(sb, - group, gdp)); - } - if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen_changed; - else - clen = ext4_free_group_clusters(sb, gdp) + clen_changed; + /* + * Check to see if we are freeing blocks across a group + * boundary. + * In case of flex_bg, this can happen that (block, len) may + * span across more than one group. In that case we need to + * get the corresponding group metadata to work with. + * For this we have goto again loop. + */ + thisgrp_len = min_t(unsigned int, (unsigned int)len, + EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); + clen = EXT4_NUM_B2C(sbi, thisgrp_len);
- ext4_free_group_clusters_set(sb, gdp, clen); - ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); + bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; + break; + }
- ext4_unlock_group(sb, group); + err = -EIO; + gdp = ext4_get_group_desc(sb, group, &gdp_bh); + if (!gdp) + break;
- if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, group); - struct flex_groups *fg = sbi_array_rcu_deref(sbi, - s_flex_groups, flex_group); + ext4_lock_group(sb, group); + already = 0; + for (i = 0; i < clen; i++) + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == + !state) + already++;
+ clen_changed = clen - already; if (state) - atomic64_sub(clen_changed, &fg->free_clusters); + ext4_set_bits(bitmap_bh->b_data, blkoff, clen); else - atomic64_add(clen_changed, &fg->free_clusters); + mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_clusters_after_init(sb, group, gdp)); + } + if (state) + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; + else + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; + + ext4_free_group_clusters_set(sb, gdp, clen); + ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); + + ext4_unlock_group(sb, group); + + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group); + + if (state) + atomic64_sub(clen_changed, &fg->free_clusters); + else + atomic64_add(clen_changed, &fg->free_clusters); + + } + + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); + if (err) + break; + sync_dirty_buffer(bitmap_bh); + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); + sync_dirty_buffer(gdp_bh); + if (err) + break; + + block += thisgrp_len; + len -= thisgrp_len; + brelse(bitmap_bh); + BUG_ON(len < 0); }
- err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); if (err) - goto out_err; - sync_dirty_buffer(bitmap_bh); - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); - sync_dirty_buffer(gdp_bh); - -out_err: - brelse(bitmap_bh); + brelse(bitmap_bh); }
/*
From: Valentin Schneider valentin.schneider@arm.com
[ Upstream commit fa2c3254d7cfff5f7a916ab928a562d1165f17bb ]
As of commit
c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")
the following sequence becomes possible:
   p->__state = TASK_INTERRUPTIBLE;
   __schedule()
     deactivate_task(p);
                                        ttwu()
                                          READ !p->on_rq
                                          p->__state=TASK_WAKING
     trace_sched_switch()
       __trace_sched_switch_state()
         task_state_index()
           return 0;
TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in the trace event.
Prevent this by pushing the value read from __schedule() down the trace event.
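To see why a non-reportable state degrades to 'R' (a sketch; the state constants below are approximations of those in include/linux/sched.h, and only the masking logic matters here):

    #include <stdio.h>

    /* Approximate task-state bits (illustrative; see include/linux/sched.h). */
    #define TASK_INTERRUPTIBLE 0x0001
    #define TASK_WAKING        0x0200
    #define TASK_REPORT        0x007f   /* only the low "reportable" states */

    static const char state_char[] = "RSDTtXZPI";

    /* Tiny fls()-style helper: index of the highest set bit, 0 if none. */
    static unsigned int fls_u(unsigned int x)
    {
        unsigned int r = 0;
        while (x) { r++; x >>= 1; }
        return r;
    }

    int main(void)
    {
        unsigned int states[] = { TASK_INTERRUPTIBLE, TASK_WAKING };

        for (int i = 0; i < 2; i++) {
            unsigned int idx = fls_u(states[i] & TASK_REPORT);
            /* TASK_WAKING & TASK_REPORT == 0 -> index 0 -> 'R' (running) */
            printf("state 0x%04x reported as '%c'\n", states[i], state_char[idx]);
        }
        return 0;
    }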
Reported-by: Abhijeet Dharmapurikar adharmap@quicinc.com Signed-off-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Reviewed-by: Steven Rostedt (Google) rostedt@goodmis.org Link: https://lore.kernel.org/r/20220120162520.570782-2-valentin.schneider@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/sched.h | 11 ++++++++--- include/trace/events/sched.h | 11 +++++++---- kernel/sched/core.c | 4 ++-- kernel/trace/fgraph.c | 4 +++- kernel/trace/ftrace.c | 4 +++- kernel/trace/trace_events.c | 8 ++++++-- kernel/trace/trace_osnoise.c | 4 +++- kernel/trace/trace_sched_switch.c | 1 + kernel/trace/trace_sched_wakeup.c | 1 + 9 files changed, 34 insertions(+), 14 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 76e869550646..8fcf76fed984 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1616,10 +1616,10 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
-static inline unsigned int task_state_index(struct task_struct *tsk) +static inline unsigned int __task_state_index(unsigned int tsk_state, + unsigned int tsk_exit_state) { - unsigned int tsk_state = READ_ONCE(tsk->__state); - unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; + unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT;
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
@@ -1629,6 +1629,11 @@ static inline unsigned int task_state_index(struct task_struct *tsk) return fls(state); }
+static inline unsigned int task_state_index(struct task_struct *tsk) +{ + return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state); +} + static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 94640482cfe7..65e786756321 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -187,7 +187,9 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, TP_ARGS(p));
#ifdef CREATE_TRACE_POINTS -static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) +static inline long __trace_sched_switch_state(bool preempt, + unsigned int prev_state, + struct task_struct *p) { unsigned int state;
@@ -208,7 +210,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * * it for left shift operation to get the correct task->state * mapping. */ - state = task_state_index(p); + state = __task_state_index(prev_state, p->exit_state);
return state ? (1 << (state - 1)) : state; } @@ -220,10 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * TRACE_EVENT(sched_switch,
TP_PROTO(bool preempt, + unsigned int prev_state, struct task_struct *prev, struct task_struct *next),
- TP_ARGS(preempt, prev, next), + TP_ARGS(preempt, prev_state, prev, next),
TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) @@ -239,7 +242,7 @@ TRACE_EVENT(sched_switch, memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; - __entry->prev_state = __trace_sched_switch_state(preempt, prev); + __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev); memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a0747eaa2dba..7233b7df3768 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4786,7 +4786,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) { struct rq *rq = this_rq(); struct mm_struct *mm = rq->prev_mm; - long prev_state; + unsigned int prev_state;
/* * The previous task will have left us with a preempt_count of 2 @@ -6294,7 +6294,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) migrate_disable_switch(rq, prev); psi_sched_switch(prev, next, !task_on_rq_queued(prev));
- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); + trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
/* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index b8a0d1d564fb..bb383ac91198 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -413,7 +413,9 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
static void ftrace_graph_probe_sched_switch(void *ignore, bool preempt, - struct task_struct *prev, struct task_struct *next) + unsigned int prev_state, + struct task_struct *prev, + struct task_struct *next) { unsigned long long timestamp; int index; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c672040142e9..136520a5b7c0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7093,7 +7093,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
static void ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, - struct task_struct *prev, struct task_struct *next) + unsigned int prev_state, + struct task_struct *prev, + struct task_struct *next) { struct trace_array *tr = data; struct trace_pid_list *pid_list; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 44d031ffe511..dd17171829d6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -759,7 +759,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
static void event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, - struct task_struct *prev, struct task_struct *next) + unsigned int prev_state, + struct task_struct *prev, + struct task_struct *next) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; @@ -783,7 +785,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
static void event_filter_pid_sched_switch_probe_post(void *data, bool preempt, - struct task_struct *prev, struct task_struct *next) + unsigned int prev_state, + struct task_struct *prev, + struct task_struct *next) { struct trace_array *tr = data; struct trace_pid_list *no_pid_list; diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 93de784ee681..c3bd968e6147 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1000,7 +1000,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) * used to record the beginning and to report the end of a thread noise window. */ static void -trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p, +trace_sched_switch_callback(void *data, bool preempt, + unsigned int prev_state, + struct task_struct *p, struct task_struct *n) { struct osnoise_variables *osn_var = this_cpu_osn_var(); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e304196d7c28..993b0ed10d8c 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -22,6 +22,7 @@ static DEFINE_MUTEX(sched_register_mutex);
static void probe_sched_switch(void *ignore, bool preempt, + unsigned int prev_state, struct task_struct *prev, struct task_struct *next) { int flags; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 2402de520eca..46429f9a96fa 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -426,6 +426,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
static void notrace probe_wakeup_sched_switch(void *ignore, bool preempt, + unsigned int prev_state, struct task_struct *prev, struct task_struct *next) { struct trace_array_cpu *data;
From: Valentin Schneider valentin.schneider@arm.com
[ Upstream commit 25795ef6299f07ce3838f3253a9cb34f64efcfae ]
TASK_RTLOCK_WAIT currently isn't part of TASK_REPORT, thus a task blocking on an rtlock will appear as having a task state == 0, IOW TASK_RUNNING.
The actual state is saved in p->saved_state, but reading it after reading p->__state has a few issues:
o that could still be TASK_RUNNING in the case of e.g. rt_spin_lock
o ttwu_state_match() might have changed that to TASK_RUNNING
As pointed out by Eric, adding TASK_RTLOCK_WAIT to TASK_REPORT implies exposing a new state to userspace tools, which may not know what to do with it. The only information that needs to be conveyed here is that a task is waiting on an rt_mutex, which matches TASK_UNINTERRUPTIBLE - there's no need for a new state.
Reported-by: Uwe Kleine-König u.kleine-koenig@pengutronix.de Signed-off-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Reviewed-by: Steven Rostedt (Google) rostedt@goodmis.org Link: https://lore.kernel.org/r/20220120162520.570782-3-valentin.schneider@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 8fcf76fed984..031588cd2ccb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1626,6 +1626,14 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, if (tsk_state == TASK_IDLE) state = TASK_REPORT_IDLE;
+ /* + * We're lying here, but rather than expose a completely new task state + * to userspace, we can make this appear as if the task has gone through + * a regular rt_mutex_lock() call. + */ + if (tsk_state == TASK_RTLOCK_WAIT) + state = TASK_UNINTERRUPTIBLE; + return fls(state); }
From: Theodore Ts'o tytso@mit.edu
[ Upstream commit cc5095747edfb054ca2068d01af20be3fcc3634f ]
[un]pin_user_pages_remote is dirtying pages without properly warning the file system in advance. A related race was noted by Jan Kara in 2018[1]; more recently, however, instead of being a very hard-to-hit race, it could be reliably triggered by process_vm_writev(2), as discovered by Syzbot[2].
This is technically a bug in mm/gup.c, but arguably ext4 is fragile in that if some other kernel subsystem dirties pages without properly notifying the file system using page_mkwrite(), ext4 will BUG, while other file systems will not BUG (although data will still be lost).
So instead of crashing with a BUG, issue a warning (since there may be potential data loss) and just mark the page as clean to avoid unprivileged denial of service attacks until the problem can be properly fixed. More discussion and background can be found in the thread starting at [2].
[1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz [2] https://lore.kernel.org/r/Yg0m6IjcNmfaSokM@google.com
Reported-by: syzbot+d59332e2db681cf18f0318a06e994ebbb529a8db@syzkaller.appspotmail.com Reported-by: Lee Jones lee.jones@linaro.org Signed-off-by: Theodore Ts'o tytso@mit.edu Link: https://lore.kernel.org/r/YiDS9wVfq4mM2jGK@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/inode.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 22a5140546fb..fff52292c01e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1992,6 +1992,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE;
+ /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2595,6 +2604,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page));
+ /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. + * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1;
From: Chao Yu chao@kernel.org
[ Upstream commit f41ee8b91c00770d718be2ff4852a80017ae9ab3 ]
As Wenqing Liu reported in bugzilla:
https://bugzilla.kernel.org/show_bug.cgi?id=215657
- Overview
UBSAN: array-index-out-of-bounds in fs/f2fs/segment.c:3460:2 when mount and operate a corrupted image

- Reproduce
tested on kernel 5.17-rc4, 5.17-rc6

1. mkdir test_crash
2. cd test_crash
3. unzip tmp2.zip
4. mkdir mnt
5. ./single_test.sh f2fs 2

- Kernel dump
[ 46.434454] loop0: detected capacity change from 0 to 131072
[ 46.529839] F2FS-fs (loop0): Mounted with checkpoint version = 7548c2d9
[ 46.738319] ================================================================================
[ 46.738412] UBSAN: array-index-out-of-bounds in fs/f2fs/segment.c:3460:2
[ 46.738475] index 231 is out of range for type 'unsigned int [2]'
[ 46.738539] CPU: 2 PID: 939 Comm: umount Not tainted 5.17.0-rc6 #1
[ 46.738547] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1.1 04/01/2014
[ 46.738551] Call Trace:
[ 46.738556] <TASK>
[ 46.738563] dump_stack_lvl+0x47/0x5c
[ 46.738581] ubsan_epilogue+0x5/0x50
[ 46.738592] __ubsan_handle_out_of_bounds+0x68/0x80
[ 46.738604] f2fs_allocate_data_block+0xdff/0xe60 [f2fs]
[ 46.738819] do_write_page+0xef/0x210 [f2fs]
[ 46.738934] f2fs_do_write_node_page+0x3f/0x80 [f2fs]
[ 46.739038] __write_node_page+0x2b7/0x920 [f2fs]
[ 46.739162] f2fs_sync_node_pages+0x943/0xb00 [f2fs]
[ 46.739293] f2fs_write_checkpoint+0x7bb/0x1030 [f2fs]
[ 46.739405] kill_f2fs_super+0x125/0x150 [f2fs]
[ 46.739507] deactivate_locked_super+0x60/0xc0
[ 46.739517] deactivate_super+0x70/0xb0
[ 46.739524] cleanup_mnt+0x11a/0x200
[ 46.739532] __cleanup_mnt+0x16/0x20
[ 46.739538] task_work_run+0x67/0xa0
[ 46.739547] exit_to_user_mode_prepare+0x18c/0x1a0
[ 46.739559] syscall_exit_to_user_mode+0x26/0x40
[ 46.739568] do_syscall_64+0x46/0xb0
[ 46.739584] entry_SYSCALL_64_after_hwframe+0x44/0xae
The root cause is that we missed a sanity check on curseg->alloc_type, resulting in an out-of-bounds access of the sbi->block_count[] array. Fix it by rejecting any alloc_type other than LFS or SSR in sanity_check_curseg().
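A minimal sketch of the failure mode (simplified, hypothetical structures mirroring the sbi->block_count[] access mentioned above, which UBSAN reports as 'unsigned int [2]'):

    #include <stdio.h>

    enum { LFS = 0, SSR = 1 };

    struct sb_info_sketch {
        unsigned int block_count[2];   /* indexed by alloc_type: LFS or SSR */
    };

    int main(void)
    {
        struct sb_info_sketch sbi = { { 0, 0 } };
        unsigned char alloc_type = 231;  /* from a corrupted on-disk curseg */

        /* Without the added sanity check, this would index far past
         * block_count[1], which is exactly what UBSAN reported. */
        if (alloc_type != LFS && alloc_type != SSR) {
            fprintf(stderr, "invalid alloc_type:%d\n", alloc_type);
            return 1;
        }
        sbi.block_count[alloc_type]++;
        return 0;
    }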
Signed-off-by: Chao Yu chao@kernel.org Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/f2fs/segment.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d716553bdc02..338a57360bb8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4747,6 +4747,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
sanity_check_seg_type(sbi, curseg->seg_type);
+ if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) { + f2fs_err(sbi, + "Current segment has invalid alloc_type:%d", + curseg->alloc_type); + return -EFSCORRUPTED; + } + if (f2fs_test_bit(blkofs, se->cur_valid_map)) goto out;
From: Chuck Lever chuck.lever@oracle.com
[ Upstream commit 50719bf3442dd6cd05159e9c98d020b3919ce978 ]
These return values have been incorrect since the function was introduced: nfsd_breaker_owns_lease() returned NULL from a function declared to return bool.
A proper kerneldoc comment is added since this function, though static, is part of an external interface.
Reported-by: Dai Ngo dai.ngo@oracle.com Signed-off-by: Chuck Lever chuck.lever@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/nfsd/nfs4state.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index db4a47a280dc..181bc3d9f566 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4693,6 +4693,14 @@ nfsd_break_deleg_cb(struct file_lock *fl) return ret; }
+/** + * nfsd_breaker_owns_lease - Check if lease conflict was resolved + * @fl: Lock state to check + * + * Return values: + * %true: Lease conflict was resolved + * %false: Lease conflict was not resolved. + */ static bool nfsd_breaker_owns_lease(struct file_lock *fl) { struct nfs4_delegation *dl = fl->fl_owner; @@ -4700,11 +4708,11 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl) struct nfs4_client *clp;
if (!i_am_nfsd()) - return NULL; + return false; rqst = kthread_data(current); /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */ if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4) - return NULL; + return false; clp = *(rqst->rq_lease_breaker); return dl->dl_stid.sc_client == clp; }
From: Jaegeuk Kim jaegeuk@kernel.org
[ Upstream commit ba900534f807f0b327c92d5141c85d2313e2d55c ]
Let's purge inode cache in order to avoid the below deadlock.
[freeze test]                          shrinker
freeze_super
 - percpu_down_write(SB_FREEZE_FS)
                                       - super_cache_scan
                                         - down_read(&sb->s_umount)
                                           - prune_icache_sb
                                             - dispose_list
                                               - evict
                                                 - f2fs_evict_inode
thaw_super
 - down_write(&sb->s_umount);
                                       - __percpu_down_read(SB_FREEZE_FS)
Reviewed-by: Chao Yu chao@kernel.org Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- Documentation/ABI/testing/sysfs-fs-f2fs | 1 + fs/f2fs/debug.c | 1 + fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 6 ++++-- fs/f2fs/super.c | 4 ++++ 5 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index f627e705e663..48d41b669627 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -425,6 +425,7 @@ Description: Show status of f2fs superblock in real time. 0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP 0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted 0x2000 SBI_IS_RESIZEFS resizefs is in process + 0x4000 SBI_IS_FREEZING freefs is in process ====== ===================== =================================
What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8c50518475a9..07ad0d81f0c5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -338,6 +338,7 @@ static char *s_flag[] = { [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush", [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair", [SBI_IS_RESIZEFS] = " resizefs", + [SBI_IS_FREEZING] = " freezefs", };
static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c68817d83a53..0a0fa1a64d06 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1253,6 +1253,7 @@ enum { SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ SBI_IS_RESIZEFS, /* resizefs is in process */ + SBI_IS_FREEZING, /* freezefs is in process */ };
enum { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 4557de37a911..749aea800986 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -769,7 +769,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
- sb_start_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -800,7 +801,8 @@ void f2fs_evict_inode(struct inode *inode) if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); } - sb_end_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6174c4f4cee7..9a1d16657b80 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb) /* ensure no checkpoint required */ if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list)) return -EINVAL; + + /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ + set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; }
static int f2fs_unfreeze(struct super_block *sb) { + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; }
From: Anand Jain anand.jain@oracle.com
[ Upstream commit 770c79fb65506fc7c16459855c3839429f46cb32 ]
Identifying and removing the stale device from the fs_uuids list is done by btrfs_free_stale_devices(). btrfs_free_stale_devices() in turn depends on device_path_matched() to check if the device appears in more than one btrfs_device structure.
The matching of the device happens by its path. However, when device mapper is in use, the dm device paths are nothing but links to the actual block device, which leads to device_path_matched() failing to match.
Fix this by matching the dev_t as provided by lookup_bdev() instead of doing a plain string comparison of the device paths.
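The idea is to compare underlying device numbers rather than path strings, so /dev/dm-N and its /dev/mapper symlink match. A userspace analogue using stat(2) (the kernel code uses lookup_bdev() for the same purpose):

    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/stat.h>

    /* Return true if both paths resolve to the same block device number. */
    static bool same_block_device(const char *a, const char *b)
    {
        struct stat sa, sb;

        if (stat(a, &sa) || stat(b, &sb))
            return false;   /* treat lookup failure as "no match" */

        return S_ISBLK(sa.st_mode) && S_ISBLK(sb.st_mode) &&
               sa.st_rdev == sb.st_rdev;
    }

    int main(int argc, char **argv)
    {
        if (argc != 3) {
            fprintf(stderr, "usage: %s <path-a> <path-b>\n", argv[0]);
            return 2;
        }
        printf("%s\n", same_block_device(argv[1], argv[2]) ? "match" : "no match");
        return 0;
    }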
Reported-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Anand Jain anand.jain@oracle.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/volumes.c | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 06a1a7c2254c..33bd94ee4690 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -530,15 +530,48 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, return ret; }
-static bool device_path_matched(const char *path, struct btrfs_device *device) +/* + * Check if the device in the path matches the device in the given struct device. + * + * Returns: + * true If it is the same device. + * false If it is not the same device or on error. + */ +static bool device_matched(const struct btrfs_device *device, const char *path) { - int found; + char *device_name; + dev_t dev_old; + dev_t dev_new; + int ret; + + /* + * If we are looking for a device with the matching dev_t, then skip + * device without a name (a missing device). + */ + if (!device->name) + return false; + + device_name = kzalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); + if (!device_name) + return false;
rcu_read_lock(); - found = strcmp(rcu_str_deref(device->name), path); + scnprintf(device_name, BTRFS_PATH_NAME_MAX, "%s", rcu_str_deref(device->name)); rcu_read_unlock();
- return found == 0; + ret = lookup_bdev(device_name, &dev_old); + kfree(device_name); + if (ret) + return false; + + ret = lookup_bdev(path, &dev_new); + if (ret) + return false; + + if (dev_old == dev_new) + return true; + + return false; }
/* @@ -571,9 +604,7 @@ static int btrfs_free_stale_devices(const char *path, &fs_devices->devices, dev_list) { if (skip_device && skip_device == device) continue; - if (path && !device->name) - continue; - if (path && !device_path_matched(path, device)) + if (path && !device_matched(device, path)) continue; if (fs_devices->opened) { /* for an already deleted device return 0 */
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 03ddb19d2ea745228879b9334f3b550c88acb10a ]
We can either fail to find a csum entry at all and return -ENOENT, or we can find a range that is close but return -EFBIG. In essence these both mean the same thing when we are doing a lookup for a csum in an existing range: we didn't find a csum. We want to treat both of these errors the same way and complain loudly that there wasn't a csum. This currently happens anyway because we do

	count = search_csum_tree();
	if (count <= 0) {
		// reloc and error handling
	}

However, it forces us to incorrectly treat EIO or ENOMEM errors as on-disk corruption. Fix this by returning 0 if we get either -ENOENT or -EFBIG from btrfs_lookup_csum() so we can do proper error handling.
Reviewed-by: Boris Burkov boris@bur.io Reviewed-by: Johannes Thumshirn johannes.thumshirn@wdc.com Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/file-item.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 0b9401a5afd3..161a69d7e117 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -303,7 +303,7 @@ static int search_csum_tree(struct btrfs_fs_info *fs_info, read_extent_buffer(path->nodes[0], dst, (unsigned long)item, ret * csum_size); out: - if (ret == -ENOENT) + if (ret == -ENOENT || ret == -EFBIG) ret = 0; return ret; }
From: Jaegeuk Kim jaegeuk@kernel.org
[ Upstream commit 98237fcda4a24e67b0a4498c17d5aa4ad4537bc7 ]
[14696.634553] task:cat             state:D stack:    0 pid:1613738 ppid:1613735 flags:0x00000004
[14696.638285] Call Trace:
[14696.639038] <TASK>
[14696.640032] __schedule+0x302/0x930
[14696.640969] schedule+0x58/0xd0
[14696.641799] schedule_preempt_disabled+0x18/0x30
[14696.642890] __mutex_lock.constprop.0+0x2fb/0x4f0
[14696.644035] ? mod_objcg_state+0x10c/0x310
[14696.645040] ? obj_cgroup_charge+0xe1/0x170
[14696.646067] __mutex_lock_slowpath+0x13/0x20
[14696.647126] mutex_lock+0x34/0x40
[14696.648070] stat_show+0x25/0x17c0 [f2fs]
[14696.649218] seq_read_iter+0x120/0x4b0
[14696.650289] ? aa_file_perm+0x12a/0x500
[14696.651357] ? lru_cache_add+0x1c/0x20
[14696.652470] seq_read+0xfd/0x140
[14696.653445] full_proxy_read+0x5c/0x80
[14696.654535] vfs_read+0xa0/0x1a0
[14696.655497] ksys_read+0x67/0xe0
[14696.656502] __x64_sys_read+0x1a/0x20
[14696.657580] do_syscall_64+0x3b/0xc0
[14696.658671] entry_SYSCALL_64_after_hwframe+0x44/0xae
[14696.660068] RIP: 0033:0x7efe39df1cb2
[14696.661133] RSP: 002b:00007ffc8badd948 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[14696.662958] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007efe39df1cb2
[14696.664757] RDX: 0000000000020000 RSI: 00007efe399df000 RDI: 0000000000000003
[14696.666542] RBP: 00007efe399df000 R08: 00007efe399de010 R09: 00007efe399de010
[14696.668363] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000000
[14696.670155] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[14696.671965] </TASK>
[14696.672826] task:umount          state:D stack:    0 pid:1614985 ppid:1614984 flags:0x00004000
[14696.674930] Call Trace:
[14696.675903] <TASK>
[14696.676780] __schedule+0x302/0x930
[14696.677927] schedule+0x58/0xd0
[14696.679019] schedule_preempt_disabled+0x18/0x30
[14696.680412] __mutex_lock.constprop.0+0x2fb/0x4f0
[14696.681783] ? destroy_inode+0x65/0x80
[14696.683006] __mutex_lock_slowpath+0x13/0x20
[14696.684305] mutex_lock+0x34/0x40
[14696.685442] f2fs_destroy_stats+0x1e/0x60 [f2fs]
[14696.686803] f2fs_put_super+0x158/0x390 [f2fs]
[14696.688238] generic_shutdown_super+0x7a/0x120
[14696.689621] kill_block_super+0x27/0x50
[14696.690894] kill_f2fs_super+0x7f/0x100 [f2fs]
[14696.692311] deactivate_locked_super+0x35/0xa0
[14696.693698] deactivate_super+0x40/0x50
[14696.694985] cleanup_mnt+0x139/0x190
[14696.696209] __cleanup_mnt+0x12/0x20
[14696.697390] task_work_run+0x64/0xa0
[14696.698587] exit_to_user_mode_prepare+0x1b7/0x1c0
[14696.700053] syscall_exit_to_user_mode+0x27/0x50
[14696.701418] do_syscall_64+0x48/0xc0
[14696.702630] entry_SYSCALL_64_after_hwframe+0x44/0xae
Reviewed-by: Chao Yu chao@kernel.org Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/f2fs/debug.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 07ad0d81f0c5..b449c7a372a4 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h"
static LIST_HEAD(f2fs_stat_list); -static DEFINE_MUTEX(f2fs_stat_mutex); +static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -345,8 +345,9 @@ static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; + unsigned long flags;
- mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_for_each_entry(si, &f2fs_stat_list, stat_list) { update_general_status(si->sbi);
@@ -574,7 +575,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); return 0; }
@@ -585,6 +586,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + unsigned long flags; int i;
si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); @@ -620,9 +622,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->max_aw_cnt, 0); atomic_set(&sbi->max_vw_cnt, 0);
- mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_add_tail(&si->stat_list, &f2fs_stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
return 0; } @@ -630,10 +632,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned long flags;
- mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_del(&si->stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
kfree(si); }
From: Chao Yu chao@kernel.org
[ Upstream commit d284af43f703760e261b1601378a0c13a19d5f1f ]
In lz4_decompress_pages(), if the size of the decompressed data is not equal to the expected one, we should print that size rather than the size of the target buffer for the decompressed data. Fix it.
Signed-off-by: Chao Yu chao.yu@oppo.com Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/f2fs/compress.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 58d255d3a518..6adf04725954 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -312,10 +312,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) }
if (ret != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, " + printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, " "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, - dic->rlen, + F2FS_I_SB(dic->inode)->sb->s_id, ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; }
From: Rohith Surabattula rohiths@microsoft.com
[ Upstream commit 06a466565d54a1a42168f9033a062a3f5c40e73b ]
When the session gets reconnected during mount, the read size in the superblock fs context gets set to zero, and since rsize is not modified after negotiation, reads are issued with a requested byte count of zero. This fixes the intermittent failure of xfstest generic/240.
Note that stable requires a different version of this patch which will be sent to the stable mailing list.
Signed-off-by: Rohith Surabattula rohiths@microsoft.com Acked-by: Paulo Alcantara (SUSE) pc@cjr.nz Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/cifs/cifsfs.c | 3 +++ fs/cifs/file.c | 10 ++++++++++ 2 files changed, 13 insertions(+)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 21bf82fc2278..ba6f536d76de 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -210,6 +210,9 @@ cifs_read_super(struct super_block *sb) if (rc) goto out_no_root; /* tune readahead according to rsize if readahead size not set on mount */ + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + tcon->ses->server->ops->negotiate_rsize(tcon, cifs_sb->ctx); if (cifs_sb->ctx->rasize) sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE; else diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 82bbaf8e92b7..b23f6b489bb9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3734,6 +3734,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, break; }
+ if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), + cifs_sb->ctx); + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc) @@ -4512,6 +4517,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, break; }
+ if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), + cifs_sb->ctx); + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc)
From: Dongliang Mu mudongliangabcd@gmail.com
[ Upstream commit 714fbf2647b1a33d914edd695d4da92029c7e7c0 ]
ntfs_read_inode_mount() invokes ntfs_malloc_nofs() with a zero allocation size, which triggers a BUG in the __ntfs_malloc() function.
Fix this by adding a sanity check on ni->attr_list_size.
Link: https://lkml.kernel.org/r/20220120094914.47736-1-dzm91@hust.edu.cn Reported-by: syzbot+3c765c5248797356edaa@syzkaller.appspotmail.com Signed-off-by: Dongliang Mu mudongliangabcd@gmail.com Acked-by: Anton Altaparmakov anton@tuxera.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4474adb393ca..517b71c73aa9 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1881,6 +1881,10 @@ int ntfs_read_inode_mount(struct inode *vi) } /* Now allocate memory for the attribute list. */ ni->attr_list_size = (u32)ntfs_attr_size(a); + if (!ni->attr_list_size) { + ntfs_error(sb, "Attr_list_size is zero"); + goto put_err_out; + } ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); if (!ni->attr_list) { ntfs_error(sb, "Not enough memory to allocate buffer "