While working on extended range for last_error/first_error timestamps, I noticed that the endianness is wrong: we access the little-endian fields in struct ext4_super_block as native-endian when we print them.
This adds a special case in ext4_attr_show() and ext4_attr_store() to byteswap the superblock fields if needed.
In older kernels, this code was part of super.c; it was moved to sysfs.c in linux-4.4.
Cc: stable@vger.kernel.org Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors") Signed-off-by: Arnd Bergmann arnd@arndb.de --- fs/ext4/sysfs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index f34da0bb8f17..b970a200f20c 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -274,8 +274,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj, case attr_pointer_ui: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%u\n", - *((unsigned int *) ptr)); + if (a->attr_ptr == ptr_ext4_super_block_offset) + return snprintf(buf, PAGE_SIZE, "%u\n", + le32_to_cpup(ptr)); + else + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) ptr)); case attr_pointer_atomic: if (!ptr) return 0; @@ -308,7 +312,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj, ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret) return ret; - *((unsigned int *) ptr) = t; + if (a->attr_ptr == ptr_ext4_super_block_offset) + *((__le32 *) ptr) = cpu_to_le32(t); + else + *((unsigned int *) ptr) = t; return len; case attr_inode_readahead: return inode_readahead_blks_store(sbi, buf, len);
The mmp_time field is 64 bits wide, which is good, but calling get_seconds() results in a 32-bit value on 32-bit architectures. Using ktime_get_real_seconds() instead returns 64 bits everywhere.
Signed-off-by: Arnd Bergmann arnd@arndb.de --- fs/ext4/mmp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 27b9a76a0dfa..39da4eb48361 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -147,7 +147,7 @@ static int kmmpd(void *data)
mmp_block = le64_to_cpu(es->s_mmp_block); mmp = (struct mmp_struct *)(bh->b_data); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); /* * Start with the higher mmp_check_interval and reduce it if * the MMP block is being updated on time. @@ -165,7 +165,7 @@ static int kmmpd(void *data) seq = 1;
mmp->mmp_seq = cpu_to_le32(seq); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); last_update_time = jiffies;
retval = write_mmp_block(sb, bh); @@ -244,7 +244,7 @@ static int kmmpd(void *data) * Unmount seems to be clean. */ mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
retval = write_mmp_block(sb, bh);
On Wed 20-06-18 17:32:59, Arnd Bergmann wrote:
The mmp_time field is 64 bits wide, which is good, but calling get_seconds() results in a 32-bit value on 32-bit architectures. Using ktime_get_real_seconds() instead returns 64 bits everywhere.
Signed-off-by: Arnd Bergmann arnd@arndb.de
Looks good.
Reviewed-by: Jan Kara jack@suse.cz
Honza
fs/ext4/mmp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 27b9a76a0dfa..39da4eb48361 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -147,7 +147,7 @@ static int kmmpd(void *data) mmp_block = le64_to_cpu(es->s_mmp_block); mmp = (struct mmp_struct *)(bh->b_data);
- mmp->mmp_time = cpu_to_le64(get_seconds());
+ mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); /*
* Start with the higher mmp_check_interval and reduce it if
* the MMP block is being updated on time.
@@ -165,7 +165,7 @@ static int kmmpd(void *data) seq = 1; mmp->mmp_seq = cpu_to_le32(seq);
- mmp->mmp_time = cpu_to_le64(get_seconds());
+ mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); last_update_time = jiffies;
retval = write_mmp_block(sb, bh); @@ -244,7 +244,7 @@ static int kmmpd(void *data) * Unmount seems to be clean. */ mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
- mmp->mmp_time = cpu_to_le64(get_seconds());
+ mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
retval = write_mmp_block(sb, bh); -- 2.9.0
We only care about the low 32 bits for i_dtime as explained in commit b5f515735bea ("ext4: avoid Y2038 overflow in recently_deleted()"), so the use of get_seconds() is correct here, but that function is getting removed in the process of the y2038 fixes, so let's use the modern ktime_get_real_seconds() here.
Signed-off-by: Arnd Bergmann arnd@arndb.de --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2ea07efbe016..600e2063d1df 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -317,7 +317,7 @@ void ext4_evict_inode(struct inode *inode) * (Well, we could do this if we need to, but heck - it works) */ ext4_orphan_del(handle, inode); - EXT4_I(inode)->i_dtime = get_seconds(); + EXT4_I(inode)->i_dtime = ktime_get_real_seconds();
/* * One subtle ordering requirement: if anything has gone wrong
This is the last missing piece for the inode times on 32-bit systems: now that VFS interfaces use timespec64, we just need to stop truncating the tv_sec values for y2038 compatibility.
Signed-off-by: Arnd Bergmann arnd@arndb.de --- fs/ext4/ext4.h | 18 +++++++----------- fs/ext4/ialloc.c | 2 +- 2 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0b127853c584..6b4f4369a08c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -789,17 +789,16 @@ struct move_extent { * affected filesystem before 2242. */
-static inline __le32 ext4_encode_extra_time(struct timespec *time) +static inline __le32 ext4_encode_extra_time(struct timespec64 *time) { u32 extra = sizeof(time->tv_sec) > 4 ? ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); }
-static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +static inline void ext4_decode_extra_time(struct timespec64 *time, __le32 extra) { - if (unlikely(sizeof(time->tv_sec) > 4 && - (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) { + if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) {
#if 1 /* Handle legacy encoding of pre-1970 dates with epoch @@ -821,9 +820,8 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) do { \ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\ - struct timespec ts = timespec64_to_timespec((inode)->xtime); \ (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&ts); \ + ext4_encode_extra_time(&(inode)->xtime); \ } \ } while (0)
@@ -840,10 +838,8 @@ do { \ do { \ (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \ - struct timespec ts = timespec64_to_timespec((inode)->xtime); \ - ext4_decode_extra_time(&ts, \ + ext4_decode_extra_time(&(inode)->xtime, \ raw_inode->xtime ## _extra); \ - (inode)->xtime = timespec_to_timespec64(ts); \ } \ else \ (inode)->xtime.tv_nsec = 0; \ @@ -993,9 +989,9 @@ struct ext4_inode_info {
/* * File creation time. Its function is same as that of - * struct timespec i_{a,c,m}time in the generic inode. + * struct timespec64 i_{a,c,m}time in the generic inode. */ - struct timespec i_crtime; + struct timespec64 i_crtime;
/* mballoc */ struct list_head i_prealloc_list; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f525f909b559..8c00c871f5dc 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1073,7 +1073,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - ei->i_crtime = timespec64_to_timespec(inode->i_mtime); + ei->i_crtime = inode->i_mtime;
memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0;
On Wed 20-06-18 17:33:01, Arnd Bergmann wrote:
-static inline __le32 ext4_encode_extra_time(struct timespec *time) +static inline __le32 ext4_encode_extra_time(struct timespec64 *time) { u32 extra = sizeof(time->tv_sec) > 4 ?
I suppose this condition can be removed now.
((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0;
return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); } -static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +static inline void ext4_decode_extra_time(struct timespec64 *time, __le32 extra)
Wrap?
Otherwise the patch looks good.
Honza
On Fri, Jun 22, 2018 at 9:37 AM, Jan Kara jack@suse.cz wrote:
On Wed 20-06-18 17:33:01, Arnd Bergmann wrote:
-static inline __le32 ext4_encode_extra_time(struct timespec *time) +static inline __le32 ext4_encode_extra_time(struct timespec64 *time) { u32 extra = sizeof(time->tv_sec) > 4 ?
I suppose this condition can be removed now.
Right, good point. I also verified that we have no other similar constructs in the kernel any more now.
((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
}
-static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +static inline void ext4_decode_extra_time(struct timespec64 *time, __le32 extra)
Wrap?
Ok, done.
Otherwise the patch looks good.
Thanks!
Arnd
jbd2 is one of the few callers of current_kernel_time64(), which is a wrapper around ktime_get_coarse_real_ts64(). This calls the latter directly for consistency with the rest of the kernel that is moving to the ktime_get_ family of time accessors.
Signed-off-by: Arnd Bergmann arnd@arndb.de --- fs/jbd2/commit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8de0e7723316..150cc030b4d7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -121,7 +121,7 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - struct timespec64 now = current_kernel_time64(); + struct timespec64 now;
*cbh = NULL;
@@ -134,6 +134,7 @@ static int journal_submit_commit_record(journal_t *journal, return 1;
tmp = (struct commit_header *)bh->b_data; + ktime_get_coarse_real_ts64(&now); tmp->h_commit_sec = cpu_to_be64(now.tv_sec); tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
On Wed 20-06-18 17:33:02, Arnd Bergmann wrote:
jbd2 is one of the few callers of current_kernel_time64(), which is a wrapper around ktime_get_coarse_real_ts64(). This calls the latter directly for consistency with the rest of the kernel that is moving to the ktime_get_ family of time accessors.
Signed-off-by: Arnd Bergmann arnd@arndb.de
Looks good. You can add:
Reviewed-by: Jan Kara jack@suse.cz
Honza
fs/jbd2/commit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8de0e7723316..150cc030b4d7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -121,7 +121,7 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret;
- struct timespec64 now = current_kernel_time64();
+ struct timespec64 now;
*cbh = NULL; @@ -134,6 +134,7 @@ static int journal_submit_commit_record(journal_t *journal, return 1; tmp = (struct commit_header *)bh->b_data;
+ ktime_get_coarse_real_ts64(&now); tmp->h_commit_sec = cpu_to_be64(now.tv_sec); tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
2.9.0
The inode timestamps use 34 bits in ext4, but the various timestamps in the superblock are limited to 32 bits. If every user accesses these as 'unsigned', then this is good until year 2106, but it seems better to extend this a bit further in the process of removing the deprecated get_seconds() function.
This adds another byte for each timestamp in the superblock, making them long enough to store timestamps beyond what is in the inodes, which seems good enough here (in ocfs2, they are already 64-bit wide, which is appropriate for a new layout).
I did not modify e2fsprogs, which obviously needs the same change to actually interpret future timestamps correctly.
Signed-off-by: Arnd Bergmann arnd@arndb.de --- fs/ext4/ext4.h | 9 ++++++++- fs/ext4/super.c | 35 ++++++++++++++++++++++++++--------- fs/ext4/sysfs.c | 18 ++++++++++++++++-- 3 files changed, 50 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6b4f4369a08c..cac1464383e4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1294,7 +1294,14 @@ struct ext4_super_block { __le32 s_lpf_ino; /* Location of the lost+found inode */ __le32 s_prj_quota_inum; /* inode for tracking project quota */ __le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */ - __le32 s_reserved[98]; /* Padding to the end of the block */ + __u8 s_wtime_hi; + __u8 s_mtime_hi; + __u8 s_mkfs_time_hi; + __u8 s_lastcheck_hi; + __u8 s_first_error_time_hi; + __u8 s_last_error_time_hi; + __u8 s_pad[2]; + __le32 s_reserved[96]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ };
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0c4c2201b3aa..2063d4e5ed08 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -312,6 +312,20 @@ void ext4_itable_unused_set(struct super_block *sb, bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); }
+static void ext4_update_tstamp(__le32 *lo, __u8 *hi) +{ + time64_t now = ktime_get_real_seconds(); + + now = clamp_val(now, 0, 0xffffffffffull); + + *lo = cpu_to_le32(lower_32_bits(now)); + *hi = upper_32_bits(now); +} + +static time64_t ext4_get_tstamp(__le32 *lo, __u8 *hi) +{ + return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo); +}
static void __save_error_info(struct super_block *sb, const char *func, unsigned int line) @@ -322,11 +336,12 @@ static void __save_error_info(struct super_block *sb, const char *func, if (bdev_read_only(sb->s_bdev)) return; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - es->s_last_error_time = cpu_to_le32(get_seconds()); + ext4_update_tstamp(&es->s_last_error_time, &es->s_last_error_time_hi); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); es->s_last_error_line = cpu_to_le32(line); if (!es->s_first_error_time) { es->s_first_error_time = es->s_last_error_time; + es->s_first_error_time_hi = es->s_last_error_time_hi; strncpy(es->s_first_error_func, func, sizeof(es->s_first_error_func)); es->s_first_error_line = cpu_to_le32(line); @@ -2163,8 +2178,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, "warning: maximal mount count reached, " "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && - (le32_to_cpu(es->s_lastcheck) + - le32_to_cpu(es->s_checkinterval) <= get_seconds())) + (ext4_get_tstamp(&es->s_lastcheck, &es->s_lastcheck_hi) + + le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds())) ext4_msg(sb, KERN_WARNING, "warning: checktime reached, " "running e2fsck is recommended"); @@ -2173,7 +2188,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); - es->s_mtime = cpu_to_le32(get_seconds()); + ext4_update_tstamp(&es->s_mtime, &es->s_mtime_hi); ext4_update_dynamic_rev(sb); if (sbi->s_journal) ext4_set_feature_journal_needs_recovery(sb); @@ -2839,8 +2854,9 @@ static void print_daily_error_info(struct timer_list *t) ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u", le32_to_cpu(es->s_error_count)); if (es->s_first_error_time) { - printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d", - 
sb->s_id, le32_to_cpu(es->s_first_error_time), + printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d", + sb->s_id, + ext4_get_tstamp(&es->s_first_error_time, &es->s_first_error_time_hi), (int) sizeof(es->s_first_error_func), es->s_first_error_func, le32_to_cpu(es->s_first_error_line)); @@ -2853,8 +2869,9 @@ static void print_daily_error_info(struct timer_list *t) printk(KERN_CONT "\n"); } if (es->s_last_error_time) { - printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", - sb->s_id, le32_to_cpu(es->s_last_error_time), + printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d", + sb->s_id, + ext4_get_tstamp(&es->s_last_error_time, &es->s_last_error_time_hi), (int) sizeof(es->s_last_error_func), es->s_last_error_func, le32_to_cpu(es->s_last_error_line)); @@ -4747,7 +4764,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) * to complain and force a full file system check. */ if (!(sb->s_flags & SB_RDONLY)) - es->s_wtime = cpu_to_le32(get_seconds()); + ext4_update_tstamp(&es->s_wtime, &es->s_wtime_hi); if (sb->s_bdev->bd_part) es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index b970a200f20c..fe58aa905cbe 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -25,6 +25,8 @@ typedef enum { attr_reserved_clusters, attr_inode_readahead, attr_trigger_test_error, + attr_first_error_time, + attr_last_error_time, attr_feature, attr_pointer_ui, attr_pointer_atomic, @@ -182,8 +184,8 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); -EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); -EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); +EXT4_ATTR(first_error_time, 0444, first_error_time); +EXT4_ATTR(last_error_time, 
0444, last_error_time);
static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); @@ -249,6 +251,12 @@ static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) return NULL; }
+static ssize_t print_time(char *buf, __le32 lo, __u8 hi) +{ + return snprintf(buf, PAGE_SIZE, "%lld", + ((time64_t)hi << 32) + le32_to_cpu(lo)); +} + static ssize_t ext4_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -287,6 +295,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj, atomic_read((atomic_t *) ptr)); case attr_feature: return snprintf(buf, PAGE_SIZE, "supported\n"); + case attr_first_error_time: + return print_time(buf, sbi->s_es->s_first_error_time, + sbi->s_es->s_first_error_time_hi); + case attr_last_error_time: + return print_time(buf, sbi->s_es->s_last_error_time, + sbi->s_es->s_last_error_time_hi); }
return 0;