The series is aimed at adding timestamp checking and policy related to it to vfs.
The series was developed with discussions and guidance from Arnd Bergmann.
The original thread is at https://lkml.org/lkml/2016/11/2/294
Associated test: xfstests generic/402. Note that the above test will be run on, and will fail on, all filesystems that do not have correct limits specified in xfstests or the kernel, or that don't support times beyond the test dates. I will be submitting follow-up xfstests and kernel patches to update all filesystems. Currently ext4 is the only filesystem that reflects correct limits.
The branch is available at https://github.com/deepa-hub/vfs.git refs/heads/vfs_timestamp_policy
Changes since v2: * Introduce early boot param override for checks. * Drop afs patch for timestamp limits. Changes since v1: * return EROFS on mount errors * fix mtime copy/paste error in utimes
Deepa Dinamani (5): vfs: Add file timestamp range support vfs: Add checks for filesystem timestamp limits ext4: Initialize timestamps limits vfs: Add timestamp_truncate() api utimes: Clamp the timestamps before update
fs/ext4/ext4.h | 4 ++++ fs/ext4/super.c | 7 ++++++- fs/inode.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- fs/internal.h | 2 ++ fs/libfs.c | 4 ++++ fs/namespace.c | 12 ++++++++++++ fs/super.c | 9 +++++++++ fs/utimes.c | 17 +++++++++++++---- include/linux/fs.h | 4 ++++ include/linux/time64.h | 6 ++++++ include/uapi/linux/fs.h | 6 +++++- kernel/sysctl.c | 7 +++++++ 12 files changed, 116 insertions(+), 7 deletions(-)
Add fields to the superblock to track the min and max timestamps supported by filesystems.
Initially, when a superblock is allocated, initialize it to the max and min values the fields can hold. Individual filesystems override these to match their actual limits.
Pseudo filesystems are assumed to always support the min and max allowable values for the fields.
Note that the time ranges are saved in type time64_t rather than time_t. This is required because if we saved the ranges in time_t, we would not be able to represent timestamp ranges for filesystems that support timestamps beyond y2038.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- fs/libfs.c | 4 ++++ fs/super.c | 2 ++ include/linux/fs.h | 3 +++ include/linux/time64.h | 2 ++ 4 files changed, 11 insertions(+)
diff --git a/fs/libfs.c b/fs/libfs.c index 28d6f35..90ee803 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -257,6 +257,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, s->s_op = ops ? ops : &simple_super_operations; s->s_xattr = xattr; s->s_time_gran = 1; + s->s_time_min = TIME64_MIN; + s->s_time_max = TIME64_MAX; root = new_inode(s); if (!root) goto Enomem; @@ -518,6 +520,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic, s->s_magic = magic; s->s_op = &simple_super_operations; s->s_time_gran = 1; + s->s_time_min = TIME64_MIN; + s->s_time_max = TIME64_MAX;
inode = new_inode(s); if (!inode) diff --git a/fs/super.c b/fs/super.c index b8b6a08..f9c2241 100644 --- a/fs/super.c +++ b/fs/super.c @@ -247,6 +247,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; + s->s_time_min = TIME64_MIN; + s->s_time_max = TIME64_MAX; s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS; diff --git a/include/linux/fs.h b/include/linux/fs.h index de8ed0b..ef55dfb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1337,6 +1337,9 @@ struct super_block { /* Granularity of c/m/atime in ns. Cannot be worse than a second */ u32 s_time_gran; + /* Time limits for c/m/atime in seconds. */ + time64_t s_time_min; + time64_t s_time_max;
/* * The next field is for VFS *only*. No filesystems have any business diff --git a/include/linux/time64.h b/include/linux/time64.h index 980c71b..25433b18 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -38,6 +38,8 @@ struct itimerspec64 {
/* Located here for timespec[64]_valid_strict */ #define TIME64_MAX ((s64)~((u64)1 << 63)) +#define TIME64_MIN (-TIME64_MAX - 1) + #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
On Sat, Feb 18, 2017 at 03:30:04PM -0800, Deepa Dinamani wrote:
Add fields to the superblock to track the min and max timestamps supported by filesystems.
Initially, when a superblock is allocated, initialize it to the max and min values the fields can hold. Individual filesystems override these to match their actual limits.
Pseudo filesystems are assumed to always support the min and max allowable values for the fields.
@@ -257,6 +257,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
+ s->s_time_min = TIME64_MIN;
+ s->s_time_max = TIME64_MAX;
@@ -518,6 +520,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
+ s->s_time_min = TIME64_MIN;
+ s->s_time_max = TIME64_MAX;
@@ -247,6 +247,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
+ s->s_time_min = TIME64_MIN;
+ s->s_time_max = TIME64_MAX;
If we always initialize them that way, why does libfs.c code bother reassign the same values?
@@ -257,6 +257,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;
@@ -518,6 +520,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;
@@ -247,6 +247,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;
If we always initialize them that way, why does libfs.c code bother reassign the same values?
Initially, I considered the idea of having invalid range initialization for all super blocks: https://patchwork.kernel.org/patch/8488641/
At that point, it made sense to override the initializations in libfs.c.
The problem with this is we will have to change all filesystems at the same time. So I decided to change the strategy to include valid limits so that we don't break any filesystems and can first merge the basic infrastructure. But, I was hoping we could go back to invalid range eventually which is why I left the libfs.c initializations in.
But you are right: this is redundant for now, and it probably makes sense to leave it out. I will post an update.
Thanks, Deepa
Allow only read-only mounts for filesystems whose maximum supported timestamp does not extend beyond the y2038 expiry timestamp.
Also, add a sysctl override that allows all such filesystems to be mounted with write permissions. A boot param supports an initial override of these checks from early boot without recompilation.
Suggested-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- fs/inode.c | 13 +++++++++++++ fs/internal.h | 2 ++ fs/namespace.c | 12 ++++++++++++ fs/super.c | 7 +++++++ include/linux/fs.h | 1 + include/linux/time64.h | 4 ++++ include/uapi/linux/fs.h | 6 +++++- kernel/sysctl.c | 7 +++++++ 8 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/fs/inode.c b/fs/inode.c index 0b3ee89..0573a3e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -75,6 +75,19 @@ static DEFINE_PER_CPU(unsigned long, nr_unused);
static struct kmem_cache *inode_cachep __read_mostly;
+struct vfs_max_timestamp_check timestamp_check = { + .timestamp_supported = Y2038_EXPIRY_TIMESTAMP, + .check_on = 0, +}; + +static int __init setup_timestamp_check(char *str) +{ + timestamp_check.check_on = 1; + return 0; +} + +early_param("fstimestampcheck", setup_timestamp_check); + static long get_nr_inodes(void) { int i; diff --git a/fs/internal.h b/fs/internal.h index 11c6d89..4b3cb9e 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -67,6 +67,8 @@ extern int finish_automount(struct vfsmount *, struct path *);
extern int sb_prepare_remount_readonly(struct super_block *);
+extern bool sb_file_times_updatable(struct super_block *sb); + extern void __init mnt_init(void);
extern int __mnt_want_write(struct vfsmount *); diff --git a/fs/namespace.c b/fs/namespace.c index 8bfad42..dbf3f1c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -538,6 +538,18 @@ static void __mnt_unmake_readonly(struct mount *mnt) unlock_mount_hash(); }
+bool sb_file_times_updatable(struct super_block *sb) +{ + + if (!timestamp_check.check_on) + return true; + + if (sb->s_time_max > timestamp_check.timestamp_supported) + return true; + + return false; +} + int sb_prepare_remount_readonly(struct super_block *sb) { struct mount *mnt; diff --git a/fs/super.c b/fs/super.c index f9c2241..4e7577b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1245,6 +1245,13 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " "negative value (%lld)\n", type->name, sb->s_maxbytes);
+ if (!(sb->s_flags & MS_RDONLY) && !sb_file_times_updatable(sb)) { + WARN(1, "File times cannot be updated on the filesystem.\n"); + WARN(1, "Retry mounting the filesystem readonly.\n"); + error = -EROFS; + goto out_sb; + } + up_write(&sb->s_umount); free_secdata(secdata); return root; diff --git a/include/linux/fs.h b/include/linux/fs.h index ef55dfb..e12a32d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -68,6 +68,7 @@ extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; extern int sysctl_protected_hardlinks; +extern struct vfs_max_timestamp_check timestamp_check;
struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, diff --git a/include/linux/time64.h b/include/linux/time64.h index 25433b18..906e0b3 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -43,6 +43,10 @@ struct itimerspec64 { #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+/* Timestamps on boundary */ +#define Y2038_EXPIRY_TIMESTAMP S32_MAX /* 2147483647 */ +#define Y2106_EXPIRY_TIMESTAMP U32_MAX /* 4294967295 */ + #if __BITS_PER_LONG == 64
static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 048a85e..125e4ae 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -91,6 +91,11 @@ struct files_stat_struct { unsigned long max_files; /* tunable */ };
+struct vfs_max_timestamp_check { + time64_t timestamp_supported; + int check_on; +}; + struct inodes_stat_t { long nr_inodes; long nr_unused; @@ -100,7 +105,6 @@ struct inodes_stat_t {
#define NR_FILE 8192 /* this can well be larger on a larger system */
- /* * These are the fs-independent mount-flags: up to 32 flags are supported */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bb260ce..eda60d9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1667,6 +1667,13 @@ static struct ctl_table fs_table[] = { .proc_handler = proc_doulongvec_minmax, }, { + .procname = "fs-timestamp-check-on", + .data = &timestamp_check.check_on, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "nr_open", .data = &sysctl_nr_open, .maxlen = sizeof(unsigned int),
ext4 has different overflow limits for max filesystem timestamps based on the extra bytes available.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- fs/ext4/ext4.h | 4 ++++ fs/ext4/super.c | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2bef0bd..7bac2cd 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1639,6 +1639,10 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_GOOD_OLD_INODE_SIZE 128
+#define EXT4_EXTRA_TIMESTAMP_MAX (((s64)1 << 34) - 1 + S32_MIN) +#define EXT4_NON_EXTRA_TIMESTAMP_MAX Y2038_EXPIRY_TIMESTAMP +#define EXT4_TIMESTAMP_MIN S32_MIN + /* * Feature set definitions */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a673558..a77e11c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3689,8 +3689,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_inode_size); goto failed_mount; } - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); + sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX; + } else + sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX; + + sb->s_time_min = EXT4_TIMESTAMP_MIN; }
sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
The timespec_trunc() function is used to truncate a filesystem timestamp to the right granularity. However, the function does not clamp the tv_sec part of the timestamp according to the filesystem timestamp limits.
Also, timespec_trunc() is exclusively used for filesystem timestamps. Move the api to be part of vfs.
The replacement api, timestamp_truncate(), also alters the signature of the function to accommodate filesystem timestamp clamping according to filesystem limits.
Note that the clamp_t macro is used for clamping here as vfs is not yet using struct timespec64 internally. This is required for compilation purposes. Also note that clamp won't do the right thing for timestamps beyond 2038 on 32-bit machines until the vfs uses timespec64. After the vfs is transitioned to use timespec64 for timestamps, clamp_t() can be replaced by clamp().
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- fs/inode.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/fs/inode.c b/fs/inode.c index 0573a3e..6a1bc12 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2114,6 +2114,36 @@ void inode_nohighmem(struct inode *inode) EXPORT_SYMBOL(inode_nohighmem);
/** + * fs_timespec_trunc - Truncate timespec to a granularity + * @t: Timespec + * @gran: Granularity in ns. + * + * Truncate a timespec to a granularity. Always rounds down. gran must + * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns). + */ +struct timespec timestamp_truncate(struct timespec t, struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + unsigned int gran = sb->s_time_gran; + + t.tv_sec = clamp_t(time64_t, t.tv_sec, sb->s_time_min, sb->s_time_max); + + /* Avoid division in the common cases 1 ns and 1 s. */ + if (gran == 1) { + /* nothing */ + } else if (gran == NSEC_PER_SEC) { + t.tv_nsec = 0; + } else if (gran > 1 && gran < NSEC_PER_SEC) { + t.tv_nsec -= t.tv_nsec % gran; + } else { + WARN(1, "illegal file time granularity: %u", gran); + } + return t; +} +EXPORT_SYMBOL(timestamp_truncate); + + +/** * current_time - Return FS time * @inode: inode. * @@ -2132,6 +2162,6 @@ struct timespec current_time(struct inode *inode) return now; }
- return timespec_trunc(now, inode->i_sb->s_time_gran); + return timestamp_truncate(now, inode); } EXPORT_SYMBOL(current_time);
Hi Deepa,
[auto build test WARNING on linus/master] [also build test WARNING on v4.10-rc8 next-20170217] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Deepa-Dinamani/vfs-Add-timestamp-ra... reproduce: make htmldocs
All warnings (new ones prefixed by >>):
fs/inode.c:1679: warning: No description found for parameter 'rcu' fs/inode.c:2125: warning: No description found for parameter 'inode'
fs/inode.c:2125: warning: Excess function parameter 'gran' description in 'timestamp_truncate'
include/linux/jbd2.h:442: warning: No description found for parameter 'i_transaction' include/linux/jbd2.h:442: warning: No description found for parameter 'i_next_transaction' include/linux/jbd2.h:442: warning: No description found for parameter 'i_list' include/linux/jbd2.h:442: warning: No description found for parameter 'i_vfs_inode' include/linux/jbd2.h:442: warning: No description found for parameter 'i_flags' include/linux/jbd2.h:494: warning: No description found for parameter 'h_rsv_handle' include/linux/jbd2.h:494: warning: No description found for parameter 'h_reserved' include/linux/jbd2.h:494: warning: No description found for parameter 'h_type' include/linux/jbd2.h:494: warning: No description found for parameter 'h_line_no' include/linux/jbd2.h:494: warning: No description found for parameter 'h_start_jiffies' include/linux/jbd2.h:494: warning: No description found for parameter 'h_requested_credits' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_chkpt_bhs[JBD2_NR_BATCH]' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_devname[BDEVNAME_SIZE+24]' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_average_commit_time' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_min_batch_time' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_max_batch_time' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_commit_callback' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_failed_commit' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_chksum_driver' include/linux/jbd2.h:1047: warning: No description found for parameter 'j_csum_seed' fs/jbd2/transaction.c:428: warning: No description found for parameter 'rsv_blocks' fs/jbd2/transaction.c:428: warning: No description found for parameter 'gfp_mask' fs/jbd2/transaction.c:428: warning: No description found for parameter 
'type' fs/jbd2/transaction.c:428: warning: No description found for parameter 'line_no' fs/jbd2/transaction.c:504: warning: No description found for parameter 'type' fs/jbd2/transaction.c:504: warning: No description found for parameter 'line_no' fs/jbd2/transaction.c:634: warning: No description found for parameter 'gfp_mask'
vim +2125 fs/inode.c
2109 2110 void inode_nohighmem(struct inode *inode) 2111 { 2112 mapping_set_gfp_mask(inode->i_mapping, GFP_USER); 2113 } 2114 EXPORT_SYMBOL(inode_nohighmem); 2115 2116 /** 2117 * fs_timespec_trunc - Truncate timespec to a granularity 2118 * @t: Timespec 2119 * @gran: Granularity in ns. 2120 * 2121 * Truncate a timespec to a granularity. Always rounds down. gran must 2122 * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns). 2123 */ 2124 struct timespec timestamp_truncate(struct timespec t, struct inode *inode)
2125 {
2126 struct super_block *sb = inode->i_sb; 2127 unsigned int gran = sb->s_time_gran; 2128 2129 t.tv_sec = clamp_t(time64_t, t.tv_sec, sb->s_time_min, sb->s_time_max); 2130 2131 /* Avoid division in the common cases 1 ns and 1 s. */ 2132 if (gran == 1) { 2133 /* nothing */
--- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
fs/inode.c:2125: warning: Excess function parameter 'gran' description in 'timestamp_truncate'
2116 /** 2117 * fs_timespec_trunc - Truncate timespec to a granularity 2118 * @t: Timespec 2119 * @gran: Granularity in ns. 2120 * 2121 * Truncate a timespec to a granularity. Always rounds down. gran must 2122 * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns). 2123 */ 2124 struct timespec timestamp_truncate(struct timespec t, struct inode *inode)
2125 {
2126 struct super_block *sb = inode->i_sb; 2127 unsigned int gran = sb->s_time_gran;
This is because the comment does not match the actual function signature: the gran param mentioned in the comment is not actually part of the function signature.
I will fix the comment in the next version.
-Deepa
POSIX.1 section for futimens, utimensat and utimes says: The file's relevant timestamp shall be set to the greatest value supported by the file system that is not greater than the specified time.
Clamp the timestamps accordingly before assignment.
Note that the clamp_t macro is used for clamping here as vfs is not yet using struct timespec64 internally. This is required for compilation purposes. Also note that clamp won't do the right thing for timestamps beyond 2038 on 32-bit machines until the vfs uses timespec64. After the vfs is transitioned to use timespec64 for timestamps, clamp_t() can be replaced by clamp().
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- fs/utimes.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/fs/utimes.c b/fs/utimes.c index 32b15b3..052fe5d 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -53,6 +53,7 @@ static int utimes_common(const struct path *path, struct timespec *times) int error; struct iattr newattrs; struct inode *inode = path->dentry->d_inode; + struct super_block *sb = inode->i_sb; struct inode *delegated_inode = NULL;
error = mnt_want_write(path->mnt); @@ -68,16 +69,24 @@ static int utimes_common(const struct path *path, struct timespec *times) if (times[0].tv_nsec == UTIME_OMIT) newattrs.ia_valid &= ~ATTR_ATIME; else if (times[0].tv_nsec != UTIME_NOW) { - newattrs.ia_atime.tv_sec = times[0].tv_sec; - newattrs.ia_atime.tv_nsec = times[0].tv_nsec; + newattrs.ia_atime.tv_sec = + clamp_t(time64_t, times[0].tv_sec, sb->s_time_min, sb->s_time_max); + if (times[0].tv_sec >= sb->s_time_max) + newattrs.ia_atime.tv_nsec = 0; + else + newattrs.ia_atime.tv_nsec = times[0].tv_nsec; newattrs.ia_valid |= ATTR_ATIME_SET; }
if (times[1].tv_nsec == UTIME_OMIT) newattrs.ia_valid &= ~ATTR_MTIME; else if (times[1].tv_nsec != UTIME_NOW) { - newattrs.ia_mtime.tv_sec = times[1].tv_sec; - newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; + newattrs.ia_mtime.tv_sec = + clamp_t(time64_t, times[1].tv_sec, sb->s_time_min, sb->s_time_max); + if (times[1].tv_sec >= sb->s_time_max) + newattrs.ia_mtime.tv_nsec = 0; + else + newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } /*