This is a preparation patch to add range checking for inode
timestamps.
Extend struct super_block to include information about the max
and min inode times each filesystem can hold. These are dependent
on the on-disk format of filesystems.
These range checks will be used to clamp timestamps to filesystem
allowed ranges.
Individual filesystems do not have the same on disk format as
the in memory inodes. Range checking and clamping times assigned
to inodes will help keep in memory and on-disk timestamps to be
in sync.
Every time a new superblock is created, make sure that the superblock
max and min timestamp fields are assigned invalid values.
Another series will initialize these fields to appropriate values for
every filesystem.
The values are currently ignored. The exact policy and behavior will be
decided in a separate patch.
max and min times are initialized to MIN_VFS_TIME and MAX_VFS_TIME
respectively so that even if one of the fields is uninitialized,
it can be detected by using the condition max_time < min_time.
The original idea for the feature comes from the discussion:
https://lkml.org/lkml/2014/5/30/669
Signed-off-by: Deepa Dinamani <deepa.kernel(a)gmail.com>
---
The intention is to include this as part of 4.6 so that the follow on
patches can go into 4.7.
The series and the plan have been discussed with Arnd Bergmann.
Changes from v1:
* Delete INVALID macros, use VFS_TIME macros directly.
* Add comment in alloc_super() to explain range checking.
* Reword the commit text to reflect the above.
fs/super.c | 7 +++++++
include/linux/fs.h | 12 +++++++++++-
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/fs/super.c b/fs/super.c
index 1182af8..37ec188 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -239,6 +239,13 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
+ /*
+ * Assign a default empty range [MAX_VFS_TIME, MIN_VFS_TIME].
+ * This will help VFS detect filesystems that do not populate
+ * these fields in the superblock.
+ */
+ s->s_time_min = MAX_VFS_TIME;
+ s->s_time_max = MIN_VFS_TIME;
s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1af4727..cee8f99 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -927,6 +927,9 @@ static inline struct file *get_file(struct file *f)
#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL)
#endif
+#define MIN_VFS_TIME S64_MIN
+#define MAX_VFS_TIME S64_MAX
+
#define FL_POSIX 1
#define FL_FLOCK 2
#define FL_DELEG 4 /* NFSv4 delegation */
@@ -1343,7 +1346,14 @@ struct super_block {
/* Granularity of c/m/atime in ns.
Cannot be worse than a second */
- u32 s_time_gran;
+ u32 s_time_gran;
+
+ /*
+ * Max and min values for timestamps
+ * according to the range supported by filesystems.
+ */
+ time64_t s_time_min;
+ time64_t s_time_max;
/*
* The next field is for VFS *only*. No filesystems have any business
--
1.9.1
This is a preparation patch to add range checking for inode
timestamps.
These range checks will be used to clamp timestamps to filesystem
allowed ranges.
Individual filesystems do not have the same on disk format as
the in memory inodes. Range checking and clamping times assigned
to inodes will help keep in memory and on-disk timestamps to be
in sync.
Extend struct super_block to include information about the max
and min inode times each filesystem can hold. These are dependent
on the on-disk format of filesystems.
Every time a new superblock is created, make sure that the superblock
max and min timestamp fields are assigned invalid values.
Another series will initialize these fields to appropriate values for
every filesystem.
The values are currently ignored. The exact policy and behavior will be
decided in a separate patch.
MAX_INVALID_VFS_TIME and MIN_INVALID_VFS_TIME are initialized to S64_MIN
and S64_MAX respectively so that even if one of the fields is
uninitialized, it can be detected by using the condition
max_time < min_time.
The original idea for the feature comes from the discussion:
https://lkml.org/lkml/2014/5/30/669
Signed-off-by: Deepa Dinamani <deepa.kernel(a)gmail.com>
---
The intention is to include this as part of 4.6 so that the follow on
patches can go into 4.7.
The series and the plan have been discussed with Arnd Bergmann.
fs/super.c | 2 ++
include/linux/fs.h | 13 ++++++++++++-
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/fs/super.c b/fs/super.c
index 1182af8..d70a8f6 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -239,6 +239,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
+ s->s_time_max = MAX_INVALID_VFS_TIME;
+ s->s_time_min = MIN_INVALID_VFS_TIME;
s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1af4727..15b41e6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -927,6 +927,12 @@ static inline struct file *get_file(struct file *f)
#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL)
#endif
+#define MAX_VFS_TIME S64_MAX
+#define MIN_VFS_TIME S64_MIN
+
+#define MAX_INVALID_VFS_TIME S64_MIN
+#define MIN_INVALID_VFS_TIME S64_MAX
+
#define FL_POSIX 1
#define FL_FLOCK 2
#define FL_DELEG 4 /* NFSv4 delegation */
@@ -1343,7 +1349,12 @@ struct super_block {
/* Granularity of c/m/atime in ns.
Cannot be worse than a second */
- u32 s_time_gran;
+ u32 s_time_gran;
+ /* Max and min values for timestamps
+ * according to the range supported by filesystems.
+ */
+ time64_t s_time_max;
+ time64_t s_time_min;
/*
* The next field is for VFS *only*. No filesystems have any business
--
1.9.1
The series contains infrastucture patches required to convert
vfs times to use 64 bit time.
The intention is to include these as part of 4.6 so that the follow on
patches can go into 4.7.
Patch 1 is as per the agreed upon RFC approach 2b:
https://lkml.org/lkml/2016/2/12/105
And, patch 2 is as per previously agreed upon discussion in:
https://lkml.org/lkml/2016/1/7/20
The patches that will use these will be posted for the subsequent
kernel release.
The series and the plan have been discussed with Arnd Bergmann.
Deepa Dinamani (2):
fs: Add current_fs_time_sec() function
vfs: Add vfs_time accessors
include/linux/fs.h | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
--
1.9.1
Introduction:
The series is aimed at transitioning network timestamps to being
y2038 safe.
All patches can be reviewed and merged independently.
Socket timestamps and ioctl calls will be handled separately.
Thanks to Arnd Bergmann for discussing solution options with me.
Solution:
Data type struct timespec is not y2038 safe.
Replace timespec with struct timespec64 which is y2038 safe.
Changes v1 -> v2:
Move and rename inet_current_time() as discussed
Squash patches 1 and 2
Reword commit text for patch 2/3
Carry over review tags
Deepa Dinamani (3):
net: ipv4: Convert IP network timestamps to be y2038 safe
net: ipv4: tcp_probe: Replace timespec with timespec64
net: sctp: Convert log timestamps to be y2038 safe
include/net/ip.h | 2 ++
net/ipv4/af_inet.c | 26 ++++++++++++++++++++++++++
net/ipv4/icmp.c | 5 +----
net/ipv4/ip_options.c | 14 ++++++--------
net/ipv4/tcp_probe.c | 8 ++++----
net/sctp/probe.c | 10 +++++-----
6 files changed, 44 insertions(+), 21 deletions(-)
--
1.9.1
Cc: Vlad Yasevich <vyasevich(a)gmail.com>
Cc: Neil Horman <nhorman(a)tuxdriver.com>
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Alexey Kuznetsov <kuznet(a)ms2.inr.ac.ru>
Cc: James Morris <jmorris(a)namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji(a)linux-ipv6.org>
Cc: Patrick McHardy <kaber(a)trash.net>
Cc: linux-sctp(a)vger.kernel.org
long/ kernel_time_t is 32 bit on a 32 bit system and
64 bit on a 64 bit system.
ceph_encode_timespec() encodes only the lower 32 bits on
a 64 bit system and encodes all of 32 bits on a 32bit
system.
ceph_decode_timespec() decodes 32 bit tv_sec and tv_nsec
into kernel_time_t/ long.
The encode and decode functions do not match when the
values are negative:
Consider the following scenario on a 32 bit system:
When a negative number is cast to u32 as encode does, the
value is positive and is greater than INT_MAX. Decode reads
back this value. And, this value cannot be represented by
long on 32 bit systems. So by section 6.3.1.3 of the
C99 standard, the result is implementation defined.
Consider the following scenario on a 64 bit system:
When a negative number is cast to u32 as encode does, the
value is positive. This value is later assigned by decode
function by a cast to long. Since this value can be
represented in long data type, this becomes a positive
value greater than INT_MAX. But, the value encoded was
negative, so the encode and decode functions do not match.
Change the decode function as follows to overcome the above
bug:
The decode should first cast the value to a s64 this will
be positive value greater than INT_MAX(in case of a negative
encoded value)and then cast this value again as s32, which
drops the higher order 32 bits.
On 32 bit systems, this is the right value in kernel_time_t/
long.
On 64 bit systems, assignment to kernel_time_t/ long
will sign extend this value to reflect the signed bit encoded.
Assume ceph timestamp ranges permitted are 1902..2038.
Suggested-by: Arnd Bergmann <arnd(a)arndb.de>
Signed-off-by: Deepa Dinamani <deepa.kernel(a)gmail.com>
---
include/linux/ceph/decode.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index a6ef9cc..e777e99 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -137,8 +137,8 @@ bad:
static inline void ceph_decode_timespec(struct timespec *ts,
const struct ceph_timespec *tv)
{
- ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
- ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
+ ts->tv_sec = (s32)(s64)le32_to_cpu(tv->tv_sec);
+ ts->tv_nsec = (s32)(s64)le32_to_cpu(tv->tv_nsec);
}
static inline void ceph_encode_timespec(struct ceph_timespec *tv,
const struct timespec *ts)
--
1.9.1