current_time is one of the few callers of current_kernel_time64(), which
is a wrapper around ktime_get_coarse_real_ts64(). This calls the latter
directly for consistency with the rest of the kernel that is moving to
the ktime_get_ family of time accessors.
An open question is whether we may want to actually call the more
accurate ktime_get_real_ts64() for file systems that save high-resolution
timestamps in their on-disk format. This would add a small but measurable
overhead to each update of the inode stamps, but it would give inode
timestamps a usable resolution better than one jiffy (1 to 10
milliseconds normally).
I traced the original addition of the current_kernel_time() call to set
the nanosecond fields back to linux-2.5.48, where Andi Kleen added a
patch with subject "nanosecond stat timefields". This adds the original
call to current_kernel_time and the truncation to the resolution of the
file system, but makes no mention of the intended accuracy. At the time,
we had a do_gettimeofday() interface that on some architectures could
return a microsecond-resolution timestamp, but there was no interface
for getting an accurate timestamp in nanosecond resolution, neither inside
the kernel nor from user space. This makes me suspect that the use of
coarse timestamps was never really a conscious decision but instead
a result of whatever API was available 16 years ago.
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
fs/inode.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..e27bd9334939 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2133,7 +2133,9 @@ EXPORT_SYMBOL(timespec64_trunc);
*/
struct timespec64 current_time(struct inode *inode)
{
- struct timespec64 now = current_kernel_time64();
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
if (unlikely(!inode->i_sb)) {
WARN(1, "current_time() called with uninitialized super_block in the inode");
--
2.9.0
get_seconds() can overflow on 32-bit architectures and is deprecated
because of that. The use in the aacraid driver has the same problem due
to a limited firmware interface: it also overflows in the year 2106.
This changes all calls to get_seconds() to the non-deprecated
ktime_get_real_seconds(), which unfortunately doesn't solve that problem
but gets rid of one user of the deprecated interface.
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
drivers/scsi/aacraid/rx.c | 2 +-
drivers/scsi/aacraid/sa.c | 2 +-
drivers/scsi/aacraid/src.c | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c
index 620166694171..576cdf9cc120 100644
--- a/drivers/scsi/aacraid/rx.c
+++ b/drivers/scsi/aacraid/rx.c
@@ -319,7 +319,7 @@ static void aac_rx_start_adapter(struct aac_dev *dev)
union aac_init *init;
init = dev->init;
- init->r7.host_elapsed_seconds = cpu_to_le32(get_seconds());
+ init->r7.host_elapsed_seconds = cpu_to_le32(ktime_get_real_seconds());
// We can only use a 32 bit address here
rx_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa,
0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
diff --git a/drivers/scsi/aacraid/sa.c b/drivers/scsi/aacraid/sa.c
index 882f40353b96..efa96c1c6aa3 100644
--- a/drivers/scsi/aacraid/sa.c
+++ b/drivers/scsi/aacraid/sa.c
@@ -251,7 +251,7 @@ static void aac_sa_start_adapter(struct aac_dev *dev)
* Fill in the remaining pieces of the init.
*/
init = dev->init;
- init->r7.host_elapsed_seconds = cpu_to_le32(get_seconds());
+ init->r7.host_elapsed_seconds = cpu_to_le32(ktime_get_real_seconds());
/* We can only use a 32 bit address here */
sa_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS,
(u32)(ulong)dev->init_pa, 0, 0, 0, 0, 0,
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 4ebb35a29caa..5a299975a289 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -409,7 +409,7 @@ static void aac_src_start_adapter(struct aac_dev *dev)
init = dev->init;
if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE3) {
- init->r8.host_elapsed_seconds = cpu_to_le32(get_seconds());
+ init->r8.host_elapsed_seconds = cpu_to_le32(ktime_get_real_seconds());
src_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS,
lower_32_bits(dev->init_pa),
upper_32_bits(dev->init_pa),
@@ -417,7 +417,7 @@ static void aac_src_start_adapter(struct aac_dev *dev)
(AAC_MAX_HRRQ - 1) * sizeof(struct _rrq),
0, 0, 0, NULL, NULL, NULL, NULL, NULL);
} else {
- init->r7.host_elapsed_seconds = cpu_to_le32(get_seconds());
+ init->r7.host_elapsed_seconds = cpu_to_le32(ktime_get_real_seconds());
// We can only use a 32 bit address here
src_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS,
(u32)(ulong)dev->init_pa, 0, 0, 0, 0, 0,
--
2.9.0
'struct rusage' contains the run times of a process in 'timeval' format
and is accessed through the wait4() and getrusage() system calls. This
is not a problem for y2038 safety by itself, but causes an issue when
the C library starts using 64-bit time_t on 32-bit architectures because
the structure layout becomes incompatible.
There are four possible ways of dealing with this:
a) deprecate the wait4() and getrusage() system calls, and create
a set of kernel interfaces based around a newly defined structure that
could solve multiple problems at once, e.g. provide more fine-grained
timestamps. The C library could then implement the posix interfaces
on top of the new system calls.
b) Extend the approach taken by the x32 ABI, and use the 64-bit
native structure layout for rusage on all architectures with new
system calls that are otherwise compatible. A downside of this
is that it requires a number of ugly hacks to deal with all the
other fields of the structure also becoming 64 bit wide.
Especially on big-endian architectures, we can't easily use the
union trick from glibc.
c) Change the definition of struct rusage to be independent of
time_t. This is the easiest change, as it does not involve new system
call entry points, but it requires the C library to convert between
the kernel format of the structure and the user space definition.
d) Add a new ABI variant of 'struct rusage' that corresponds to the
current layout with 32-bit counters but 64-bit time_t. This would
minimize the libc changes but require additional kernel code to
handle a third binary layout on 64-bit kernels.
I'm picking approach c) for its simplicity. As pointed out by reviewers,
simply using the kernel structure in user space would not be POSIX
compliant, but I have verified that none of the usual C libraries (glibc,
musl, uclibc-ng, newlib) do that. Instead, they all provide their own
definition of 'struct rusage' to applications in sys/resource.h.
To be on the safe side, I'm only changing the definition inside of
the kernel and for user space with an updated 'time_t'. All existing
users will see the traditional layout that is compatible with what the
C libraries export. A 32-bit application that includes linux/resource.h
but uses an updated C library with 64-bit time_t will now see the low-level
kernel structure that corresponds to the getrusage() system call interface
but that will be different from the one defined in sys/resource.h for the
getrusage library interface.
Link: https://patchwork.kernel.org/patch/10077527/
Cc: Paul Eggert <eggert(a)cs.ucla.edu>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
arch/alpha/kernel/osf_sys.c | 15 +++++++++------
include/uapi/linux/resource.h | 14 ++++++++++++--
kernel/sys.c | 4 ++--
3 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 89faa6f4de47..cad03ee445b3 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1184,6 +1184,7 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
struct rusage32 __user *, ur)
{
unsigned int status = 0;
+ struct rusage32 r32;
struct rusage r;
long err = kernel_wait4(pid, &status, options, &r);
if (err <= 0)
@@ -1192,12 +1193,14 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
return -EFAULT;
if (!ur)
return err;
- if (put_tv_to_tv32(&ur->ru_utime, &r.ru_utime))
- return -EFAULT;
- if (put_tv_to_tv32(&ur->ru_stime, &r.ru_stime))
- return -EFAULT;
- if (copy_to_user(&ur->ru_maxrss, &r.ru_maxrss,
- sizeof(struct rusage32) - offsetof(struct rusage32, ru_maxrss)))
+ r32.ru_utime.tv_sec = r.ru_utime.tv_sec;
+ r32.ru_utime.tv_usec = r.ru_utime.tv_usec;
+ r32.ru_stime.tv_sec = r.ru_stime.tv_sec;
+ r32.ru_stime.tv_usec = r.ru_stime.tv_usec;
+ memcpy(&r32.ru_maxrss, &r.ru_maxrss,
+ sizeof(struct rusage32) - offsetof(struct rusage32, ru_maxrss));
+
+ if (copy_to_user(ur, &r32, sizeof(r32)))
return -EFAULT;
return err;
}
diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h
index cc00fd079631..611d3745c70a 100644
--- a/include/uapi/linux/resource.h
+++ b/include/uapi/linux/resource.h
@@ -22,8 +22,18 @@
#define RUSAGE_THREAD 1 /* only the calling thread */
struct rusage {
- struct timeval ru_utime; /* user time used */
- struct timeval ru_stime; /* system time used */
+#if (__BITS_PER_LONG != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL__)
+ struct timeval ru_utime; /* user time used */
+ struct timeval ru_stime; /* system time used */
+#else
+ /*
+ * For 32-bit user space with 64-bit time_t, the binary layout
+ * in these fields is incompatible with 'struct timeval', so the
+ * C library has to translate this into the POSIX compatible layout.
+ */
+ struct __kernel_old_timeval ru_utime;
+ struct __kernel_old_timeval ru_stime;
+#endif
__kernel_long_t ru_maxrss; /* maximum resident set size */
__kernel_long_t ru_ixrss; /* integral shared memory size */
__kernel_long_t ru_idrss; /* integral unshared data size */
diff --git a/kernel/sys.c b/kernel/sys.c
index ad692183dfe9..1de538f622e8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1769,8 +1769,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
unlock_task_sighand(p, &flags);
out:
- r->ru_utime = ns_to_timeval(utime);
- r->ru_stime = ns_to_timeval(stime);
+ r->ru_utime = ns_to_kernel_old_timeval(utime);
+ r->ru_stime = ns_to_kernel_old_timeval(stime);
if (who != RUSAGE_CHILDREN) {
struct mm_struct *mm = get_task_mm(p);
--
2.9.0
The patch titled
Subject: kernel/sys.c: remove get_monotonic_boottime()
has been removed from the -mm tree. Its filename was
sysinfo-remove-get_monotonic_boottime.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Arnd Bergmann <arnd(a)arndb.de>
Subject: kernel/sys.c: remove get_monotonic_boottime()
get_monotonic_boottime() is deprecated because it uses the old 'timespec'
structure. This replaces one of the last callers with a call to
ktime_get_boottime_ts64().
Link: http://lkml.kernel.org/r/20180618150114.849216-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Reviewed-by: Cyrill Gorcunov <gorcunov(a)gmail.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <y2038(a)lists.linaro.org>
Cc: Dominik Brodowski <linux(a)dominikbrodowski.net>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/sys.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff -puN kernel/sys.c~sysinfo-remove-get_monotonic_boottime kernel/sys.c
--- a/kernel/sys.c~sysinfo-remove-get_monotonic_boottime
+++ a/kernel/sys.c
@@ -2523,11 +2523,11 @@ static int do_sysinfo(struct sysinfo *in
{
unsigned long mem_total, sav_total;
unsigned int mem_unit, bitcount;
- struct timespec tp;
+ struct timespec64 tp;
memset(info, 0, sizeof(struct sysinfo));
- get_monotonic_boottime(&tp);
+ ktime_get_boottime_ts64(&tp);
info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
_
Patches currently in -mm which might be from arnd(a)arndb.de are
ocfs2-dlmglue-clean-up-timestamp-handling.patch
shmem-use-monotonic-time-for-i_generation.patch
procfs-uptime-use-ktime_get_boottime_ts64.patch
crash-print-timestamp-using-time64_t.patch
nilfs2-use-64-bit-superblock-timstamps.patch
reiserfs-remove-unused-j_timestamp.patch
reiserfs-use-monotonic-time-for-j_trans_start_time.patch
reiserfs-remove-obsolete-print_time-function.patch
fat-propagate-64-bit-inode-timestamps.patch
adfs-use-timespec64-for-time-conversion.patch
vmcore-hide-vmcoredd_mmap_dumps-for-nommu-builds.patch
This uses the deprecated time_t type but is write-only, and could be
removed, but as Jeff explains, having a timestamp can be useful for
post-mortem analysis in crash dumps.
In order to remove one of the last instances of time_t, this changes
the type to time64_t, same as j_trans_start_time.
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jeff Mahoney <jeffm(a)suse.com>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
This could replace "reiserfs: remove unused j_timestamp" if we decide
that we want to keep that variable.
Posting this patch as an alternative now, while waiting for Jeff to
reply on how important this really is.
---
fs/reiserfs/reiserfs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 621b9a07080a..e5ca9ed79e54 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -271,7 +271,7 @@ struct reiserfs_journal_list {
struct mutex j_commit_mutex;
unsigned int j_trans_id;
- time_t j_timestamp;
+ time64_t j_timestamp; /* write-only but useful for crash dump analysis */
struct reiserfs_list_bitmap *j_list_bitmap;
struct buffer_head *j_commit_bh; /* commit buffer head */
struct reiserfs_journal_cnode *j_realblock;
--
2.9.0
While working on extended range for last_error/first_error timestamps,
I noticed that the endianness is wrong: we access the little-endian
fields in struct ext4_super_block as native-endian when we print them.
This adds a special case in ext4_attr_show() and ext4_attr_store()
to byteswap the superblock fields if needed.
In older kernels, this code was part of super.c; it got moved to sysfs.c
in linux-4.4.
Cc: stable(a)vger.kernel.org
Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors")
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
fs/ext4/sysfs.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index f34da0bb8f17..b970a200f20c 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -274,8 +274,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
case attr_pointer_ui:
if (!ptr)
return 0;
- return snprintf(buf, PAGE_SIZE, "%u\n",
- *((unsigned int *) ptr));
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ le32_to_cpup(ptr));
+ else
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ *((unsigned int *) ptr));
case attr_pointer_atomic:
if (!ptr)
return 0;
@@ -308,7 +312,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret)
return ret;
- *((unsigned int *) ptr) = t;
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ *((__le32 *) ptr) = cpu_to_le32(t);
+ else
+ *((unsigned int *) ptr) = t;
return len;
case attr_inode_readahead:
return inode_readahead_blks_store(sbi, buf, len);
--
2.9.0