The series contains the last unmerged uses of CURRENT_TIME, CURRENT_TIME_SEC, and current_fs_time(). The series also deletes these apis.
All the patches except [PATCH 9/12] and [PATCH 10/12] are resend patches. These patches fix new instances of CURRENT_TIME. cifs and ceph patches have been squashed so that we have one patch per filesystem.
We want to get these merged onto 4.12 release so that I can post the series that changes vfs timestamps to use 64 bits for 4.13 release.
I'm proposing these to be merged through Andrew's tree.
Filesystem maintainers, please let Andrew know if you will be picking up the patch in your trees.
Let me know if anybody has other preferences for merging.
Deepa Dinamani (12): fs: f2fs: Use ktime_get_real_seconds for sit_info times trace: Make trace_hwlat timestamp y2038 safe fs: cifs: Replace CURRENT_TIME by other appropriate apis fs: ceph: CURRENT_TIME with ktime_get_real_ts() fs: ufs: Use ktime_get_real_ts64() for birthtime audit: Use timespec64 to represent audit timestamps fs: btrfs: Use ktime_get_real_ts for root ctime fs: ubifs: Replace CURRENT_TIME_SEC with current_time lustre: Replace CURRENT_TIME macro apparmorfs: Replace CURRENT_TIME with current_time() time: Delete CURRENT_TIME_SEC and CURRENT_TIME time: Delete current_fs_time() function
drivers/block/rbd.c | 2 +- drivers/staging/lustre/lustre/llite/llite_lib.c | 6 +++--- drivers/staging/lustre/lustre/osc/osc_io.c | 4 ++-- fs/btrfs/root-tree.c | 3 ++- fs/ceph/mds_client.c | 4 +++- fs/cifs/cifsencrypt.c | 4 +++- fs/cifs/cifssmb.c | 10 ++++----- fs/cifs/inode.c | 28 +++++++++++++------------ fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 5 +++-- fs/ubifs/dir.c | 12 +++++------ fs/ubifs/file.c | 12 +++++------ fs/ubifs/ioctl.c | 2 +- fs/ubifs/misc.h | 10 --------- fs/ubifs/sb.c | 14 +++++++++---- fs/ubifs/xattr.c | 6 +++--- fs/ufs/ialloc.c | 6 ++++-- include/linux/audit.h | 4 ++-- include/linux/fs.h | 1 - include/linux/time.h | 3 --- kernel/audit.c | 10 ++++----- kernel/audit.h | 2 +- kernel/auditsc.c | 6 +++--- kernel/time/time.c | 14 ------------- kernel/trace/trace_entries.h | 6 +++--- kernel/trace/trace_hwlat.c | 14 ++++++------- kernel/trace/trace_output.c | 9 ++++---- net/ceph/messenger.c | 6 ++++-- net/ceph/osd_client.c | 4 ++-- security/apparmor/apparmorfs.c | 2 +- 30 files changed, 100 insertions(+), 111 deletions(-)
CURRENT_TIME_SEC is not y2038 safe.
Replace use of CURRENT_TIME_SEC with ktime_get_real_seconds in segment timestamps used by GC algorithm including the segment mtime timestamps.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Reviewed-by: Arnd Bergmann arnd@arndb.de --- fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 010324c..0531500 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2678,7 +2678,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); - sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; + sit_i->mounted_time = ktime_get_real_seconds(); mutex_init(&sit_i->sentry_lock); return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 57e36c1..156afc3 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -692,8 +692,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); - return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - - sit_i->mounted_time; + time64_t now = ktime_get_real_seconds(); + + return sit_i->elapsed_time + now - sit_i->mounted_time; }
static inline void set_summary(struct f2fs_summary *sum, nid_t nid,
struct timespec is not y2038 safe on 32 bit machines and needs to be replaced by struct timespec64 in order to represent times beyond year 2038 on such machines.
Fix all the timestamp representation in struct trace_hwlat and all the corresponding implementations.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- kernel/trace/trace_entries.h | 6 +++--- kernel/trace/trace_hwlat.c | 14 +++++++------- kernel/trace/trace_output.c | 9 ++++----- 3 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index c203ac4..adcdbbe 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -348,14 +348,14 @@ FTRACE_ENTRY(hwlat, hwlat_entry, __field( u64, duration ) __field( u64, outer_duration ) __field( u64, nmi_total_ts ) - __field_struct( struct timespec, timestamp ) - __field_desc( long, timestamp, tv_sec ) + __field_struct( struct timespec64, timestamp ) + __field_desc( s64, timestamp, tv_sec ) __field_desc( long, timestamp, tv_nsec ) __field( unsigned int, nmi_count ) __field( unsigned int, seqnum ) ),
- F_printk("cnt:%u\tts:%010lu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n", + F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n", __entry->seqnum, __entry->tv_sec, __entry->tv_nsec, diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 21ea6ae..d7c8e4e 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -79,12 +79,12 @@ static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC;
/* Individual latency samples are stored here when detected. */ struct hwlat_sample { - u64 seqnum; /* unique sequence */ - u64 duration; /* delta */ - u64 outer_duration; /* delta (outer loop) */ - u64 nmi_total_ts; /* Total time spent in NMIs */ - struct timespec timestamp; /* wall time */ - int nmi_count; /* # NMIs during this sample */ + u64 seqnum; /* unique sequence */ + u64 duration; /* delta */ + u64 outer_duration; /* delta (outer loop) */ + u64 nmi_total_ts; /* Total time spent in NMIs */ + struct timespec64 timestamp; /* wall time */ + int nmi_count; /* # NMIs during this sample */ };
/* keep the global state somewhere. */ @@ -250,7 +250,7 @@ static int get_sample(void) s.seqnum = hwlat_data.count; s.duration = sample; s.outer_duration = outer_sample; - s.timestamp = CURRENT_TIME; + ktime_get_real_ts64(&s.timestamp); s.nmi_total_ts = nmi_total_ts; s.nmi_count = nmi_count; trace_hwlat_sample(&s); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 02a4aeb..08f9bab 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -4,7 +4,6 @@ * Copyright (C) 2008 Red Hat Inc, Steven Rostedt srostedt@redhat.com * */ - #include <linux/module.h> #include <linux/mutex.h> #include <linux/ftrace.h> @@ -1161,11 +1160,11 @@ trace_hwlat_print(struct trace_iterator *iter, int flags,
trace_assign_type(field, entry);
- trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld", + trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld", field->seqnum, field->duration, field->outer_duration, - field->timestamp.tv_sec, + (long long)field->timestamp.tv_sec, field->timestamp.tv_nsec);
if (field->nmi_count) { @@ -1195,10 +1194,10 @@ trace_hwlat_raw(struct trace_iterator *iter, int flags,
trace_assign_type(field, iter->ent);
- trace_seq_printf(s, "%llu %lld %ld %09ld %u\n", + trace_seq_printf(s, "%llu %lld %lld %09ld %u\n", field->duration, field->outer_duration, - field->timestamp.tv_sec, + (long long)field->timestamp.tv_sec, field->timestamp.tv_nsec, field->seqnum);
On Fri, 7 Apr 2017 17:57:00 -0700 Deepa Dinamani deepa.kernel@gmail.com wrote:
struct timespec is not y2038 safe on 32 bit machines and needs to be replaced by struct timespec64 in order to represent times beyond year 2038 on such machines.
Fix all the timestamp representation in struct trace_hwlat and all the corresponding implementations.
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 02a4aeb..08f9bab 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -4,7 +4,6 @@
- Copyright (C) 2008 Red Hat Inc, Steven Rostedt srostedt@redhat.com
*/
#include <linux/module.h> #include <linux/mutex.h> #include <linux/ftrace.h> @@ -1161,11 +1160,11 @@ trace_hwlat_print(struct trace_iterator *iter, int flags, trace_assign_type(field, entry);
- trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld",
- trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld", field->seqnum, field->duration, field->outer_duration,
field->timestamp.tv_sec,
(long long)field->timestamp.tv_sec,
Refresh my memory. We need the cast because on 64 bit boxes timestamp.tv_sec is just a long?
Other than that.
Reviewed-by: Steven Rostedt (VMware) rostedt@goodmis.org
-- Steve
field->timestamp.tv_nsec);
if (field->nmi_count) { @@ -1195,10 +1194,10 @@ trace_hwlat_raw(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent);
- trace_seq_printf(s, "%llu %lld %ld %09ld %u\n",
- trace_seq_printf(s, "%llu %lld %lld %09ld %u\n", field->duration, field->outer_duration,
field->timestamp.tv_sec,
(long long)field->timestamp.tv_sec, field->timestamp.tv_nsec, field->seqnum);
trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld",
trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld", field->seqnum, field->duration, field->outer_duration,
field->timestamp.tv_sec,
(long long)field->timestamp.tv_sec,
Refresh my memory. We need the cast because on 64 bit boxes timestamp.tv_sec is just a long?
This is only required until we change the definition of timespec64. Right now it is defined as
#if __BITS_PER_LONG == 64 # define timespec64 timespec #else struct timespec64 { time64_t tv_sec; long tv_nsec; }; #endif
And timespec.tv_sec is just long int on 64 bit machines. This is why we need the cast now.
We will probably change this and only define __kernel_timespec instead of timespec, leaving only one definition of timespec64. At that time, we will not need this.
-Deepa
CURRENT_TIME macro is not y2038 safe on 32 bit systems.
The patch replaces all the uses of CURRENT_TIME by current_time() for filesystem times, and ktime_get_* functions for authentication timestamps and timezone calculations.
This is also in preparation for the patch that transitions vfs timestamps to use 64 bit time and hence make them y2038 safe.
CURRENT_TIME macro will be deleted before merging the aforementioned change.
The inode timestamps read from the server are assumed to have correct granularity and range.
The patch also assumes that the difference between server and client times lie in the range INT_MIN..INT_MAX. This is valid because this is the difference between current times between server and client, and the largest timezone difference is in the range of one day.
All cifs timestamps currently use timespec representation internally. Authentication and timezone timestamps can also be transitioned into using timespec64 when all other timestamps for cifs is transitioned to use timespec64.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Reviewed-by: Arnd Bergmann arnd@arndb.de --- fs/cifs/cifsencrypt.c | 4 +++- fs/cifs/cifssmb.c | 10 +++++----- fs/cifs/inode.c | 28 +++++++++++++++------------- 3 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 058ac9b..68abbb0 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -478,6 +478,7 @@ find_timestamp(struct cifs_ses *ses) unsigned char *blobptr; unsigned char *blobend; struct ntlmssp2_name *attrptr; + struct timespec ts;
if (!ses->auth_key.len || !ses->auth_key.response) return 0; @@ -502,7 +503,8 @@ find_timestamp(struct cifs_ses *ses) blobptr += attrsize; /* advance attr value */ }
- return cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + ktime_get_real_ts(&ts); + return cpu_to_le64(cifs_UnixTimeToNT(ts)); }
static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 0669506..2f279b7 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -478,14 +478,14 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) * this requirement. */ int val, seconds, remain, result; - struct timespec ts, utc; - utc = CURRENT_TIME; + struct timespec ts; + unsigned long utc = ktime_get_real_seconds(); ts = cnvrtDosUnixTm(rsp->SrvTime.Date, rsp->SrvTime.Time, 0); cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", - (int)ts.tv_sec, (int)utc.tv_sec, - (int)(utc.tv_sec - ts.tv_sec)); - val = (int)(utc.tv_sec - ts.tv_sec); + (int)ts.tv_sec, (int)utc, + (int)(utc - ts.tv_sec)); + val = (int)(utc - ts.tv_sec); seconds = abs(val); result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; remain = seconds % MIN_TZ_ADJ; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b261db3..c3b2fa0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -322,9 +322,9 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; fattr->cf_uid = cifs_sb->mnt_uid; fattr->cf_gid = cifs_sb->mnt_gid; - fattr->cf_atime = CURRENT_TIME; - fattr->cf_ctime = CURRENT_TIME; - fattr->cf_mtime = CURRENT_TIME; + ktime_get_real_ts(&fattr->cf_mtime); + fattr->cf_mtime = timespec_trunc(fattr->cf_mtime, sb->s_time_gran); + fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime; fattr->cf_nlink = 2; fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; } @@ -586,9 +586,10 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ static void cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, - struct cifs_sb_info *cifs_sb, bool adjust_tz, + struct super_block *sb, bool adjust_tz, bool symlink) { + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
memset(fattr, 0, sizeof(*fattr)); @@ -598,8 +599,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
if (info->LastAccessTime) fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); - else - fattr->cf_atime = CURRENT_TIME; + else { + ktime_get_real_ts(&fattr->cf_atime); + fattr->cf_atime = timespec_trunc(fattr->cf_atime, sb->s_time_gran); + }
fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); @@ -659,7 +662,6 @@ cifs_get_file_info(struct file *filp) FILE_ALL_INFO find_data; struct cifs_fattr fattr; struct inode *inode = file_inode(filp); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; @@ -671,7 +673,7 @@ cifs_get_file_info(struct file *filp) rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: - cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false, + cifs_all_info_to_fattr(&fattr, &find_data, inode->i_sb, false, false); break; case -EREMOTE: @@ -753,7 +755,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, }
if (!rc) { - cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz, + cifs_all_info_to_fattr(&fattr, data, sb, adjust_tz, symlink); } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); @@ -1363,9 +1365,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) cifs_inode = CIFS_I(inode); cifs_inode->time = 0; /* will force revalidate to get info when needed */ - inode->i_ctime = current_fs_time(sb); + inode->i_ctime = current_time(inode); } - dir->i_ctime = dir->i_mtime = current_fs_time(sb); + dir->i_ctime = dir->i_mtime = current_time(dir); cifs_inode = CIFS_I(dir); CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ unlink_out: @@ -1633,7 +1635,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifsInode->time = 0;
d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime = - current_fs_time(inode->i_sb); + current_time(inode);
rmdir_exit: kfree(full_path); @@ -1806,7 +1808,7 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = - target_dir->i_mtime = current_fs_time(source_dir->i_sb); + target_dir->i_mtime = current_time(source_dir);
cifs_rename_exit: kfree(info_buf_source);
CURRENT_TIME is not y2038 safe. The macro will be deleted and all the references to it will be replaced by ktime_get_* apis.
struct timespec is also not y2038 safe. Retain timespec for timestamp representation here as ceph uses it internally everywhere. These references will be changed to use struct timespec64 in a separate patch.
The current_fs_time() api is being changed to use vfs struct inode* as an argument instead of struct super_block*.
Set the new mds client request r_stamp field using ktime_get_real_ts() instead of using current_fs_time().
Also, since r_stamp is used as mtime on the server, use timespec_trunc() to truncate the timestamp, using the right granularity from the superblock.
This api will be transitioned to be y2038 safe along with vfs.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Reviewed-by: Arnd Bergmann arnd@arndb.de --- drivers/block/rbd.c | 2 +- fs/ceph/mds_client.c | 4 +++- net/ceph/messenger.c | 6 ++++-- net/ceph/osd_client.c | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b..77204da 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req;
- osd_req->r_mtime = CURRENT_TIME; + ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset; }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762..1d3fa90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); + struct timespec ts;
if (!req) return ERR_PTR(-ENOMEM); @@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item);
- req->r_stamp = current_fs_time(mdsc->fsc->sb); + ktime_get_real_ts(&ts); + req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
req->r_op = op; req->r_direct_mode = mode; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f76bb33..5766a6c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1386,8 +1386,9 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec now = CURRENT_TIME; + struct timespec now;
+ ktime_get_real_ts(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); ceph_encode_timespec(&con->out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), @@ -3176,8 +3177,9 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con, { if (interval > 0 && (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { - struct timespec now = CURRENT_TIME; + struct timespec now; struct timespec ts; + ktime_get_real_ts(&now); jiffies_to_timespec(interval, &ts); ts = timespec_add(con->last_keepalive_ack, ts); return timespec_compare(&now, &ts) >= 0; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e15ea9e..242d7c0 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3574,7 +3574,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_WRITE; - lreq->mtime = CURRENT_TIME; + ktime_get_real_ts(&lreq->mtime);
lreq->reg_req = alloc_linger_request(lreq); if (!lreq->reg_req) { @@ -3632,7 +3632,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; - req->r_mtime = CURRENT_TIME; + ktime_get_real_ts(&req->r_mtime); osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH);
On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
CURRENT_TIME is not y2038 safe. The macro will be deleted and all the references to it will be replaced by ktime_get_* apis.
struct timespec is also not y2038 safe. Retain timespec for timestamp representation here as ceph uses it internally everywhere. These references will be changed to use struct timespec64 in a separate patch.
The current_fs_time() api is being changed to use vfs struct inode* as an argument instead of struct super_block*.
Set the new mds client request r_stamp field using ktime_get_real_ts() instead of using current_fs_time().
Also, since r_stamp is used as mtime on the server, use timespec_trunc() to truncate the timestamp, using the right granularity from the superblock.
This api will be transitioned to be y2038 safe along with vfs.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Reviewed-by: Arnd Bergmann arnd@arndb.de
drivers/block/rbd.c | 2 +- fs/ceph/mds_client.c | 4 +++- net/ceph/messenger.c | 6 ++++-- net/ceph/osd_client.c | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b..77204da 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_mtime = CURRENT_TIME;
ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762..1d3fa90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
struct timespec ts; if (!req) return ERR_PTR(-ENOMEM);
@@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item);
req->r_stamp = current_fs_time(mdsc->fsc->sb);
ktime_get_real_ts(&ts);
req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
This change causes our kernel_untar_tar test case to fail (inode's ctime goes back). The reason is that there is time drift between the time stamps got by ktime_get_real_ts() and current_time(). We need to revert this change until current_time() uses ktime_get_real_ts() internally.
Regards Yan, Zheng
req->r_op = op; req->r_direct_mode = mode;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f76bb33..5766a6c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1386,8 +1386,9 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
struct timespec now = CURRENT_TIME;
struct timespec now;
ktime_get_real_ts(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); ceph_encode_timespec(&con->out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->out_temp_keepalive2),
@@ -3176,8 +3177,9 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con, { if (interval > 0 && (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
struct timespec now = CURRENT_TIME;
struct timespec now; struct timespec ts;
ktime_get_real_ts(&now); jiffies_to_timespec(interval, &ts); ts = timespec_add(con->last_keepalive_ack, ts); return timespec_compare(&now, &ts) >= 0;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e15ea9e..242d7c0 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3574,7 +3574,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_WRITE;
lreq->mtime = CURRENT_TIME;
ktime_get_real_ts(&lreq->mtime); lreq->reg_req = alloc_linger_request(lreq); if (!lreq->reg_req) {
@@ -3632,7 +3632,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE;
req->r_mtime = CURRENT_TIME;
ktime_get_real_ts(&req->r_mtime); osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH);
-- 2.7.4
On Thu, Jun 1, 2017 at 11:56 AM, Yan, Zheng ukernel@gmail.com wrote:
On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b..77204da 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_mtime = CURRENT_TIME;
ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762..1d3fa90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
struct timespec ts; if (!req) return ERR_PTR(-ENOMEM);
@@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item);
req->r_stamp = current_fs_time(mdsc->fsc->sb);
ktime_get_real_ts(&ts);
req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
This change causes our kernel_untar_tar test case to fail (inode's ctime goes back). The reason is that there is time drift between the time stamps got by ktime_get_real_ts() and current_time(). We need to revert this change until current_time() uses ktime_get_real_ts() internally.
Hmm, the change was not supposed to have a user-visible effect, so something has gone wrong, but I don't immediately see how it relates to what you observe.
ktime_get_real_ts() and current_time() use the same time base, there is no drift, but there is a difference in resolution, as the latter uses the time stamp of the last jiffies update, which may be up to one jiffy (10ms) behind the exact time we put in the request stamps here.
Do you still see problems if you use current_kernel_time() instead of ktime_get_real_ts()?
Arnd
On Thu, Jun 1, 2017 at 6:22 PM, Arnd Bergmann arnd@arndb.de wrote:
On Thu, Jun 1, 2017 at 11:56 AM, Yan, Zheng ukernel@gmail.com wrote:
On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b..77204da 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_mtime = CURRENT_TIME;
ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762..1d3fa90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
struct timespec ts; if (!req) return ERR_PTR(-ENOMEM);
@@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item);
req->r_stamp = current_fs_time(mdsc->fsc->sb);
ktime_get_real_ts(&ts);
req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
This change causes our kernel_untar_tar test case to fail (inode's ctime goes back). The reason is that there is time drift between the time stamps got by ktime_get_real_ts() and current_time(). We need to revert this change until current_time() uses ktime_get_real_ts() internally.
Hmm, the change was not supposed to have a user-visible effect, so something has gone wrong, but I don't immediately see how it relates to what you observe.
ktime_get_real_ts() and current_time() use the same time base, there is no drift, but there is a difference in resolution, as the latter uses the time stamp of the last jiffies update, which may be up to one jiffy (10ms) behind the exact time we put in the request stamps here.
Do you still see problems if you use current_kernel_time() instead of ktime_get_real_ts()?
The problem disappears after using current_kernel_time().
https://github.com/ceph/ceph-client/commit/2e0f648da23167034a3cf1500bc90ec60...
Regards Yan, Zheng
Arnd
On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 6:22 PM, Arnd Bergmann arnd@arndb.de wrote:
On Thu, Jun 1, 2017 at 11:56 AM, Yan, Zheng ukernel@gmail.com wrote:
On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b..77204da 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_mtime = CURRENT_TIME;
ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762..1d3fa90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
struct timespec ts; if (!req) return ERR_PTR(-ENOMEM);
@@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item);
req->r_stamp = current_fs_time(mdsc->fsc->sb);
ktime_get_real_ts(&ts);
req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
This change causes our kernel_untar_tar test case to fail (inode's ctime goes back). The reason is that there is time drift between the time stamps got by ktime_get_real_ts() and current_time(). We need to revert this change until current_time() uses ktime_get_real_ts() internally.
Hmm, the change was not supposed to have a user-visible effect, so something has gone wrong, but I don't immediately see how it relates to what you observe.
ktime_get_real_ts() and current_time() use the same time base, there is no drift, but there is a difference in resolution, as the latter uses the time stamp of the last jiffies update, which may be up to one jiffy (10ms) behind the exact time we put in the request stamps here.
Do you still see problems if you use current_kernel_time() instead of ktime_get_real_ts()?
The problem disappears after using current_kernel_time().
https://github.com/ceph/ceph-client/commit/2e0f648da23167034a3cf1500bc90ec60...
From the commit above:
"It seems there is time drift between ktime_get_real_ts() and current_kernel_time()"
Its more of a granularity difference. current_kernel_time() returns the cached time at the last tick, where as ktime_get_real_ts() reads the clocksource hardware and returns the immediate time.
Filesystems usually use the cached time (similar to CLOCK_REALTIME_COARSE), for performance reasons, as touching the clocksource takes time.
thanks -john
On Thu, Jun 1, 2017 at 5:36 PM, John Stultz john.stultz@linaro.org wrote:
On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 6:22 PM, Arnd Bergmann arnd@arndb.de wrote:
On Thu, Jun 1, 2017 at 11:56 AM, Yan, Zheng ukernel@gmail.com wrote:
On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b..77204da 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_mtime = CURRENT_TIME;
ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762..1d3fa90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
struct timespec ts; if (!req) return ERR_PTR(-ENOMEM);
@@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item);
req->r_stamp = current_fs_time(mdsc->fsc->sb);
ktime_get_real_ts(&ts);
req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
This change causes our kernel_untar_tar test case to fail (inode's ctime goes back). The reason is that there is time drift between the time stamps got by ktime_get_real_ts() and current_time(). We need to revert this change until current_time() uses ktime_get_real_ts() internally.
Hmm, the change was not supposed to have a user-visible effect, so something has gone wrong, but I don't immediately see how it relates to what you observe.
ktime_get_real_ts() and current_time() use the same time base, there is no drift, but there is a difference in resolution, as the latter uses the time stamp of the last jiffies update, which may be up to one jiffy (10ms) behind the exact time we put in the request stamps here.
Do you still see problems if you use current_kernel_time() instead of ktime_get_real_ts()?
The problem disappears after using current_kernel_time().
https://github.com/ceph/ceph-client/commit/2e0f648da23167034a3cf1500bc90ec60...
From the commit above: "It seems there is time drift between ktime_get_real_ts() and current_kernel_time()"
Its more of a granularity difference. current_kernel_time() returns the cached time at the last tick, where as ktime_get_real_ts() reads the clocksource hardware and returns the immediate time.
Filesystems usually use the cached time (similar to CLOCK_REALTIME_COARSE), for performance reasons, as touching the clocksource takes time.
Alternatively, it would be best for this code also to use current_time(). I had suggested this in one of the previous versions of the patch. The implementation of current_time() will change when we switch vfs to use 64 bit time. This will prevent such errors from happening again. But, this also means there is more code reordering for these modules to get a reference to inode.
-Deepa
On Fri, Jun 2, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 5:36 PM, John Stultz john.stultz@linaro.org wrote:
On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 6:22 PM, Arnd Bergmann arnd@arndb.de wrote:
On Thu, Jun 1, 2017 at 11:56 AM, Yan, Zheng ukernel@gmail.com wrote:
On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b..77204da 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_mtime = CURRENT_TIME;
ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762..1d3fa90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
struct timespec ts; if (!req) return ERR_PTR(-ENOMEM);
@@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item);
req->r_stamp = current_fs_time(mdsc->fsc->sb);
ktime_get_real_ts(&ts);
req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
This change causes our kernel_untar_tar test case to fail (inode's ctime goes back). The reason is that there is time drift between the time stamps got by ktime_get_real_ts() and current_time(). We need to revert this change until current_time() uses ktime_get_real_ts() internally.
Hmm, the change was not supposed to have a user-visible effect, so something has gone wrong, but I don't immediately see how it relates to what you observe.
ktime_get_real_ts() and current_time() use the same time base, there is no drift, but there is a difference in resolution, as the latter uses the time stamp of the last jiffies update, which may be up to one jiffy (10ms) behind the exact time we put in the request stamps here.
Do you still see problems if you use current_kernel_time() instead of ktime_get_real_ts()?
The problem disappears after using current_kernel_time().
https://github.com/ceph/ceph-client/commit/2e0f648da23167034a3cf1500bc90ec60...
From the commit above: "It seems there is time drift between ktime_get_real_ts() and current_kernel_time()"
Its more of a granularity difference. current_kernel_time() returns the cached time at the last tick, where as ktime_get_real_ts() reads the clocksource hardware and returns the immediate time.
Filesystems usually use the cached time (similar to CLOCK_REALTIME_COARSE), for performance reasons, as touching the clocksource takes time.
Alternatively, it would be best for this code also to use current_time(). I had suggested this in one of the previous versions of the patch. The implementation of current_time() will change when we switch vfs to use 64 bit time. This will prevent such errors from happening again. But, this also means there is more code reordering for these modules to get a reference to inode.
I took a look. it's quite inconvenience to use current_time(). I prefer to temporarily use current_kernel_time().
Regards Yan, Zheng
-Deepa
On Fri, Jun 2, 2017 at 4:09 AM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 5:36 PM, John Stultz john.stultz@linaro.org wrote:
On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 6:22 PM, Arnd Bergmann arnd@arndb.de wrote:
On Thu, Jun 1, 2017 at 11:56 AM, Yan, Zheng ukernel@gmail.com wrote:
On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > index 517838b..77204da 100644 > --- a/drivers/block/rbd.c > +++ b/drivers/block/rbd.c > @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) > { > struct ceph_osd_request *osd_req = obj_request->osd_req; > > - osd_req->r_mtime = CURRENT_TIME; > + ktime_get_real_ts(&osd_req->r_mtime); > osd_req->r_data_offset = obj_request->offset; > } > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index c681762..1d3fa90 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -1666,6 +1666,7 @@ struct ceph_mds_request * > ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) > { > struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); > + struct timespec ts; > > if (!req) > return ERR_PTR(-ENOMEM); > @@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) > init_completion(&req->r_safe_completion); > INIT_LIST_HEAD(&req->r_unsafe_item); > > - req->r_stamp = current_fs_time(mdsc->fsc->sb); > + ktime_get_real_ts(&ts); > + req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
This change causes our kernel_untar_tar test case to fail (inode's ctime goes back). The reason is that there is time drift between the time stamps got by ktime_get_real_ts() and current_time(). We need to revert this change until current_time() uses ktime_get_real_ts() internally.
Hmm, the change was not supposed to have a user-visible effect, so something has gone wrong, but I don't immediately see how it relates to what you observe.
ktime_get_real_ts() and current_time() use the same time base, there is no drift, but there is a difference in resolution, as the latter uses the time stamp of the last jiffies update, which may be up to one jiffy (10ms) behind the exact time we put in the request stamps here.
Do you still see problems if you use current_kernel_time() instead of ktime_get_real_ts()?
The problem disappears after using current_kernel_time().
https://github.com/ceph/ceph-client/commit/2e0f648da23167034a3cf1500bc90ec60...
From the commit above: "It seems there is time drift between ktime_get_real_ts() and current_kernel_time()"
Its more of a granularity difference. current_kernel_time() returns the cached time at the last tick, where as ktime_get_real_ts() reads the clocksource hardware and returns the immediate time.
Filesystems usually use the cached time (similar to CLOCK_REALTIME_COARSE), for performance reasons, as touching the clocksource takes time.
Alternatively, it would be best for this code also to use current_time(). I had suggested this in one of the previous versions of the patch. The implementation of current_time() will change when we switch vfs to use 64 bit time. This will prevent such errors from happening again. But, this also means there is more code reordering for these modules to get a reference to inode.
I took a look. it's quite inconvenience to use current_time(). I prefer to temporarily use current_kernel_time().
I've looked at the code some more and I think there is another angle to it: In your test case, 'tar' calls into the utimes syscall (or a member of its family), which sets the i_ctime field in the inode to the curren time (using current_time()), then calls __ceph_setattr(), which creates a mds client request, and ceph_mdsc_create_request() takes another time stamp and stores it in r_stamp.
We then store the first timestamp (only) in the in-memory inode, and the second time stamp in the request. Depending on the state of the inode, we may also set the ctime to a third timestamp we again take using current_time().
The mtime and atime from user space get passed correctly through union ceph_mds_request_args->setattr and are kept in sync between the in-memory inode and the persistent inode data, but the ctime in the inode never makes it to the lower protocol levels and instead we use the r_stamp field that got set a little earlier or a little later.
I believe the bug you see is the result of the two timestamps currently being almost guaranteed to be different in the latest kernels. Changing r_stamp to use current_kernel_time() will make it the same value most of the time (as it was before Deepa's patch), but when the timer interrupt happens between the timestamps, the two are still different, it's just much harder to hit.
I think the proper solution should be to change __ceph_setattr() in a way that has req->r_stamp always synchronized with i_ctime. If we copy i_ctime to r_stamp, that will also take care of the future issues with the planned changes to current_time().
The part I don't understand is what else r_stamp (i.e. the time stamp in ceph_msg_data with type== CEPH_MSG_CLIENT_REQUEST) is used for, other than setting ctime in CEPH_MDS_OP_SETATTR.
Will this be used to update the stored i_ctime for other operations too? If so, we would need to synchronize it with the in-memory i_ctime for all operations that do this.
Semi-related side note: I see that the granularity for ceph_timespec is 1000 nanoseconds, so the value is always a multiple of 1000. If the full 32-bit data gets stored, we could use this it to store the epoch number in the future:
static inline void ceph_decode_timespec(struct timespec64 *ts, const struct ceph_timespec *tv) { u32 ns_epoch = le32_to_cpu(tv->tv_nsec); u32 epoch = ns_epoch % 1000;
/* tv_sec is traditionally interpreted as unsigned * with time ranges 1970-2106, we extend * it to 1970-138069 */ ts->tv_sec = (u64)le32_to_cpu(tv->tv_sec) + (u64)epoch << 32; ts->tv_nsec = ns_epoch - epoch; }
Arnd
On Fri, Jun 2, 2017 at 5:45 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 4:09 AM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 5:36 PM, John Stultz john.stultz@linaro.org wrote:
On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 6:22 PM, Arnd Bergmann arnd@arndb.de wrote:
On Thu, Jun 1, 2017 at 11:56 AM, Yan, Zheng ukernel@gmail.com wrote: > On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c >> index 517838b..77204da 100644 >> --- a/drivers/block/rbd.c >> +++ b/drivers/block/rbd.c >> @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) >> { >> struct ceph_osd_request *osd_req = obj_request->osd_req; >> >> - osd_req->r_mtime = CURRENT_TIME; >> + ktime_get_real_ts(&osd_req->r_mtime); >> osd_req->r_data_offset = obj_request->offset; >> } >> >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index c681762..1d3fa90 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -1666,6 +1666,7 @@ struct ceph_mds_request * >> ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) >> { >> struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); >> + struct timespec ts; >> >> if (!req) >> return ERR_PTR(-ENOMEM); >> @@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) >> init_completion(&req->r_safe_completion); >> INIT_LIST_HEAD(&req->r_unsafe_item); >> >> - req->r_stamp = current_fs_time(mdsc->fsc->sb); >> + ktime_get_real_ts(&ts); >> + req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran); > > This change causes our kernel_untar_tar test case to fail (inode's > ctime goes back). The reason is that there is time drift between the > time stamps got by ktime_get_real_ts() and current_time(). We need to > revert this change until current_time() uses ktime_get_real_ts() > internally.
Hmm, the change was not supposed to have a user-visible effect, so something has gone wrong, but I don't immediately see how it relates to what you observe.
ktime_get_real_ts() and current_time() use the same time base, there is no drift, but there is a difference in resolution, as the latter uses the time stamp of the last jiffies update, which may be up to one jiffy (10ms) behind the exact time we put in the request stamps here.
Do you still see problems if you use current_kernel_time() instead of ktime_get_real_ts()?
The problem disappears after using current_kernel_time().
https://github.com/ceph/ceph-client/commit/2e0f648da23167034a3cf1500bc90ec60...
From the commit above: "It seems there is time drift between ktime_get_real_ts() and current_kernel_time()"
Its more of a granularity difference. current_kernel_time() returns the cached time at the last tick, where as ktime_get_real_ts() reads the clocksource hardware and returns the immediate time.
Filesystems usually use the cached time (similar to CLOCK_REALTIME_COARSE), for performance reasons, as touching the clocksource takes time.
Alternatively, it would be best for this code also to use current_time(). I had suggested this in one of the previous versions of the patch. The implementation of current_time() will change when we switch vfs to use 64 bit time. This will prevent such errors from happening again. But, this also means there is more code reordering for these modules to get a reference to inode.
I took a look. it's quite inconvenience to use current_time(). I prefer to temporarily use current_kernel_time().
I've looked at the code some more and I think there is another angle to it: In your test case, 'tar' calls into the utimes syscall (or a member of its family), which sets the i_ctime field in the inode to the curren time (using current_time()), then calls __ceph_setattr(), which creates a mds client request, and ceph_mdsc_create_request() takes another time stamp and stores it in r_stamp.
We then store the first timestamp (only) in the in-memory inode, and the second time stamp in the request. Depending on the state of the inode, we may also set the ctime to a third timestamp we again take using current_time().
The mtime and atime from user space get passed correctly through union ceph_mds_request_args->setattr and are kept in sync between the in-memory inode and the persistent inode data, but the ctime in the inode never makes it to the lower protocol levels and instead we use the r_stamp field that got set a little earlier or a little later.
I believe the bug you see is the result of the two timestamps currently being almost guaranteed to be different in the latest kernels. Changing r_stamp to use current_kernel_time() will make it the same value most of the time (as it was before Deepa's patch), but when the timer interrupt happens between the timestamps, the two are still different, it's just much harder to hit.
I think the proper solution should be to change __ceph_setattr() in a way that has req->r_stamp always synchronized with i_ctime. If we copy i_ctime to r_stamp, that will also take care of the future issues with the planned changes to current_time().
I already have a patch https://github.com/ceph/ceph-client/commit/24f54cd18e195a002ee3d2ab50dbc952f...
The part I don't understand is what else r_stamp (i.e. the time stamp in ceph_msg_data with type== CEPH_MSG_CLIENT_REQUEST) is used for, other than setting ctime in CEPH_MDS_OP_SETATTR.
Will this be used to update the stored i_ctime for other operations too? If so, we would need to synchronize it with the in-memory i_ctime for all operations that do this.
yes, mds uses it to update ctime of modified inodes. For example, when handling mkdir, mds set ctime of both parent inode and new inode to r_stamp.
Regards Yan, Zheng
Semi-related side note: I see that the granularity for ceph_timespec is 1000 nanoseconds, so the value is always a multiple of 1000. If the full 32-bit data gets stored, we could use this it to store the epoch number in the future:
static inline void ceph_decode_timespec(struct timespec64 *ts, const struct ceph_timespec *tv) { u32 ns_epoch = le32_to_cpu(tv->tv_nsec); u32 epoch = ns_epoch % 1000;
/* tv_sec is traditionally interpreted as unsigned * with time ranges 1970-2106, we extend * it to 1970-138069 */ ts->tv_sec = (u64)le32_to_cpu(tv->tv_sec) + (u64)epoch << 32; ts->tv_nsec = ns_epoch - epoch;
}
Arnd
On Fri, Jun 2, 2017 at 12:10 PM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 5:45 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 4:09 AM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 5:36 PM, John Stultz john.stultz@linaro.org wrote:
On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
I believe the bug you see is the result of the two timestamps currently being almost guaranteed to be different in the latest kernels. Changing r_stamp to use current_kernel_time() will make it the same value most of the time (as it was before Deepa's patch), but when the timer interrupt happens between the timestamps, the two are still different, it's just much harder to hit.
I think the proper solution should be to change __ceph_setattr() in a way that has req->r_stamp always synchronized with i_ctime. If we copy i_ctime to r_stamp, that will also take care of the future issues with the planned changes to current_time().
I already have a patch https://github.com/ceph/ceph-client/commit/24f54cd18e195a002ee3d2ab50dbc952f...
Looks good to me. In case anyone cares: Acked-by: Arnd Bergmann arnd@arndb.de
The part I don't understand is what else r_stamp (i.e. the time stamp in ceph_msg_data with type== CEPH_MSG_CLIENT_REQUEST) is used for, other than setting ctime in CEPH_MDS_OP_SETATTR.
Will this be used to update the stored i_ctime for other operations too? If so, we would need to synchronize it with the in-memory i_ctime for all operations that do this.
yes, mds uses it to update ctime of modified inodes. For example, when handling mkdir, mds set ctime of both parent inode and new inode to r_stamp.
I see, so we may have a variation of that problem there as well: From my reading of the code, the child inode is not in memory yet, so that seems fine, but I could not find where the parent in-memory inode i_ctime is updated in ceph, but it is most likely not the same as req->r_stamp (assuming it gets updated at all).
Would it make sense require all callers of ceph_mdsc_do_request() to update r_stamp at the same time as i_ctime to keep them in sync?
Arnd
On Fri, Jun 2, 2017 at 6:51 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 12:10 PM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 5:45 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 4:09 AM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 5:36 PM, John Stultz john.stultz@linaro.org wrote:
On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
I believe the bug you see is the result of the two timestamps currently being almost guaranteed to be different in the latest kernels. Changing r_stamp to use current_kernel_time() will make it the same value most of the time (as it was before Deepa's patch), but when the timer interrupt happens between the timestamps, the two are still different, it's just much harder to hit.
I think the proper solution should be to change __ceph_setattr() in a way that has req->r_stamp always synchronized with i_ctime. If we copy i_ctime to r_stamp, that will also take care of the future issues with the planned changes to current_time().
I already have a patch https://github.com/ceph/ceph-client/commit/24f54cd18e195a002ee3d2ab50dbc952f...
Looks good to me. In case anyone cares: Acked-by: Arnd Bergmann arnd@arndb.de
The part I don't understand is what else r_stamp (i.e. the time stamp in ceph_msg_data with type== CEPH_MSG_CLIENT_REQUEST) is used for, other than setting ctime in CEPH_MDS_OP_SETATTR.
Will this be used to update the stored i_ctime for other operations too? If so, we would need to synchronize it with the in-memory i_ctime for all operations that do this.
yes, mds uses it to update ctime of modified inodes. For example, when handling mkdir, mds set ctime of both parent inode and new inode to r_stamp.
I see, so we may have a variation of that problem there as well: From my reading of the code, the child inode is not in memory yet, so that seems fine, but I could not find where the parent in-memory inode i_ctime is updated in ceph, but it is most likely not the same as req->r_stamp (assuming it gets updated at all).
i_ctime is updated when handling request reply, by ceph_fill_file_time(). __ceph_setattr() can update the in-memory inode's ctime after request reply is received. The difference between ktime_get_real_ts() and current_time() can be larger than round-trip time of request. So it's still possible that __ceph_setattr() make ctime go back.
Regards Yan, Zheng
Would it make sense require all callers of ceph_mdsc_do_request() to update r_stamp at the same time as i_ctime to keep them in sync?
Arnd
On Fri, Jun 2, 2017 at 1:18 PM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 6:51 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 12:10 PM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 5:45 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 4:09 AM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
On Thu, Jun 1, 2017 at 5:36 PM, John Stultz john.stultz@linaro.org wrote: > On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
I believe the bug you see is the result of the two timestamps currently being almost guaranteed to be different in the latest kernels. Changing r_stamp to use current_kernel_time() will make it the same value most of the time (as it was before Deepa's patch), but when the timer interrupt happens between the timestamps, the two are still different, it's just much harder to hit.
I think the proper solution should be to change __ceph_setattr() in a way that has req->r_stamp always synchronized with i_ctime. If we copy i_ctime to r_stamp, that will also take care of the future issues with the planned changes to current_time().
I already have a patch https://github.com/ceph/ceph-client/commit/24f54cd18e195a002ee3d2ab50dbc952f...
Looks good to me. In case anyone cares: Acked-by: Arnd Bergmann arnd@arndb.de
The part I don't understand is what else r_stamp (i.e. the time stamp in ceph_msg_data with type== CEPH_MSG_CLIENT_REQUEST) is used for, other than setting ctime in CEPH_MDS_OP_SETATTR.
Will this be used to update the stored i_ctime for other operations too? If so, we would need to synchronize it with the in-memory i_ctime for all operations that do this.
yes, mds uses it to update ctime of modified inodes. For example, when handling mkdir, mds set ctime of both parent inode and new inode to r_stamp.
I see, so we may have a variation of that problem there as well: From my reading of the code, the child inode is not in memory yet, so that seems fine, but I could not find where the parent in-memory inode i_ctime is updated in ceph, but it is most likely not the same as req->r_stamp (assuming it gets updated at all).
i_ctime is updated when handling request reply, by ceph_fill_file_time(). __ceph_setattr() can update the in-memory inode's ctime after request reply is received. The difference between ktime_get_real_ts() and current_time() can be larger than round-trip time of request. So it's still possible that __ceph_setattr() make ctime go back.
But the __ceph_setattr() problem should be fixed by your patch, right?
What I meant is another related problem in ceph_mkdir() where the i_ctime field of the parent inode is different between the persistent representation in the mds and the in-memory representation.
Arnd
Would it make sense require all callers of ceph_mdsc_do_request() to update r_stamp at the same time as i_ctime to keep them in sync?
On Fri, Jun 2, 2017 at 7:33 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 1:18 PM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 6:51 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 12:10 PM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 5:45 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 4:09 AM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote: > On Thu, Jun 1, 2017 at 5:36 PM, John Stultz john.stultz@linaro.org wrote: >> On Thu, Jun 1, 2017 at 5:26 PM, Yan, Zheng ukernel@gmail.com wrote:
I believe the bug you see is the result of the two timestamps currently being almost guaranteed to be different in the latest kernels. Changing r_stamp to use current_kernel_time() will make it the same value most of the time (as it was before Deepa's patch), but when the timer interrupt happens between the timestamps, the two are still different, it's just much harder to hit.
I think the proper solution should be to change __ceph_setattr() in a way that has req->r_stamp always synchronized with i_ctime. If we copy i_ctime to r_stamp, that will also take care of the future issues with the planned changes to current_time().
I already have a patch https://github.com/ceph/ceph-client/commit/24f54cd18e195a002ee3d2ab50dbc952f...
Looks good to me. In case anyone cares: Acked-by: Arnd Bergmann arnd@arndb.de
The part I don't understand is what else r_stamp (i.e. the time stamp in ceph_msg_data with type== CEPH_MSG_CLIENT_REQUEST) is used for, other than setting ctime in CEPH_MDS_OP_SETATTR.
Will this be used to update the stored i_ctime for other operations too? If so, we would need to synchronize it with the in-memory i_ctime for all operations that do this.
yes, mds uses it to update ctime of modified inodes. For example, when handling mkdir, mds set ctime of both parent inode and new inode to r_stamp.
I see, so we may have a variation of that problem there as well: From my reading of the code, the child inode is not in memory yet, so that seems fine, but I could not find where the parent in-memory inode i_ctime is updated in ceph, but it is most likely not the same as req->r_stamp (assuming it gets updated at all).
i_ctime is updated when handling request reply, by ceph_fill_file_time(). __ceph_setattr() can update the in-memory inode's ctime after request reply is received. The difference between ktime_get_real_ts() and current_time() can be larger than round-trip time of request. So it's still possible that __ceph_setattr() make ctime go back.
But the __ceph_setattr() problem should be fixed by your patch, right?
What I meant is another related problem in ceph_mkdir() where the i_ctime field of the parent inode is different between the persistent representation in the mds and the in-memory representation.
I don't see any problem in mkdir case. Parent inode's i_ctime in mds is set to r_stamp. When client receives request reply, it set its in-memory inode's ctime to the same time stamp.
Regards Yan, Zheng
Arnd
Would it make sense require all callers of ceph_mdsc_do_request() to update r_stamp at the same time as i_ctime to keep them in sync?
On Fri, Jun 2, 2017 at 2:18 PM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 7:33 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 1:18 PM, Yan, Zheng ukernel@gmail.com wrote: What I meant is another related problem in ceph_mkdir() where the i_ctime field of the parent inode is different between the persistent representation in the mds and the in-memory representation.
I don't see any problem in mkdir case. Parent inode's i_ctime in mds is set to r_stamp. When client receives request reply, it set its in-memory inode's ctime to the same time stamp.
Ok, I see it now, thanks for the clarification. Most other file systems do this the other way round and update all fields in the in-memory inode structure first and then write that to persistent storage, so I was getting confused about the order of events here.
If I understand it all right, we have three different behaviors in ceph now, though the differences are very minor and probably don't ever matter:
- in setattr(), we update ctime in the in-memory inode first and then send the same time to the mds, and expect to set it again when the reply comes.
- in ceph_write_iter write() and mmap/page_mkwrite(), we call file_update_time() to set i_mtime and i_ctime to the same timestamp first once a write is observed by the fs and then take two other timestamps that we send to the mds, and update the in-memory inode a second time when the reply comes. ctime is never older than mtime here, as far as I can tell, but it may be newer when the timer interrupt happens between taking the two stamps.
- in all other calls, we only update the inode (and/or parent inode) after the reply arrives.
Arnd
On Fri, Jun 2, 2017 at 10:18 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 2:18 PM, Yan, Zheng ukernel@gmail.com wrote:
On Fri, Jun 2, 2017 at 7:33 PM, Arnd Bergmann arnd@arndb.de wrote:
On Fri, Jun 2, 2017 at 1:18 PM, Yan, Zheng ukernel@gmail.com wrote: What I meant is another related problem in ceph_mkdir() where the i_ctime field of the parent inode is different between the persistent representation in the mds and the in-memory representation.
I don't see any problem in mkdir case. Parent inode's i_ctime in mds is set to r_stamp. When client receives request reply, it set its in-memory inode's ctime to the same time stamp.
Ok, I see it now, thanks for the clarification. Most other file systems do this the other way round and update all fields in the in-memory inode structure first and then write that to persistent storage, so I was getting confused about the order of events here.
If I understand it all right, we have three different behaviors in ceph now, though the differences are very minor and probably don't ever matter:
in setattr(), we update ctime in the in-memory inode first and then send the same time to the mds, and expect to set it again when the reply comes.
in ceph_write_iter write() and mmap/page_mkwrite(), we call file_update_time() to set i_mtime and i_ctime to the same timestamp first once a write is observed by the fs and then take two other timestamps that we send to the mds, and update the in-memory inode a second time when the reply comes. ctime is never older than mtime here, as far as I can tell, but it may be newer when the timer interrupt happens between taking the two stamps.
We don't use request to send i_mtime/i_ctime to mds in this case. Instead, we use cap flush message. i_mtime/i_ctime are directly encoded in cap flush message. When mds receives the cap flush message, it writes i_mtime/i_ctime to persistent storage and sends a cap flush ack message to client. (when client receives the cap flush ack message, it does not update i_mtime/i_ctime). There is no issue as you described.
- in all other calls, we only update the inode (and/or parent inode) after the reply arrives.
There are two cases. 1. Client updates in-memory inode's ctime, it sends the new ctime to mds through cap flush message. 2. client set mds request's r_stamp and send the request to mds. MDS updates relavent inodes' ctime and sends reply to client. Client updates in-memory inodes' ctime according to the reply.
Regards Yan, Zheng
Arnd
On Thu, Jun 1, 2017 at 6:22 PM, Arnd Bergmann arnd@arndb.de wrote:
On Thu, Jun 1, 2017 at 11:56 AM, Yan, Zheng ukernel@gmail.com wrote:
On Sat, Apr 8, 2017 at 8:57 AM, Deepa Dinamani deepa.kernel@gmail.com wrote:
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 517838b..77204da 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1922,7 +1922,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_mtime = CURRENT_TIME;
ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762..1d3fa90 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1666,6 +1666,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
struct timespec ts; if (!req) return ERR_PTR(-ENOMEM);
@@ -1684,7 +1685,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item);
req->r_stamp = current_fs_time(mdsc->fsc->sb);
ktime_get_real_ts(&ts);
req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
This change causes our kernel_untar_tar test case to fail (inode's ctime goes back). The reason is that there is time drift between the time stamps got by ktime_get_real_ts() and current_time(). We need to revert this change until current_time() uses ktime_get_real_ts() internally.
Hmm, the change was not supposed to have a user-visible effect, so something has gone wrong, but I don't immediately see how it relates to what you observe.
ktime_get_real_ts() and current_time() use the same time base, there is no drift, but there is a difference in resolution, as the latter uses the time stamp of the last jiffies update, which may be up to one jiffy (10ms) behind the exact time we put in the request stamps here.
It happens in following sequence of events
1. create a new file, the inode's ctime is set to ktime_get_real_ts() 2. chmod the new file, the inode's ctime is set to current_time().
Inode's ctime goes back when current_time() is behind ktime_get_real_ts().
Regards Yan, Zheng
Do you still see problems if you use current_kernel_time() instead of ktime_get_real_ts()?
Arnd
CURRENT_TIME is not y2038 safe. Replace it with ktime_get_real_ts64(). Inode time formats are already 64 bit long and accommodates time64_t.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- fs/ufs/ialloc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 9774555..d1dd8cc 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -176,6 +176,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) struct ufs_cg_private_info * ucpi; struct ufs_cylinder_group * ucg; struct inode * inode; + struct timespec64 ts; unsigned cg, bit, i, j, start; struct ufs_inode_info *ufsi; int err = -ENOSPC; @@ -323,8 +324,9 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) lock_buffer(bh); ufs2_inode = (struct ufs2_inode *)bh->b_data; ufs2_inode += ufs_inotofsbo(inode->i_ino); - ufs2_inode->ui_birthtime = cpu_to_fs64(sb, CURRENT_TIME.tv_sec); - ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, CURRENT_TIME.tv_nsec); + ktime_get_real_ts64(&ts); + ufs2_inode->ui_birthtime = cpu_to_fs64(sb, ts.tv_sec); + ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, ts.tv_nsec); mark_buffer_dirty(bh); unlock_buffer(bh); if (sb->s_flags & MS_SYNCHRONOUS)
struct timespec is not y2038 safe. Audit timestamps are recorded in string format into an audit buffer for a given context. These mark the entry timestamps for the syscalls. Use y2038 safe struct timespec64 to represent the times. The log strings can handle this transition as strings can hold upto 1024 characters.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Reviewed-by: Arnd Bergmann arnd@arndb.de Acked-by: Paul Moore paul@paul-moore.com Acked-by: Richard Guy Briggs rgb@redhat.com --- include/linux/audit.h | 4 ++-- kernel/audit.c | 10 +++++----- kernel/audit.h | 2 +- kernel/auditsc.c | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/include/linux/audit.h b/include/linux/audit.h index 6fdfefc..f830508 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -332,7 +332,7 @@ static inline void audit_ptrace(struct task_struct *t) /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); extern int auditsc_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial); + struct timespec64 *t, unsigned int *serial); extern int audit_set_loginuid(kuid_t loginuid);
static inline kuid_t audit_get_loginuid(struct task_struct *tsk) @@ -511,7 +511,7 @@ static inline void __audit_seccomp(unsigned long syscall, long signr, int code) static inline void audit_seccomp(unsigned long syscall, long signr, int code) { } static inline int auditsc_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial) + struct timespec64 *t, unsigned int *serial) { return 0; } diff --git a/kernel/audit.c b/kernel/audit.c index 2f4964c..fcbf377 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1625,10 +1625,10 @@ unsigned int audit_serial(void) }
static inline void audit_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial) + struct timespec64 *t, unsigned int *serial) { if (!ctx || !auditsc_get_stamp(ctx, t, serial)) { - *t = CURRENT_TIME; + ktime_get_real_ts64(t); *serial = audit_serial(); } } @@ -1652,7 +1652,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { struct audit_buffer *ab; - struct timespec t; + struct timespec64 t; unsigned int uninitialized_var(serial);
if (audit_initialized != AUDIT_INITIALIZED) @@ -1705,8 +1705,8 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, }
audit_get_stamp(ab->ctx, &t, &serial); - audit_log_format(ab, "audit(%lu.%03lu:%u): ", - t.tv_sec, t.tv_nsec/1000000, serial); + audit_log_format(ab, "audit(%llu.%03lu:%u): ", + (unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial);
return ab; } diff --git a/kernel/audit.h b/kernel/audit.h index 0f1cf6d..cdf96f4 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -112,7 +112,7 @@ struct audit_context { enum audit_state state, current_state; unsigned int serial; /* serial number for record */ int major; /* syscall number */ - struct timespec ctime; /* time of syscall entry */ + struct timespec64 ctime; /* time of syscall entry */ unsigned long argv[4]; /* syscall arguments */ long return_code;/* syscall return code */ u64 prio; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e59ffc7..a2d9217 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1532,7 +1532,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, return;
context->serial = 0; - context->ctime = CURRENT_TIME; + ktime_get_real_ts64(&context->ctime); context->in_syscall = 1; context->current_state = state; context->ppid = 0; @@ -1941,13 +1941,13 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); /** * auditsc_get_stamp - get local copies of audit_context values * @ctx: audit_context for the task - * @t: timespec to store time recorded in the audit_context + * @t: timespec64 to store time recorded in the audit_context * @serial: serial value that is recorded in the audit_context * * Also sets the context as auditable. */ int auditsc_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial) + struct timespec64 *t, unsigned int *serial) { if (!ctx->in_syscall) return 0;
On Fri, Apr 7, 2017 at 8:57 PM, Deepa Dinamani deepa.kernel@gmail.com wrote:
struct timespec is not y2038 safe. Audit timestamps are recorded in string format into an audit buffer for a given context. These mark the entry timestamps for the syscalls. Use y2038 safe struct timespec64 to represent the times. The log strings can handle this transition as strings can hold upto 1024 characters.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Reviewed-by: Arnd Bergmann arnd@arndb.de Acked-by: Paul Moore paul@paul-moore.com Acked-by: Richard Guy Briggs rgb@redhat.com
include/linux/audit.h | 4 ++-- kernel/audit.c | 10 +++++----- kernel/audit.h | 2 +- kernel/auditsc.c | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-)
I have no problem merging this patch into audit/next for v4.12, would you prefer me to do that so at least this patch is merged?
It would probably make life a small bit easier for us in the audit world too as it would reduce the potential merge conflict. However, that's a relatively small thing to worry about.
diff --git a/include/linux/audit.h b/include/linux/audit.h index 6fdfefc..f830508 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -332,7 +332,7 @@ static inline void audit_ptrace(struct task_struct *t) /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); extern int auditsc_get_stamp(struct audit_context *ctx,
struct timespec *t, unsigned int *serial);
struct timespec64 *t, unsigned int *serial);
extern int audit_set_loginuid(kuid_t loginuid);
static inline kuid_t audit_get_loginuid(struct task_struct *tsk) @@ -511,7 +511,7 @@ static inline void __audit_seccomp(unsigned long syscall, long signr, int code) static inline void audit_seccomp(unsigned long syscall, long signr, int code) { } static inline int auditsc_get_stamp(struct audit_context *ctx,
struct timespec *t, unsigned int *serial)
struct timespec64 *t, unsigned int *serial)
{ return 0; } diff --git a/kernel/audit.c b/kernel/audit.c index 2f4964c..fcbf377 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1625,10 +1625,10 @@ unsigned int audit_serial(void) }
static inline void audit_get_stamp(struct audit_context *ctx,
struct timespec *t, unsigned int *serial)
struct timespec64 *t, unsigned int *serial)
{ if (!ctx || !auditsc_get_stamp(ctx, t, serial)) {
*t = CURRENT_TIME;
ktime_get_real_ts64(t); *serial = audit_serial(); }
} @@ -1652,7 +1652,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { struct audit_buffer *ab;
struct timespec t;
struct timespec64 t; unsigned int uninitialized_var(serial); if (audit_initialized != AUDIT_INITIALIZED)
@@ -1705,8 +1705,8 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, }
audit_get_stamp(ab->ctx, &t, &serial);
audit_log_format(ab, "audit(%lu.%03lu:%u): ",
t.tv_sec, t.tv_nsec/1000000, serial);
audit_log_format(ab, "audit(%llu.%03lu:%u): ",
(unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial); return ab;
} diff --git a/kernel/audit.h b/kernel/audit.h index 0f1cf6d..cdf96f4 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -112,7 +112,7 @@ struct audit_context { enum audit_state state, current_state; unsigned int serial; /* serial number for record */ int major; /* syscall number */
struct timespec ctime; /* time of syscall entry */
struct timespec64 ctime; /* time of syscall entry */ unsigned long argv[4]; /* syscall arguments */ long return_code;/* syscall return code */ u64 prio;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e59ffc7..a2d9217 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1532,7 +1532,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, return;
context->serial = 0;
context->ctime = CURRENT_TIME;
ktime_get_real_ts64(&context->ctime); context->in_syscall = 1; context->current_state = state; context->ppid = 0;
@@ -1941,13 +1941,13 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); /**
- auditsc_get_stamp - get local copies of audit_context values
- @ctx: audit_context for the task
- @t: timespec to store time recorded in the audit_context
*/
- @t: timespec64 to store time recorded in the audit_context
- @serial: serial value that is recorded in the audit_context
- Also sets the context as auditable.
int auditsc_get_stamp(struct audit_context *ctx,
struct timespec *t, unsigned int *serial)
struct timespec64 *t, unsigned int *serial)
{ if (!ctx->in_syscall) return 0; -- 2.7.4
I have no problem merging this patch into audit/next for v4.12, would you prefer me to do that so at least this patch is merged?
This would be fine. But, I think whoever takes the last 2 deletion patches should also take them. I'm not sure how that part works out.
It would probably make life a small bit easier for us in the audit world too as it would reduce the potential merge conflict. However, that's a relatively small thing to worry about.
-Deepa
On Sat, Apr 8, 2017 at 1:58 PM, Deepa Dinamani deepa.kernel@gmail.com wrote:
I have no problem merging this patch into audit/next for v4.12, would you prefer me to do that so at least this patch is merged?
This would be fine. But, I think whoever takes the last 2 deletion patches should also take them. I'm not sure how that part works out.
FWIW, I just merged this into the audit/next branch so at least this will go into v4.12.
It would probably make life a small bit easier for us in the audit world too as it would reduce the potential merge conflict. However, that's a relatively small thing to worry about.
On Sat, Apr 8, 2017 at 5:58 PM, Deepa Dinamani deepa.kernel@gmail.com wrote:
I have no problem merging this patch into audit/next for v4.12, would you prefer me to do that so at least this patch is merged?
This would be fine. But, I think whoever takes the last 2 deletion patches should also take them. I'm not sure how that part works out.
It would probably make life a small bit easier for us in the audit world too as it would reduce the potential merge conflict. However, that's a relatively small thing to worry about.
As Andrew has picked the remaining patches up into -mm, this will work out fine: any patches picked up by the respective maintainers for v4.12 should arrive as git pull requests before the -mm patches get applied at a later stage of the merge window.
Arnd
btrfs_root_item maintains the ctime for root updates. This is not part of vfs_inode.
Since current_time() uses struct inode* as an argument as Linus suggested, this cannot be used to update root times unless, we modify the signature to use inode.
Since btrfs uses nanosecond time granularity, it can also use ktime_get_real_ts directly to obtain timestamp for the root. It is necessary to use the timespec time api here because the same btrfs_set_stack_timespec_*() apis are used for vfs inode times as well. These can be transitioned to using timespec64 when btrfs internally changes to use timespec64 as well.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Acked-by: David Sterba dsterba@suse.com Reviewed-by: Arnd Bergmann arnd@arndb.de --- fs/btrfs/root-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a08224e..7d6bc30 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -501,8 +501,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_root_item *item = &root->root_item; - struct timespec ct = current_fs_time(root->fs_info->sb); + struct timespec ct;
+ ktime_get_real_ts(&ct); spin_lock(&root->root_item_lock); btrfs_set_root_ctransid(item, trans->transid); btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
On Fri, Apr 07, 2017 at 05:57:05PM -0700, Deepa Dinamani wrote:
btrfs_root_item maintains the ctime for root updates. This is not part of vfs_inode.
Since current_time() uses struct inode* as an argument as Linus suggested, this cannot be used to update root times unless, we modify the signature to use inode.
Since btrfs uses nanosecond time granularity, it can also use ktime_get_real_ts directly to obtain timestamp for the root. It is necessary to use the timespec time api here because the same btrfs_set_stack_timespec_*() apis are used for vfs inode times as well. These can be transitioned to using timespec64 when btrfs internally changes to use timespec64 as well.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Acked-by: David Sterba dsterba@suse.com Reviewed-by: Arnd Bergmann arnd@arndb.de
I'm going to add the patch to my 4.12 queue and will let Andrew know.
CURRENT_TIME_SEC is not y2038 safe. current_time() will be transitioned to use 64 bit time along with vfs in a separate patch. There is no plan to transition CURRENT_TIME_SEC to use y2038 safe time interfaces.
current_time() returns timestamps according to the granularities set in the inode's super_block. The granularity check to call current_fs_time() or CURRENT_TIME_SEC is not required.
Use current_time() directly to update inode timestamp. Use timespec_trunc during file system creation, before the first inode is created.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Reviewed-by: Arnd Bergmann arnd@arndb.de --- fs/ubifs/dir.c | 12 ++++++------ fs/ubifs/file.c | 12 ++++++------ fs/ubifs/ioctl.c | 2 +- fs/ubifs/misc.h | 10 ---------- fs/ubifs/sb.c | 14 ++++++++++---- fs/ubifs/xattr.c | 6 +++--- 6 files changed, 26 insertions(+), 30 deletions(-)
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 30825d88..8510d79 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -121,7 +121,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = - ubifs_current_time(inode); + current_time(inode); inode->i_mapping->nrpages = 0;
switch (mode & S_IFMT) { @@ -750,7 +750,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, lock_2_inodes(dir, inode); inc_nlink(inode); ihold(inode); - inode->i_ctime = ubifs_current_time(inode); + inode->i_ctime = current_time(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; dir->i_mtime = dir->i_ctime = inode->i_ctime; @@ -823,7 +823,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) }
lock_2_inodes(dir, inode); - inode->i_ctime = ubifs_current_time(dir); + inode->i_ctime = current_time(dir); drop_nlink(inode); dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; @@ -927,7 +927,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) }
lock_2_inodes(dir, inode); - inode->i_ctime = ubifs_current_time(dir); + inode->i_ctime = current_time(dir); clear_nlink(inode); drop_nlink(dir); dir->i_size -= sz_change; @@ -1405,7 +1405,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the @i_ctime for inodes on a * rename. */ - time = ubifs_current_time(old_dir); + time = current_time(old_dir); old_inode->i_ctime = time;
/* We must adjust parent link count when renaming directories */ @@ -1578,7 +1578,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
lock_4_inodes(old_dir, new_dir, NULL, NULL);
- time = ubifs_current_time(old_dir); + time = current_time(old_dir); fst_inode->i_ctime = time; snd_inode->i_ctime = time; old_dir->i_mtime = old_dir->i_ctime = time; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index d9ae86f..2cda3d6 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1196,7 +1196,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); ui->ui_size = inode->i_size; /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); /* Other attributes may be changed at the same time as well */ do_attr_changes(inode, attr); err = ubifs_jnl_truncate(c, inode, old_size, new_size); @@ -1243,7 +1243,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); /* 'truncate_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -1420,7 +1420,7 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, */ static int update_mctime(struct inode *inode) { - struct timespec now = ubifs_current_time(inode); + struct timespec now = current_time(inode); struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -1434,7 +1434,7 @@ static int update_mctime(struct inode *inode) return err;
mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); @@ -1511,7 +1511,7 @@ static int ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; - struct timespec now = ubifs_current_time(inode); + struct timespec now = current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; int err, update_time;
@@ -1579,7 +1579,7 @@ static int ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct ubifs_inode *ui = ubifs_inode(inode);
mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index da519ba..12b9eb50 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -126,7 +126,7 @@ static int setflags(struct inode *inode, int flags)
ui->flags = ioctl2ubifs(flags); ubifs_set_inode_flags(inode); - inode->i_ctime = ubifs_current_time(inode); + inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 8ece6ca..caf83d6 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -225,16 +225,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, }
/** - * ubifs_current_time - round current time to time granularity. - * @inode: inode - */ -static inline struct timespec ubifs_current_time(struct inode *inode) -{ - return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? - current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -} - -/** * ubifs_tnc_lookup - look up a file-system node. * @c: UBIFS file-system description object * @key: node key to lookup diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 7f1ead2..8c25081 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -84,6 +84,8 @@ static int create_default_filesystem(struct ubifs_info *c) int min_leb_cnt = UBIFS_MIN_LEB_CNT; long long tmp64, main_bytes; __le64 tmp_le64; + __le32 tmp_le32; + struct timespec ts;
/* Some functions called from here depend on the @c->key_len filed */ c->key_len = UBIFS_SK_LEN; @@ -298,13 +300,17 @@ static int create_default_filesystem(struct ubifs_info *c) ino->ch.node_type = UBIFS_INO_NODE; ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); ino->nlink = cpu_to_le32(2); - tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); + + ktime_get_real_ts(&ts); + ts = timespec_trunc(ts, DEFAULT_TIME_GRAN); + tmp_le64 = cpu_to_le64(ts.tv_sec); ino->atime_sec = tmp_le64; ino->ctime_sec = tmp_le64; ino->mtime_sec = tmp_le64; - ino->atime_nsec = 0; - ino->ctime_nsec = 0; - ino->mtime_nsec = 0; + tmp_le32 = cpu_to_le32(ts.tv_nsec); + ino->atime_nsec = tmp_le32; + ino->ctime_nsec = tmp_le32; + ino->mtime_nsec = tmp_le32; ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index efe00fc..3e53fdb 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -152,7 +152,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size;
mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -234,7 +234,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, mutex_unlock(&ui->ui_mutex);
mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_size -= CALC_XATTR_BYTES(old_size); host_ui->xattr_size += CALC_XATTR_BYTES(size);
@@ -488,7 +488,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, return err;
mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
CURRENT_TIME macro is not y2038 safe on 32 bit systems.
The patch replaces all the uses of CURRENT_TIME by current_time() for filesystem times, and ktime_get_* functions for others.
struct timespec is also not y2038 safe. Retain timespec for timestamp representation here as lustre uses it internally everywhere. These references will be changed to use struct timespec64 in a separate patch.
This is also in preparation for the patch that transitions vfs timestamps to use 64 bit time and hence make them y2038 safe. current_time() is also planned to be transitioned to y2038 safe behavior along with this change.
CURRENT_TIME macro will be deleted before merging the aforementioned change.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- drivers/staging/lustre/lustre/llite/llite_lib.c | 6 +++--- drivers/staging/lustre/lustre/osc/osc_io.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 7b80040..2b4b6b9 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -1472,17 +1472,17 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
/* We mark all of the fields "set" so MDS/OST does not re-set them */ if (attr->ia_valid & ATTR_CTIME) { - attr->ia_ctime = CURRENT_TIME; + attr->ia_ctime = current_time(inode); attr->ia_valid |= ATTR_CTIME_SET; } if (!(attr->ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) { - attr->ia_atime = CURRENT_TIME; + attr->ia_atime = current_time(inode); attr->ia_valid |= ATTR_ATIME_SET; } if (!(attr->ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) { - attr->ia_mtime = CURRENT_TIME; + attr->ia_mtime = current_time(inode); attr->ia_valid |= ATTR_MTIME_SET; }
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c index f991bee..cbab800 100644 --- a/drivers/staging/lustre/lustre/osc/osc_io.c +++ b/drivers/staging/lustre/lustre/osc/osc_io.c @@ -216,7 +216,7 @@ static int osc_io_submit(const struct lu_env *env, struct cl_object *obj = ios->cis_obj;
cl_object_attr_lock(obj); - attr->cat_mtime = LTIME_S(CURRENT_TIME); + attr->cat_mtime = ktime_get_real_seconds(); attr->cat_ctime = attr->cat_mtime; cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME); cl_object_attr_unlock(obj); @@ -256,7 +256,7 @@ static void osc_page_touch_at(const struct lu_env *env, kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms, loi->loi_lvb.lvb_size);
- attr->cat_ctime = LTIME_S(CURRENT_TIME); + attr->cat_ctime = ktime_get_real_seconds(); attr->cat_mtime = attr->cat_ctime; valid = CAT_MTIME | CAT_CTIME; if (kms > loi->loi_kms) {
CURRENT_TIME macro is not y2038 safe on 32 bit systems.
The patch replaces all the uses of CURRENT_TIME by current_time().
This is also in preparation for the patch that transitions vfs timestamps to use 64 bit time and hence make them y2038 safe. current_time() is also planned to be transitioned to y2038 safe behavior along with this change.
CURRENT_TIME macro will be deleted before merging the aforementioned change.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com --- security/apparmor/apparmorfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index be0b498..4f6ac9d 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1357,7 +1357,7 @@ static int aa_mk_null_file(struct dentry *parent)
inode->i_ino = get_next_ino(); inode->i_mode = S_IFCHR | S_IRUGO | S_IWUGO; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3)); d_instantiate(dentry, inode);
On 04/07/2017 05:57 PM, Deepa Dinamani wrote:
CURRENT_TIME macro is not y2038 safe on 32 bit systems.
The patch replaces all the uses of CURRENT_TIME by current_time().
This is also in preparation for the patch that transitions vfs timestamps to use 64 bit time and hence make them y2038 safe. current_time() is also planned to be transitioned to y2038 safe behavior along with this change.
CURRENT_TIME macro will be deleted before merging the aforementioned change.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com
This is all good, and I have no objections to it being merged for 4.12. If it isn't this change is already queued up for the apparmor 4.13 merge
Acked-by: John Johansen john.johansen@canonical.com
security/apparmor/apparmorfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index be0b498..4f6ac9d 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1357,7 +1357,7 @@ static int aa_mk_null_file(struct dentry *parent) inode->i_ino = get_next_ino(); inode->i_mode = S_IFCHR | S_IRUGO | S_IWUGO;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3)); d_instantiate(dentry, inode);
All uses of CURRENT_TIME_SEC and CURRENT_TIME macros have been replaced by other time functions. These macros are also not y2038 safe. And, all their use cases can be fulfilled by y2038 safe ktime_get_* variants.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Acked-by: John Stultz john.stultz@linaro.org Reviewed-by: Arnd Bergmann arnd@arndb.de --- include/linux/time.h | 3 --- 1 file changed, 3 deletions(-)
diff --git a/include/linux/time.h b/include/linux/time.h index 23f0f5c..c0543f5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -151,9 +151,6 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) return true; }
-#define CURRENT_TIME (current_kernel_time()) -#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) - /* Some architectures do not supply their own clocksource. * This is mainly the case in architectures that get their * inter-tick times by reading the counter on their interval
All uses of the current_fs_time() function have been replaced by other time interfaces.
And, its use cases can be fulfilled by current_time() or ktime_get_* variants.
Signed-off-by: Deepa Dinamani deepa.kernel@gmail.com Reviewed-by: Arnd Bergmann arnd@arndb.de --- include/linux/fs.h | 1 - kernel/time/time.c | 14 -------------- 2 files changed, 15 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h index f1d7347..cce6c57 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1430,7 +1430,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); }
-extern struct timespec current_fs_time(struct super_block *sb); extern struct timespec current_time(struct inode *inode);
/* diff --git a/kernel/time/time.c b/kernel/time/time.c index 25bdd25..cf69cca 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -230,20 +230,6 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; }
-/** - * current_fs_time - Return FS time - * @sb: Superblock. - * - * Return the current time truncated to the time granularity supported by - * the fs. - */ -struct timespec current_fs_time(struct super_block *sb) -{ - struct timespec now = current_kernel_time(); - return timespec_trunc(now, sb->s_time_gran); -} -EXPORT_SYMBOL(current_fs_time); - /* * Convert jiffies to milliseconds and back. *