I recently discussed with Rich the work needed to get 64-bit time_t support into musl. One of the first steps he identified was to find out which interfaces we would want to abstract or wrap for a new ABI, given that we have to make a binary-incompatible interface anyway.
I have found all the data structures that are provided by both the kernel headers and the musl headers now, and annotated what I think the path forward could be. I already provided the same list on IRC, but here is a (slightly updated) copy for everyone else.
The takeaway is that we probably need to add new definitions for flock64, statfs, stat, termios, {msg,sem,shm}{buf,info,id_ds}, ipc_perm, rlimit, rusage, sched_param, time_t, timeval, timespec, itimerval, itimerspec, and timex, and then wrap all kernel interfaces that use those.
The same list can also be helpful when we try to clean up the kernel header files -- my idea was that we may want to prefix each struct tag with __kernel_ as we do for typedefs, and then have a kernel header that redefines them like
#ifdef __WANT_KERNEL_STRUCTS #define __kernel_flock flock #endif struct __kernel_flock { ... };
Arnd
/* sparc and mips are incompatible, keep wrapping flock64 */ include/uapi/asm-generic/fcntl.h:struct flock { arch/mips/include/uapi/asm/fcntl.h:struct flock {
/* pt_regs and sigcontext are arch specific, cannot abstract */ arch/*/include/uapi/asm/ptrace.h:struct pt_regs { arch/*/include/uapi/asm/ptrace.h:struct user_regs_struct { arch/arm64/include/uapi/asm/sigcontext.h:struct _aarch64_ctx { arch/arm64/include/uapi/asm/sigcontext.h:struct esr_context { arch/arm64/include/uapi/asm/sigcontext.h:struct extra_context { arch/arm64/include/uapi/asm/sigcontext.h:struct sve_context { arch/*/include/uapi/asm/sigcontext.h:struct sigcontext {
/* arch specific, has wrapper */ arch/*/include/uapi/asm/signal.h:struct sigaction { include/uapi/asm-generic/signal.h:struct sigaction {
/* arch specific, maybe add wrapper? */ arch/*/include/uapi/asm/signal.h:typedef struct sigaltstack { include/uapi/asm-generic/signal.h:typedef struct sigaltstack {
/* arch specific, need to look closer for incompatibilities */ include/uapi/asm-generic/siginfo.h:typedef struct sigevent {
/* arch specific, should add wrapper */ arch/*/include/uapi/asm/statfs.h:struct statfs { include/uapi/asm-generic/statfs.h:struct statfs {
/* arch specific, wrap statx instead */ arch/*/include/uapi/asm/stat.h:struct stat { include/uapi/asm-generic/stat.h:struct stat { include/uapi/linux/stat.h:struct statx { include/uapi/linux/stat.h:struct statx_timestamp {
/* arch specific, should wrap termios2 where possible, * need to check what musl does now */ arch/*/include/uapi/asm/termbits.h:struct termios { include/uapi/asm-generic/termbits.h:struct termios {
/* IPC: wrap them all */ include/uapi/linux/mqueue.h:struct mq_attr { include/uapi/linux/msg.h:struct msgbuf { include/uapi/linux/msg.h:struct msginfo { include/uapi/linux/msg.h:struct msqid_ds { include/uapi/linux/sem.h:struct sembuf { include/uapi/linux/sem.h:struct semid_ds { include/uapi/linux/sem.h:struct seminfo { include/uapi/linux/shm.h:struct shmid_ds { include/uapi/linux/shm.h:struct shm_info { include/uapi/linux/shm.h:struct shminfo {
/* rlimit/rlimit64: keep using only rlimit64 */ include/uapi/linux/resource.h:struct rlimit { include/uapi/linux/resource.h:struct rlimit64 {
/* rusage: need to wrap: getrusage, wait4 */ include/uapi/linux/resource.h:struct rusage {
/* wrapped already, replace with a more extensible one */ include/uapi/linux/sched/types.h:struct sched_param {
/* prctl(PR_SET_MM); broken in kernel compat mode? * could be wrapped if necessary */ include/uapi/linux/prctl.h:struct prctl_mm_map {
/* inconsistent amount of padding, maybe wrap */ include/uapi/linux/sysinfo.h:struct sysinfo {
/* time64: need to use 64-bit versions of time_t */ include/uapi/linux/time.h:timespec { include/uapi/linux/time.h:struct itimerspec {
/* need to wrap */ include/uapi/linux/utime.h:struct utimbuf { include/uapi/linux/time.h:timeval { include/uapi/linux/time.h:struct itimerval {
/* no need to change */ include/uapi/linux/time.h:struct timezone {
/* probably need to wrap (depending on kernel decision) */ include/uapi/linux/timex.h:struct timex {
/* incompatible on x32 */ include/uapi/linux/times.h:struct tms { include/uapi/linux/uio.h:struct iovec {
/* tape driver ioctls, musl copy is incompatible * on mips64, sparc64 */ include/uapi/linux/mtio.h:struct mtget { include/uapi/linux/mtio.h:struct mtop { include/uapi/linux/mtio.h:struct mtpos {
/* compatible, no need to wrap */ include/uapi/asm-generic/fcntl.h:struct f_owner_ex { include/uapi/asm-generic/poll.h:struct pollfd { include/uapi/asm-generic/termios.h:struct winsize { include/uapi/linux/acct.h:struct acct_v3 include/uapi/linux/eventpoll.h:struct epoll_event { include/uapi/linux/fanotify.h:struct fanotify_event_metadata { include/uapi/linux/fanotify.h:struct fanotify_response { include/uapi/linux/signalfd.h:struct signalfd_siginfo {
/* fixed wire format */ include/uapi/linux/udp.h:struct udphdr { include/uapi/linux/icmp.h:struct icmphdr { include/uapi/linux/if_arp.h:struct arphdr { include/uapi/linux/tcp.h:struct tcphdr { include/uapi/linux/if_ether.h:struct ethhdr { include/uapi/linux/ip.h:struct iphdr {
/* other network stuff, fixed format */ include/uapi/linux/icmpv6.h:struct icmp6_filter { include/uapi/linux/if_arp.h:struct arpreq { include/uapi/linux/if_arp.h:struct arpreq_old { include/uapi/linux/if.h:struct ifconf { include/uapi/linux/if.h:struct ifmap { include/uapi/linux/if.h:struct ifreq { include/uapi/linux/if_packet.h:struct packet_mreq { include/uapi/linux/if_packet.h:struct sockaddr_ll { include/uapi/linux/in6.h:struct in6_addr { include/uapi/linux/in6.h:struct ipv6_mreq { include/uapi/linux/in6.h:struct sockaddr_in6 { include/uapi/linux/in.h:struct group_filter { include/uapi/linux/in.h:struct group_req { include/uapi/linux/in.h:struct group_source_req { include/uapi/linux/in.h:struct in_addr { include/uapi/linux/in.h:struct in_pktinfo { include/uapi/linux/in.h:struct ip_mreq { include/uapi/linux/in.h:struct ip_mreqn { include/uapi/linux/in.h:struct ip_mreq_source { include/uapi/linux/in.h:struct ip_msfilter { include/uapi/linux/in.h:struct sockaddr_in { include/uapi/linux/inotify.h:struct inotify_event { include/uapi/linux/ipc.h:struct ipc_perm include/uapi/linux/ipv6.h:struct in6_pktinfo { include/uapi/linux/ipv6.h:struct ip6_mtuinfo { include/uapi/linux/ipv6_route.h:struct in6_rtmsg { include/uapi/linux/route.h:struct rtentry { include/uapi/linux/tcp.h:struct tcp_diag_md5sig { include/uapi/linux/tcp.h:struct tcp_info { include/uapi/linux/tcp.h:struct tcp_md5sig { include/uapi/linux/tcp.h:struct tcp_repair_window { include/uapi/linux/un.h:struct sockaddr_un {
/* shared typedefs: all in ELF format; can't change */ arch/*/include/uapi/asm/elf.h:typedef ... elf_fpregset_t; arch/*/include/uapi/asm/elf.h:typedef ... elf_greg_t; arch/*/include/uapi/asm/elf.h:typedef elf_greg_t elf_gregset_t[ELF_NGREG]; arch/sparc/include/uapi/asm/uctx.h:} mcontext_t; arch/sparc/include/uapi/asm/uctx.h:typedef struct ucontext ucontext_t; include/uapi/linux/elf.h:typedef struct elf32_hdr Elf32_Ehdr; include/uapi/linux/elf.h:typedef struct elf64_hdr Elf64_Ehdr; include/uapi/linux/elf.h:typedef struct {...} Elf32_Shdr; include/uapi/linux/elf.h:typedef struct {...} Elf64_Shdr; include/uapi/linux/elf.h:typedef struct {...} Elf32_Chdr; include/uapi/linux/elf.h:typedef struct {...} Elf64_Chdr; include/uapi/linux/elf.h:typedef struct {...} Elf32_Nhdr; include/uapi/linux/elf.h:typedef struct {...} Elf64_Nhdr; include/uapi/linux/elf.h:typedef ... include/uapi/linux/elfcore.h:typedef elf_gregset_t gregset_t; include/uapi/linux/elfcore.h: elf_gregset_t pr_reg; /* GP registers */ include/uapi/linux/elfcore.h:typedef elf_greg_t greg_t; include/uapi/linux/elfcore.h:typedef elf_gregset_t gregset_t; include/uapi/linux/elfcore.h:typedef elf_fpregset_t fpregset_t; include/uapi/linux/elfcore.h:struct elf_prpsinfo include/uapi/linux/elfcore.h:struct elf_prstatus include/uapi/linux/elfcore.h:struct elf_siginfo
/* sg.h missing from exported kernel headers, can't change */ include/scsi/sg.h:typedef struct sg_iovec sg_iovec_t; include/scsi/sg.h:typedef struct sg_io_hdr sg_io_hdr_t; include/scsi/sg.h-struct sg_scsi_id { include/scsi/sg.h:typedef struct sg_req_info sg_req_info_t; include/scsi/sg.h:typedef struct sg_io_hdr Sg_io_hdr; include/scsi/sg.h:typedef struct sg_io_vec Sg_io_vec; include/scsi/sg.h:typedef struct sg_scsi_id Sg_scsi_id; include/scsi/sg.h:typedef struct sg_req_info Sg_req_info; include/scsi/sg.h-struct sg_header {
/* 32-bit on alpha, used in ustat (not provided by musl) */ include/uapi/asm-generic/posix_types.h:typedef __kernel_ulong_t __kernel_ino_t; /* 64-bit on mips64, used in mtio (should fix?) and ustat */ include/uapi/asm-generic/posix_types.h:typedef int __kernel_daddr_t;
/* 16 bit on older architectures but only used in IPC interfaces, which will get wrapped anyway */ include/uapi/asm-generic/posix_types.h:typedef unsigned int __kernel_mode_t; include/uapi/asm-generic/posix_types.h:typedef int __kernel_ipc_pid_t; include/uapi/asm-generic/posix_types.h:typedef unsigned int __kernel_uid_t; include/uapi/asm-generic/posix_types.h:typedef unsigned int __kernel_gid_t;
(Sorry for replying late again, I was not subscribed to the list then (I am now) and did not get Cc'd on the follow-ups to my original mail)
On Wed, 19 Dec 2018, Rich Felker wrote:
BTW regarding 64-bit time_t on 32-bit archs, Arnd has been working to make this happen for a long time. I believe it was over 3 years ago we first spoke about working on it in musl. Basically we've reached the point where 32-bit archs are a dead-end for developing embedded stuff that needs to run indefinitely without the ability to upgrade, and this domain is the main place where 32-bit archs are still very relevant. One nice thing about making a new clean ABI is that embedded users who don't care about binary ecosystems can switch immediately, and desktop/server distros can take their time and switch from .1 to .2 when it works best for them.
FWIW, I have now uploaded a series that has a chance of getting merged for 5.1 in my y2038 tree: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground.git/log/?h=y...
I still have to repeat the LTP tests I did over the summer after getting musl to build again with the changes that happened in the meantime, but this should be fairly close to what we get. Any comments on the kernel ABI changes are highly welcome.
Now, a few comments on findings so far. These won't be complete but they're a start:
The takeaway is that we probably need to add new definitions for flock64, statfs, stat, termios, {msg,sem,shm}{buf,info,id_ds}, ipc_perm,
Not clear on how flock[64?] is affected.
In my list, I had mentioned that the kernel's flock64 is different from musl's flock structure on sparc64 (which has an extra padding field) and on mips (I may have been mistaken there, only flock differs on mips32, flock64 is apparently fine).
If we don't care about musl on sparc, there may be no need to do anything here.
stat and ipc structures contain time_t's and definitely need to change.
Right, the traditional kernel definitions here have numerous problems, most importantly the fact that they are different on each of the old architectures.
I think termios is listed here because .2 ABI overhaul is a great opportunity to switch to the "termios2" interfaces, unify the userspace types, and make support for custom baud work right.
Correct. There is also some inconsistency between the architectures here.
rlimit, rusage, sched_param, time_t, timeval, timespec, itimerval, itimerspec, and timex, and then wrap all kernel interfaces that use those.
Not clear on how rlimit is affected, but most of these definitely are.
I probably had it mixed up with rusage here.
Arnd
On Thu, Dec 20, 2018 at 11:33:59AM +0100, Szabolcs Nagy wrote:
- Rich Felker <dalias@...c.org> [2018-12-19 19:30:44 -0500]:
On Tue, Dec 18, 2018 at 08:41:53PM +0100, Arnd Bergmann wrote: ".1" ABIs, this translation would mostly be the identity transformation, but on archs where we're already doing some hacks to fix up kernel ABI bugs (sysvipc on big endian, mips stat structure, x32 stuff, etc.) the hacks could be replaced by use of this translation infrastructure.
lesson of ilp32 was that libc cannot generally translate between a user and kernel abi (otherwise it could be done in userspace).
the problematic cases are when user talks to the kernel directly using libc types in a way that the libc cannot do the translation.
interfaces where the libc does not know the type, just an opaque pointer: ioctl, fcntl, getsockopt, setsockopt, raw syscall
Ultimately all of these *can* be translated just by enumerating all the broken interfaces and special-casing them. It's not pretty, though. What would probably happen (Arnd, do you know?) would be redefining the ioctl numbers etc. to "time64" versions of the interfaces, and for interfaces which are actually "important" to have work on old kernels, including translations to/from the corresponding old ioctl. Depending on the scope, that might be all or nearly all of them.
We've done it for most of them by now. In a lot of cases we got lucky because the ioctl command code changes with sizeof(time_t), so all we had to do in the kernel was to interpret those ioctl commands for 32-bit and 64-bit time_t.
In other cases, we have redefined the ioctl command codes in the header with some clever (hopefully not too clever) trick:
#if __BITS_PER_LONG == 64 #define LPSETTIMEOUT LPSETTIMEOUT_OLD #else #define LPSETTIMEOUT (sizeof(time_t) > sizeof(__kernel_long_t) ? \ LPSETTIMEOUT_NEW : LPSETTIMEOUT_OLD) #endif
This way, we guarantee that we can still detect the data type expected by an application calling LPSETTIMEOUT. The same approach is used for setsockopt and some other interfaces.
In other cases (in particular when we never pass absolute CLOCK_REALTIME data), we changed the type inside of a structure from time_t to 'long' or 'unsigned long', in order to keep the ABI unchanged. The disadvantage here is that it requires user space to use updated kernel headers, which is a problem for applications that ship with a copy of the kernel header.
I think for fcntl we were lucky that nothing passes a time_t.
direct communication channel to the kernel that may expose the abi incompatibility: netlink, sysfs, procfs
Netlink is the worst here since it's "hidden" behind normal read/write calls where the data is abstract bytes. If there's anything that needs to be fixed at the netlink layer it probably just requires redefining part of the _API_ to use fixed-width types rather than time_t or such.
I don't remember seeing any such case with netlink. Generally speaking, netlink already has to use fixed-width types in order to support compat mode, but there may be a couple of exceptions where the kernel requires nasty hacks here. The same is true for read/write based chardev interfaces such as /dev/input/eventX, which we had to redefine to use a structure based on 'unsigned long' instead of 'time_t' and to require the use of CLOCK_MONOTONIC to avoid the overflow.
types related to signal handling that may require sighandler wrapping to translate: siginfo_t, ucontext_t
Yes. I'm not proposing we do sighandler wrapping/translation now or in the future because it's a pain, but there are some good motivations to do it, so I'd like to keep the option open.
I'm certainly not planning to touch any of those in musl ;--)
time_t may not be affected by these, but it shows that translation is fragile in general, i wonder if we can ensure correct behaviour in all cases. there is also the problem of linux headers which may use and redefine libc types and user code may need to use those.
Redefining libc types is already broken, and the kernel headers that do it can't be used from userspace when libc headers are included. This issue is independent of type sizes/layouts matching.
I don't think any kernel headers _use_ libc types either. They generally use their own stuff.
'struct timespec' is a notable exception here, but probably not the only one. At the moment, both libc and kernel define this structure (and timeval, itimerval, itimerspec, ...), and in my work on the kernel interfaces I assumed that the libc version is the one that will prevail, while the kernel version should get removed.
Arnd