Hello Linus,
After this change, one more CRIU test became flaky. This is due to one of the corner cases, so I am not sure that we need to fix anything in the kernel. I have fixed this issue in the test. I am not sure whether this will affect any real applications.
Here is the reproducer:
#include <unistd.h>
#include <stdio.h>
/*
 * Pipe reproducer: the parent pushes a 1 MiB buffer into a pipe with a
 * single write() while the forked child pulls from the other end with a
 * single read() of the same size.  Each side prints the byte count its
 * call returned and exits.
 *
 * Neither call is retried on a short transfer; this is deliberate, since
 * the returned counts are exactly what the program is meant to show.
 *
 * Returns 0 on success, 1 if pipe(), read() or write() fails.
 */
int main()
{
	char data[1 << 20];	/* never initialised; only the counts are printed */
	int fds[2];		/* fds[0] = read end, fds[1] = write end */
	int child;
	int n;

	if (pipe(fds) < 0)
		return 1;

	child = fork();
	if (child != 0) {
		/*
		 * Parent: writer side.  The fork() result is not checked
		 * for -1, so a failed fork() also ends up on this path.
		 */
		close(fds[0]);

		n = write(fds[1], data, sizeof(data));
		if (n < 0)
			return 1;

		printf("write -> %d\n", n);
		return 0;
	}

	/* Child: reader side.  Drop the unused write end, then read once. */
	close(fds[1]);

	n = read(fds[0], data, sizeof(data));
	if (n < 0)
		return 1;

	printf("read -> %d\n", n);
	return 0;
}
Before this change: [avagin@laptop fifo]$ uname -a Linux laptop 5.3.7-200.fc30.x86_64 #1 SMP Fri Oct 18 20:13:59 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux [avagin@laptop fifo]$ strace -e read,write,pipe -f ./pipe_bigbuf read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\260r\2\0\0\0\0\0"..., 832) = 832 read(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784) = 784 read(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32) = 32 read(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0gZ\316<\240z\v\206=\360\37F\32{\t\204"..., 68) = 68 read(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784) = 784 read(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32) = 32 read(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0gZ\316<\240z\v\206=\360\37F\32{\t\204"..., 68) = 68 pipe([3, 4]) = 0 strace: Process 622350 attached [pid 622349] write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576 <unfinished ...> [pid 622350] read(3, <unfinished ...> [pid 622349] <... write resumed> ) = 1048576 [pid 622350] <... read resumed> "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 1048576 [pid 622349] write(1, "write -> 1048576\n", 17write -> 1048576 ) = 17 [pid 622350] write(1, "read -> 1048576\n", 16read -> 1048576 ) = 16 [pid 622349] +++ exited with 0 +++ +++ exited with 0 +++
After this change: [root@fc24 ~]# strace -e read,write,pipe -f ./pipe_bigbuf read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\260r\2\0\0\0\0\0"..., 832) = 832 read(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784) = 784 read(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32) = 32 read(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0L\355\265_\4c\17r@ix\305q\26W\242"..., 68) = 68 read(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784) = 784 read(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32) = 32 read(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0L\355\265_\4c\17r@ix\305q\26W\242"..., 68) = 68 pipe([3, 4]) = 0 strace: Process 4946 attached [pid 4945] write(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576 <unfinished ...> [pid 4946] read(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1048576) = 65536 [pid 4946] write(1, "read -> 65536\n", 14read -> 65536 ) = 14 [pid 4945] <... write resumed>) = 131072 [pid 4946] +++ exited with 0 +++ --- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=4945, si_uid=0} --- +++ killed by SIGPIPE +++
On Tue, Feb 18, 2020 at 3:03 PM Linus Torvalds torvalds@linux-foundation.org wrote:
On Tue, Feb 18, 2020 at 2:33 PM Andrei Vagin avagin@gmail.com wrote:
I ran the CRIU tests on a kernel with both of these patches. Everything works as expected.
Thanks. I've added your tested-by and pushed out the fix.
Linus