From: Jens Axboe axboe@kernel.dk
[ Upstream commit caf1aeaffc3b09649a56769e559333ae2c4f1802 ]
We can have dependencies between epoll and io_uring. Consider an epoll context, identified by the epfd file descriptor, and an io_uring file descriptor identified by iofd. If we add iofd to the epfd context, and arm a multishot poll request for epfd with iofd, then the multishot poll request will repeatedly trigger and generate events until terminated by CQ ring overflow. This isn't a desired behavior.
Add EPOLL_URING so that io_uring can pass it in as part of the poll wakeup key, and io_uring can check for that to detect a potential recursive invocation.
Cc: stable@vger.kernel.org # 6.0 Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- fs/eventpoll.c | 18 ++++++++++-------- include/uapi/linux/eventpoll.h | 6 ++++++ 2 files changed, 16 insertions(+), 8 deletions(-)
--- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -548,7 +548,8 @@ out_unlock: */ #ifdef CONFIG_DEBUG_LOCK_ALLOC
-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) +static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, + unsigned pollflags) { struct eventpoll *ep_src; unsigned long flags; @@ -579,16 +580,17 @@ static void ep_poll_safewake(struct even } spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); ep->nests = nests + 1; - wake_up_locked_poll(&ep->poll_wait, EPOLLIN); + wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); ep->nests = 0; spin_unlock_irqrestore(&ep->poll_wait.lock, flags); }
#else
-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) +static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, + unsigned pollflags) { - wake_up_poll(&ep->poll_wait, EPOLLIN); + wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); }
#endif @@ -815,7 +817,7 @@ static void ep_free(struct eventpoll *ep
/* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) - ep_poll_safewake(ep, NULL); + ep_poll_safewake(ep, NULL, 0);
/* * We need to lock this because we could be hit by @@ -1284,7 +1286,7 @@ out_unlock:
/* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(ep, epi); + ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
if (!(epi->event.events & EPOLLEXCLUSIVE)) ewake = 1; @@ -1589,7 +1591,7 @@ static int ep_insert(struct eventpoll *e
/* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(ep, NULL); + ep_poll_safewake(ep, NULL, 0);
return 0;
@@ -1692,7 +1694,7 @@ static int ep_modify(struct eventpoll *e
/* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(ep, NULL); + ep_poll_safewake(ep, NULL, 0);
return 0; } --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -41,6 +41,12 @@ #define EPOLLMSG (__force __poll_t)0x00000400 #define EPOLLRDHUP (__force __poll_t)0x00002000
+/* + * Internal flag - wakeup generated by io_uring, used to detect recursion back + * into the io_uring poll handler. + */ +#define EPOLL_URING_WAKE ((__force __poll_t)(1U << 27)) + /* Set exclusive wakeup mode for the target file descriptor */ #define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28))