io_uring: fix CQE reordering

author Pavel Begunkov <asml.silence@gmail.com>

Fri, 23 Sep 2022 13:53:25 +0000 (14:53 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 21 Oct 2022 10:39:24 +0000 (12:39 +0200)
author Pavel Begunkov <asml.silence@gmail.com>
Fri, 23 Sep 2022 13:53:25 +0000 (14:53 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 21 Oct 2022 10:39:24 +0000 (12:39 +0200)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c

index a22a32acf5902311cf480c690fa45cd75c8a5cb7..c5dd483a7de2fc68b9a5a18a2c6c8b646baeb364 100644 (file)
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -567,7 +567,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
  
         io_cq_lock(ctx);
         while (!list_empty(&ctx->cq_overflow_list)) {
-               struct io_uring_cqe *cqe = io_get_cqe(ctx);
+               struct io_uring_cqe *cqe = io_get_cqe_overflow(ctx, true);
                 struct io_overflow_cqe *ocqe;
  
                 if (!cqe && !force)
@@ -694,12 +694,19 @@ bool io_req_cqe_overflow(struct io_kiocb *req)
   * control dependency is enough as we're using WRITE_ONCE to
   * fill the cq entry
   */
-struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx)
+struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow)
  {
         struct io_rings *rings = ctx->rings;
         unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
         unsigned int free, queued, len;
  
+       /*
+        * Posting into the CQ when there are pending overflowed CQEs may break
+        * ordering guarantees, which will affect links, F_MORE users and more.
+        * Force overflow the completion.
+        */
+       if (!overflow && (ctx->check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT)))
+               return NULL;
  
         /* userspace may cheat modifying the tail, be safe and do min */
         queued = min(__io_cqring_events(ctx), ctx->cq_entries);
@@ -2232,6 +2239,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
  
         do {
                 io_cqring_overflow_flush(ctx);
+
                 if (io_cqring_events(ctx) >= min_events)
                         return 0;
                 if (!io_run_task_work())
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h

index 2f73f83af9604e8c104fb41c2cf07ab22247c8ce..45809ae6f64ef37f47f15c69a1b1e37c679bece2 100644 (file)
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -24,7 +24,7 @@ enum {
         IOU_STOP_MULTISHOT      = -ECANCELED,
  };
  
-struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx);
+struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow);
  bool io_req_cqe_overflow(struct io_kiocb *req);
  int io_run_task_work_sig(void);
  void io_req_complete_failed(struct io_kiocb *req, s32 res);
@@ -91,7 +91,8 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx)
  
  void io_cq_unlock_post(struct io_ring_ctx *ctx);
  
-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
+                                                      bool overflow)
  {
         if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
                 struct io_uring_cqe *cqe = ctx->cqe_cached;
@@ -103,7 +104,12 @@ static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
                 return cqe;
         }
  
-       return __io_get_cqe(ctx);
+       return __io_get_cqe(ctx, overflow);
+}
+
+static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+{
+       return io_get_cqe_overflow(ctx, false);
  }
  
  static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
author	Pavel Begunkov <asml.silence@gmail.com>
	Fri, 23 Sep 2022 13:53:25 +0000 (14:53 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 21 Oct 2022 10:39:24 +0000 (12:39 +0200)
io_uring/io_uring.c		patch | blob | history
io_uring/io_uring.h		patch | blob | history