The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b6985b9b82954caa53f862d6059d06c0526254f0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167930928916599(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
b6985b9b8295 ("mptcp: use the workqueue to destroy unaccepted sockets")
7d803344fdc3 ("mptcp: fix deadlock in fastopen error path")
f2bb566f5c97 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6985b9b82954caa53f862d6059d06c0526254f0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Thu, 9 Mar 2023 15:49:59 +0100
Subject: [PATCH] mptcp: use the workqueue to destroy unaccepted sockets
Christoph reported a UaF at token lookup time after having
refactored the passive socket initialization part:
BUG: KASAN: use-after-free in __token_bucket_busy+0x253/0x260
Read of size 4 at addr ffff88810698d5b0 by task syz-executor653/3198
CPU: 1 PID: 3198 Comm: syz-executor653 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x6e/0x91
print_report+0x16a/0x46f
kasan_report+0xad/0x130
__token_bucket_busy+0x253/0x260
mptcp_token_new_connect+0x13d/0x490
mptcp_connect+0x4ed/0x860
__inet_stream_connect+0x80e/0xd90
tcp_sendmsg_fastopen+0x3ce/0x710
mptcp_sendmsg+0xff1/0x1a20
inet_sendmsg+0x11d/0x140
__sys_sendto+0x405/0x490
__x64_sys_sendto+0xdc/0x1b0
do_syscall_64+0x3b/0x90
entry_SYSCALL_64_after_hwframe+0x72/0xdc
We need to properly clean-up all the paired MPTCP-level
resources and be sure to release the msk last, even when
the unaccepted subflow is destroyed by the TCP internals
via inet_child_forget().
We can re-use the existing MPTCP_WORK_CLOSE_SUBFLOW infra,
explicitly checking for the critical scenario - the closed
subflow is the MPC one and the msk is not accepted - and
eventually going through the full cleanup.
With this change, __mptcp_destroy_sock() is always called
on msk sockets, even on accepted ones. We no longer need
to transiently drop one sk reference at msk clone time.
Please note this commit depends on the parent one:
mptcp: refactor passive socket initialization
Fixes: 58b09919626b ("mptcp: create msk early")
Cc: stable(a)vger.kernel.org
Reported-and-tested-by: Christoph Paasch <cpaasch(a)apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/347
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 447641d34c2c..2a2093d61835 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2342,7 +2342,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out;
}
- sock_orphan(ssk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2350,7 +2349,20 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
* reference owned by msk;
*/
if (!inet_csk(ssk)->icsk_ulp_ops) {
+ WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
+ } else if (msk->in_accept_queue && msk->first == ssk) {
+ /* if the first subflow moved to a close state, e.g. due to
+ * incoming reset and we reach here before inet_child_forget()
+ * the TCP stack could later try to close it via
+ * inet_csk_listen_stop(), or deliver it to the user space via
+ * accept().
+ * We can't delete the subflow - or risk a double free - nor let
+ * the msk survive - or will be leaked in the non accept scenario:
+ * fallback and let TCP cope with the subflow cleanup.
+ */
+ WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
+ mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN) {
@@ -2398,9 +2410,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}
-static void __mptcp_close_subflow(struct mptcp_sock *msk)
+static void __mptcp_close_subflow(struct sock *sk)
{
struct mptcp_subflow_context *subflow, *tmp;
+ struct mptcp_sock *msk = mptcp_sk(sk);
might_sleep();
@@ -2414,7 +2427,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
continue;
- mptcp_close_ssk((struct sock *)msk, ssk, subflow);
+ mptcp_close_ssk(sk, ssk, subflow);
+ }
+
+ /* if the MPC subflow has been closed before the msk is accepted,
+ * msk will never be accept-ed, close it now
+ */
+ if (!msk->first && msk->in_accept_queue) {
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_sk_state_store(sk, TCP_CLOSE);
}
}
@@ -2623,6 +2644,9 @@ static void mptcp_worker(struct work_struct *work)
__mptcp_check_send_data_fin(sk);
mptcp_check_data_fin(sk);
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(sk);
+
/* There is no point in keeping around an orphaned sk timedout or
* closed, but we need the msk around to reply to incoming DATA_FIN,
* even if it is orphaned and in FIN_WAIT2 state
@@ -2638,9 +2662,6 @@ static void mptcp_worker(struct work_struct *work)
}
}
- if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
- __mptcp_close_subflow(msk);
-
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
@@ -3078,6 +3099,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
msk->subflow = NULL;
+ msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
@@ -3095,8 +3117,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
security_inet_csk_clone(nsk, req);
bh_unlock_sock(nsk);
- /* keep a single reference */
- __sock_put(nsk);
+ /* note: the newly allocated socket refcount is 2 now */
return nsk;
}
@@ -3152,8 +3173,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
goto out;
}
- /* acquire the 2nd reference for the owning socket */
- sock_hold(new_mptcp_sock);
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
@@ -3704,6 +3723,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct sock *newsk = newsock->sk;
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk->in_accept_queue = 0;
lock_sock(newsk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 61fd8eabfca2..3a2db1b862dd 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -295,7 +295,8 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1,
- fastopening:1;
+ fastopening:1,
+ in_accept_queue:1;
int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -666,6 +667,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
+void mptcp_subflow_drop_ctx(struct sock *ssk);
+
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a631a5e6fc7b..932a3e0eb22d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -699,9 +699,10 @@ static bool subflow_hmac_valid(const struct request_sock *req,
static void mptcp_force_close(struct sock *sk)
{
- /* the msk is not yet exposed to user-space */
+ /* the msk is not yet exposed to user-space, and refcount is 2 */
inet_sk_state_store(sk, TCP_CLOSE);
sk_common_release(sk);
+ sock_put(sk);
}
static void subflow_ulp_fallback(struct sock *sk,
@@ -717,7 +718,7 @@ static void subflow_ulp_fallback(struct sock *sk,
mptcp_subflow_ops_undo_override(sk);
}
-static void subflow_drop_ctx(struct sock *ssk)
+void mptcp_subflow_drop_ctx(struct sock *ssk)
{
struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
@@ -823,7 +824,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (new_msk)
mptcp_copy_inaddrs(new_msk, child);
- subflow_drop_ctx(child);
+ mptcp_subflow_drop_ctx(child);
goto out;
}
@@ -914,7 +915,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
return child;
dispose_child:
- subflow_drop_ctx(child);
+ mptcp_subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
@@ -1866,7 +1867,6 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
struct sock *sk = (struct sock *)msk;
bool do_cancel_work;
- sock_hold(sk);
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
next = msk->dl_next;
msk->first = NULL;
@@ -1954,6 +1954,13 @@ static void subflow_ulp_release(struct sock *ssk)
* when the subflow is still unaccepted
*/
release = ctx->disposable || list_empty(&ctx->node);
+
+ /* inet_child_forget() does not call sk_state_change(),
+ * explicitly trigger the socket close machinery
+ */
+ if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW,
+ &mptcp_sk(sk)->flags))
+ mptcp_schedule_work(sk);
sock_put(sk);
}
commit b1a37ed00d7908a991c1d0f18a8cba3c2aa99bdc upstream.
Presently, when a report is processed, its proposed size, provided by
the user of the API (as Report Size * Report Count) is compared against
the subsystem default HID_MAX_BUFFER_SIZE (16k). However, some
low-level HID drivers allocate a reduced amount of memory to their
buffers (e.g. UHID only allocates UHID_DATA_MAX (4k) buffers), rendering
this check inadequate in some cases.
In these circumstances, if the received report ends up being smaller
than the proposed report size, the remainder of the buffer is zeroed.
That is, the space between csize (size of the current report) and
rsize (size proposed, i.e. Report Size * Report Count), which can be
up to HID_MAX_BUFFER_SIZE (16k), is zeroed. This means that memset()
shoots straight past the end of the buffer boundary and starts zeroing
out in-use values, often resulting in calamity.
This patch introduces a new variable into 'struct hid_ll_driver' where
individual low-level drivers can over-ride the default maximum value of
HID_MAX_BUFFER_SIZE (16k) with something more sympathetic to the
interface.
Signed-off-by: Lee Jones <lee(a)kernel.org>
Signed-off-by: Jiri Kosina <jkosina(a)suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[Lee: Backported to v4.9.y]
Signed-off-by: Lee Jones <lee(a)kernel.org>
---
drivers/hid/hid-core.c | 18 +++++++++++++-----
include/linux/hid.h | 3 +++
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index c5207ed5d65b1..b9e6c51173571 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -246,6 +246,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
unsigned usages;
unsigned offset;
unsigned i;
+ unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
report = hid_register_report(parser->device, report_type, parser->global.report_id);
if (!report) {
@@ -269,8 +270,11 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
offset = report->size;
report->size += parser->global.report_size * parser->global.report_count;
+ if (parser->device->ll_driver->max_buffer_size)
+ max_buffer_size = parser->device->ll_driver->max_buffer_size;
+
/* Total size check: Allow for possible report index byte */
- if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) {
+ if (report->size > (max_buffer_size - 1) << 3) {
hid_err(parser->device, "report is too long\n");
return -1;
}
@@ -1548,6 +1552,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
struct hid_report_enum *report_enum = hid->report_enum + type;
struct hid_report *report;
struct hid_driver *hdrv;
+ int max_buffer_size = HID_MAX_BUFFER_SIZE;
unsigned int a;
u32 rsize, csize = size;
u8 *cdata = data;
@@ -1564,10 +1569,13 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
rsize = hid_compute_report_size(report);
- if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE)
- rsize = HID_MAX_BUFFER_SIZE - 1;
- else if (rsize > HID_MAX_BUFFER_SIZE)
- rsize = HID_MAX_BUFFER_SIZE;
+ if (hid->ll_driver->max_buffer_size)
+ max_buffer_size = hid->ll_driver->max_buffer_size;
+
+ if (report_enum->numbered && rsize >= max_buffer_size)
+ rsize = max_buffer_size - 1;
+ else if (rsize > max_buffer_size)
+ rsize = max_buffer_size;
if (csize < rsize) {
dbg_hid("report %d is too short, (%d < %d)\n", report->id,
diff --git a/include/linux/hid.h b/include/linux/hid.h
index a07fa623fd0c2..be12e7d14c4a1 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -736,6 +736,7 @@ struct hid_driver {
* @raw_request: send raw report request to device (e.g. feature report)
* @output_report: send output report to device
* @idle: send idle request to device
+ * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE)
*/
struct hid_ll_driver {
int (*start)(struct hid_device *hdev);
@@ -760,6 +761,8 @@ struct hid_ll_driver {
int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len);
int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype);
+
+ unsigned int max_buffer_size;
};
extern struct hid_ll_driver i2c_hid_ll_driver;
--
2.40.0.rc1.284.g88254d51c5-goog