set_memory_attr() was implemented by commit 4d1755b6a762 ("powerpc/mm:
implement set_memory_attr()") because the set_memory_xx() functions
couldn't be used at that time to modify memory "on the fly", as
explained in that commit.
But set_memory_attr() uses set_pte_at(), which triggers warnings when
CONFIG_DEBUG_VM is selected, because set_pte_at() is not meant to be
used for updating existing page table entries.
The check could be bypassed by using __set_pte_at() instead, as was
the case before commit c988cfd38e48 ("powerpc/32: use
set_memory_attr()"), but since commit 9f7853d7609d ("powerpc/mm:
Fix set_memory_*() against concurrent accesses") it is now possible
to use the set_memory_xx() functions to update page table entries
"on the fly", because the update is now atomic.
For DEBUG_PAGEALLOC we need to clear and set back _PAGE_PRESENT.
Add set_memory_np() and set_memory_p() for that.
Replace all uses of set_memory_attr() by the relevant set_memory_xx()
and remove set_memory_attr().
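In the DEBUG_PAGEALLOC hook the replacement boils down to the calls
below (a sketch; the full hunk is in the pgtable_32.c diff further
down):

  if (enable)
	set_memory_p(addr, numpages);	/* set _PAGE_PRESENT back */
  else
	set_memory_np(addr, numpages);	/* clear _PAGE_PRESENT */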
Reported-by: Maxime Bizon <mbizon(a)freebox.fr>
Fixes: c988cfd38e48 ("powerpc/32: use set_memory_attr()")
Cc: stable(a)vger.kernel.org
Depends-on: 9f7853d7609d ("powerpc/mm: Fix set_memory_*() against concurrent accesses")
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Reviewed-by: Russell Currey <ruscur(a)russell.cc>
Tested-by: Maxime Bizon <mbizon(a)freebox.fr>
---
v3: Use _PAGE_PRESENT directly as all platforms have the bit
v2: Add comment to SET_MEMORY_P and SET_MEMORY_NP
---
arch/powerpc/include/asm/set_memory.h | 12 ++++++++-
arch/powerpc/mm/pageattr.c | 39 +++++----------------------
arch/powerpc/mm/pgtable_32.c | 24 ++++++++---------
3 files changed, 28 insertions(+), 47 deletions(-)
diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h
index b040094f7920..7ebc807aa8cc 100644
--- a/arch/powerpc/include/asm/set_memory.h
+++ b/arch/powerpc/include/asm/set_memory.h
@@ -6,6 +6,8 @@
#define SET_MEMORY_RW 1
#define SET_MEMORY_NX 2
#define SET_MEMORY_X 3
+#define SET_MEMORY_NP 4 /* Set memory non present */
+#define SET_MEMORY_P 5 /* Set memory present */
int change_memory_attr(unsigned long addr, int numpages, long action);
@@ -29,6 +31,14 @@ static inline int set_memory_x(unsigned long addr, int numpages)
return change_memory_attr(addr, numpages, SET_MEMORY_X);
}
-int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot);
+static inline int set_memory_np(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_NP);
+}
+
+static inline int set_memory_p(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_P);
+}
#endif
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
index 8812454e70ff..85753e32a4de 100644
--- a/arch/powerpc/mm/pageattr.c
+++ b/arch/powerpc/mm/pageattr.c
@@ -46,6 +46,12 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
case SET_MEMORY_X:
pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX);
break;
+ case SET_MEMORY_NP:
+ pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0);
+ break;
+ case SET_MEMORY_P:
+ pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0);
+ break;
default:
WARN_ON_ONCE(1);
break;
@@ -90,36 +96,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action)
return apply_to_existing_page_range(&init_mm, start, size,
change_page_attr, (void *)action);
}
-
-/*
- * Set the attributes of a page:
- *
- * This function is used by PPC32 at the end of init to set final kernel memory
- * protection. It includes changing the maping of the page it is executing from
- * and data pages it is using.
- */
-static int set_page_attr(pte_t *ptep, unsigned long addr, void *data)
-{
- pgprot_t prot = __pgprot((unsigned long)data);
-
- spin_lock(&init_mm.page_table_lock);
-
- set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot));
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-
- spin_unlock(&init_mm.page_table_lock);
-
- return 0;
-}
-
-int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot)
-{
- unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
- unsigned long sz = numpages * PAGE_SIZE;
-
- if (numpages <= 0)
- return 0;
-
- return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr,
- (void *)pgprot_val(prot));
-}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 906e4e4328b2..f71ededdc02a 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -135,10 +135,12 @@ void mark_initmem_nx(void)
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
PFN_DOWN((unsigned long)_sinittext);
- if (v_block_mapped((unsigned long)_sinittext))
+ if (v_block_mapped((unsigned long)_sinittext)) {
mmu_mark_initmem_nx();
- else
- set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL);
+ } else {
+ set_memory_nx((unsigned long)_sinittext, numpages);
+ set_memory_rw((unsigned long)_sinittext, numpages);
+ }
}
#ifdef CONFIG_STRICT_KERNEL_RWX
@@ -152,18 +154,14 @@ void mark_rodata_ro(void)
return;
}
- numpages = PFN_UP((unsigned long)_etext) -
- PFN_DOWN((unsigned long)_stext);
-
- set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX);
/*
- * mark .rodata as read only. Use __init_begin rather than __end_rodata
- * to cover NOTES and EXCEPTION_TABLE.
+ * mark .text and .rodata as read only. Use __init_begin rather than
+ * __end_rodata to cover NOTES and EXCEPTION_TABLE.
*/
numpages = PFN_UP((unsigned long)__init_begin) -
- PFN_DOWN((unsigned long)__start_rodata);
+ PFN_DOWN((unsigned long)_stext);
- set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO);
+ set_memory_ro((unsigned long)_stext, numpages);
// mark_initmem_nx() should have already run by now
ptdump_check_wx();
@@ -179,8 +177,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
return;
if (enable)
- set_memory_attr(addr, numpages, PAGE_KERNEL);
+ set_memory_p(addr, numpages);
else
- set_memory_attr(addr, numpages, __pgprot(0));
+ set_memory_np(addr, numpages);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
--
2.33.1
From: Desmond Cheong Zhi Xi <desmondcheongzx(a)gmail.com>
commit ba316be1b6a00db7126ed9a39f9bee434a508043 upstream.
struct sock.sk_timer should be used as a sock cleanup timer. However,
SCO uses it to implement sock timeouts.
This causes issues because struct sock.sk_timer's callback is run in
an IRQ context, and the timer callback function sco_sock_timeout takes
a spin lock on the socket. However, other functions such as
sco_conn_del and sco_conn_ready take the spin lock with interrupts
enabled.
This inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock usage could
lead to deadlocks as reported by Syzbot [1]:
       CPU0
       ----
  lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
  <Interrupt>
    lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
To fix this, we use delayed work to implement SCO sock timeouts
instead. This allows us to avoid taking the spin lock on the socket in
an IRQ context, and corrects the misuse of struct sock.sk_timer.
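At its core, the conversion is the standard delayed-work pattern (a
minimal, self-contained sketch using a hypothetical struct conn; the
actual diff is below):

  #include <linux/workqueue.h>

  struct conn {
	struct delayed_work timeout_work;
  };

  static void conn_timeout(struct work_struct *work)
  {
	/* Runs in workqueue (process) context, not in an IRQ. */
	struct conn *c = container_of(work, struct conn,
				      timeout_work.work);

	(void)c;	/* handle the timeout here */
  }

  static void conn_init(struct conn *c)
  {
	INIT_DELAYED_WORK(&c->timeout_work, conn_timeout);
  }

  static void conn_set_timer(struct conn *c, long timeout)
  {
	/* Re-arm: drop any pending run, then queue a fresh one. */
	cancel_delayed_work(&c->timeout_work);
	schedule_delayed_work(&c->timeout_work, timeout);
  }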
As a note, cancel_delayed_work is used instead of
cancel_delayed_work_sync in sco_sock_set_timer and
sco_sock_clear_timer to avoid a deadlock. In the future, the call to
bh_lock_sock inside sco_sock_timeout should be changed to lock_sock to
synchronize with other functions using lock_sock. However, since
sco_sock_set_timer and sco_sock_clear_timer are sometimes called under
the locked socket (in sco_connect and __sco_sock_close),
cancel_delayed_work_sync might cause them to sleep until an
sco_sock_timeout that has started finishes running. But
sco_sock_timeout would also sleep until it can grab the lock_sock.
Using cancel_delayed_work is fine because sco_sock_timeout does not
change from run to run, hence there is no functional difference
between:
1. waiting for a timeout to finish running before scheduling another
timeout
2. scheduling another timeout while a timeout is running.
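To make the avoided deadlock concrete, an illustration only, assuming
the future state where sco_sock_timeout() takes lock_sock()
(sco_sock_set_timer_sync() is hypothetical and intentionally wrong):

  static void sco_sock_set_timer_sync(struct sock *sk, long timeout)
  {
	lock_sock(sk);	/* e.g. the sco_connect() path holds this lock */
	/*
	 * Sleeps until a running sco_sock_timeout() returns, but that
	 * handler would itself sleep in lock_sock(sk) waiting for us:
	 * neither context can make progress.
	 */
	cancel_delayed_work_sync(&sco_pi(sk)->conn->timeout_work);
	schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout);
	release_sock(sk);
  }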
Link: https://syzkaller.appspot.com/bug?id=9089d89de0502e120f234ca0fc8a703f7368b3… [1]
Reported-by: syzbot+2f6d7c28bb4bf7e82060(a)syzkaller.appspotmail.com
Tested-by: syzbot+2f6d7c28bb4bf7e82060(a)syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx(a)gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
[OP: adjusted context for 4.14]
Signed-off-by: Ovidiu Panait <ovidiu.panait(a)windriver.com>
---
Note: these two fixes are part of the CVE-2021-3640 patchset and are
already present in 4.14+ stable branches. For this backport, small
contextual adjustments were made to account for the old setup_timer()
API usage.
net/bluetooth/sco.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index a5cc8942fc3f..69d489f1f363 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -48,6 +48,8 @@ struct sco_conn {
spinlock_t lock;
struct sock *sk;
+ struct delayed_work timeout_work;
+
unsigned int mtu;
};
@@ -73,9 +75,20 @@ struct sco_pinfo {
#define SCO_CONN_TIMEOUT (HZ * 40)
#define SCO_DISCONN_TIMEOUT (HZ * 2)
-static void sco_sock_timeout(unsigned long arg)
+static void sco_sock_timeout(struct work_struct *work)
{
- struct sock *sk = (struct sock *)arg;
+ struct sco_conn *conn = container_of(work, struct sco_conn,
+ timeout_work.work);
+ struct sock *sk;
+
+ sco_conn_lock(conn);
+ sk = conn->sk;
+ if (sk)
+ sock_hold(sk);
+ sco_conn_unlock(conn);
+
+ if (!sk)
+ return;
BT_DBG("sock %p state %d", sk, sk->sk_state);
@@ -89,14 +102,21 @@ static void sco_sock_timeout(unsigned long arg)
static void sco_sock_set_timer(struct sock *sk, long timeout)
{
+ if (!sco_pi(sk)->conn)
+ return;
+
BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
+ schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout);
}
static void sco_sock_clear_timer(struct sock *sk)
{
+ if (!sco_pi(sk)->conn)
+ return;
+
BT_DBG("sock %p state %d", sk, sk->sk_state);
- sk_stop_timer(sk, &sk->sk_timer);
+ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
}
/* ---- SCO connections ---- */
@@ -176,6 +196,9 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
sco_chan_del(sk, err);
bh_unlock_sock(sk);
sock_put(sk);
+
+ /* Ensure no more work items will run before freeing conn. */
+ cancel_delayed_work_sync(&conn->timeout_work);
}
hcon->sco_data = NULL;
@@ -190,6 +213,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
sco_pi(sk)->conn = conn;
conn->sk = sk;
+ INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
+
if (parent)
bt_accept_enqueue(parent, sk, true);
}
@@ -466,8 +491,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
- setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk);
-
bt_sock_link(&sco_sk_list, sk);
return sk;
}
--
2.25.1
Syzbot detected a NULL pointer dereference of the nfc_llcp_sock->dev
pointer (which is a 'struct nfc_dev *') in calls to llcp_sock_sendmsg()
after a failed llcp_sock_bind(). The message was being sent on a
SOCK_DGRAM socket.
KASAN report:
BUG: KASAN: null-ptr-deref in nfc_alloc_send_skb+0x2d/0xc0
Read of size 4 at addr 00000000000005c8 by task llcp_sock_nfc_a/899
CPU: 5 PID: 899 Comm: llcp_sock_nfc_a Not tainted 5.16.0-rc6-next-20211224-00001-gc6437fbf18b0 #125
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x45/0x59
? nfc_alloc_send_skb+0x2d/0xc0
__kasan_report.cold+0x117/0x11c
? mark_lock+0x480/0x4f0
? nfc_alloc_send_skb+0x2d/0xc0
kasan_report+0x38/0x50
nfc_alloc_send_skb+0x2d/0xc0
nfc_llcp_send_ui_frame+0x18c/0x2a0
? nfc_llcp_send_i_frame+0x230/0x230
? __local_bh_enable_ip+0x86/0xe0
? llcp_sock_connect+0x470/0x470
? llcp_sock_connect+0x470/0x470
sock_sendmsg+0x8e/0xa0
____sys_sendmsg+0x253/0x3f0
...
The issue was visible only with multiple simultaneous calls to bind()
and sendmsg(), which resulted in most of the bind() calls failing. The
bind() was failing while checking whether a WKS/SDP/SAP is available
(the respective bit in the 'struct nfc_llcp_local' fields). When no
WKS/SDP/SAP was available, bind() returned an error, but a sendmsg() on
such a socket could still trigger the mentioned NULL pointer
dereference of nfc_llcp_sock->dev.
The code looks simply racy: currently it protects several paths against
the race with checks for (!nfc_llcp_sock->local), which is NULL-ified
in the error paths of bind(). llcp_sock_sendmsg() did not have such a
check, but the function it calls, nfc_llcp_send_ui_frame(), did,
although that check was not protected with lock_sock().
Therefore the race could look like (same socket is used all the time):
      CPU0                       CPU1
      ====                       ====
llcp_sock_bind()
 - lock_sock()
 - success
 - release_sock()
 - return 0
                                 llcp_sock_sendmsg()
                                  - lock_sock()
                                  - release_sock()
llcp_sock_bind(), same socket
 - lock_sock()
 - error
                                  - nfc_llcp_send_ui_frame()
                                    - if (!llcp_sock->local)
 - llcp_sock->local = NULL
 - nfc_put_device(dev)
                                    - dereference llcp_sock->dev
 - release_sock()
 - return -ERRNO
The nfc_llcp_send_ui_frame() checked llcp_sock->local outside of the
lock, which is a racy and ineffective check. Instead, its caller
llcp_sock_sendmsg() should perform the check inside lock_sock().
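The resulting pattern puts both the check and the use under the same
socket lock (a sketch of what the one-hunk fix below does):

  lock_sock(sk);

  if (!llcp_sock->local) {	/* NULL-ified by bind()'s error path */
	release_sock(sk);
	return -ENODEV;
  }

  /*
   * A failed bind() NULL-ifies llcp_sock->local under this same lock,
   * so the check above cannot race with it.
   */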
Reported-and-tested-by: syzbot+7f23bcddf626e0593a39(a)syzkaller.appspotmail.com
Fixes: b874dec21d1c ("NFC: Implement LLCP connection less Tx path")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
---
Changes since v1:
1. Only changes: split out as an independent patch and updated the
   Syzbot tested tag.
---
net/nfc/llcp_sock.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 6cfd30fc0798..0b93a17b9f11 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
+ if (!llcp_sock->local) {
+ release_sock(sk);
+ return -ENODEV;
+ }
+
if (sk->sk_type == SOCK_DGRAM) {
DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
msg->msg_name);
--
2.32.0