csdlock_debug uses early_param and static_branch_enable() to enable
the csd_lock_wait feature, which triggers a panic on arm64 with config:
CONFIG_SPARSEMEM=y
CONFIG_SPARSEMEM_VMEMMAP=n
With CONFIG_SPARSEMEM_VMEMMAP=n, __nr_to_section() is called in
static_key_enable() and returns NULL, which leads to a NULL dereference,
because mem_section is initialized in sparse_init(), which runs later
than the parse_early_param() stage.
This is also broken for powerpc, because the early_param stage runs
earlier than jump_label_init(), so static_branch_enable() won't work.
powerpc throws a warning: "static key 'xxx' used before call
to jump_label_init()".
Thus, early_param is too early for csdlock_debug to run
static_branch_enable(), so change it to __setup to fix these issues.
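For reference, a minimal sketch of the resulting __setup handler
(illustrative only, not the exact kernel code; see the diff below for
the actual change):

static int __init csdlock_debug(char *str)
{
	int val = 0;

	get_option(&str, &val);
	if (val)
		static_branch_enable(&csdlock_debug_enabled);

	/* __setup handlers return 1 to mark the option as handled,
	 * unlike early_param handlers, which return 0 on success. */
	return 1;
}
__setup("csdlock_debug=", csdlock_debug);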
Fixes: 8d0968cc6b8f ("locking/csd_lock: Add boot parameter for controlling CSD lock debugging")
Cc: stable(a)vger.kernel.org
Reported-by: Chen jingwen <chenjingwen6(a)huawei.com>
Signed-off-by: Chen Zhongjin <chenzhongjin(a)huawei.com>
---
Change v3 -> v4:
Fix title and description because this fix is also applied
to powerpc.
For more detailed arm64 bug report see:
https://lore.kernel.org/linux-arm-kernel/e8715911-f835-059d-27f8-cc5f5ad30a…
Change v2 -> v3:
Add module name in title
Change v1 -> v2:
Fix return 1 for __setup
---
kernel/smp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index 65a630f62363..381eb15cd28f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -174,9 +174,9 @@ static int __init csdlock_debug(char *str)
if (val)
static_branch_enable(&csdlock_debug_enabled);
- return 0;
+ return 1;
}
-early_param("csdlock_debug", csdlock_debug);
+__setup("csdlock_debug=", csdlock_debug);
static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
--
2.17.1
Hello Greg
5.18.1-rc1
compiles [1], boots and runs here on x86_64
(Intel i5-11400, Fedora 36)
Thanks
Tested-by: Ronald Warsow <rwarsow(a)gmx.de>
[1]
not without similar warnings I mentioned here:
https://marc.info/?l=linux-kernel&m=165333405018563&w=4
Ronald
The hartid variable is of type int but is compared with
ULONG_MAX (INVALID_HARTID). Fix this by changing the hartid
variable type to unsigned long.
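To illustrate the narrowing problem, a standalone sketch (not kernel
code; INVALID_HARTID is assumed to be ULONG_MAX as in the kernel):

#include <stdio.h>
#include <limits.h>

int main(void)
{
	unsigned long map = ULONG_MAX;	/* INVALID_HARTID sentinel */
	int narrow = map;	/* truncates: the sentinel no longer
				 * round-trips, and any valid hartid
				 * above INT_MAX is silently corrupted */
	unsigned long wide = map;	/* preserves the full value */

	printf("narrow=%d wide=%lu\n", narrow, wide);
	return 0;
}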
Fixes: c78f94f35cf6 ("RISC-V: Use __cpu_up_stack/task_pointer only for spinwait method")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sunil V L <sunilvl(a)ventanamicro.com>
Reviewed-by: Atish Patra <atishp(a)rivosinc.com>
---
arch/riscv/kernel/cpu_ops_spinwait.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index 346847f6c41c..3ade9152a3c7 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -18,7 +18,7 @@ void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data");
static void cpu_update_secondary_bootdata(unsigned int cpuid,
struct task_struct *tidle)
{
- int hartid = cpuid_to_hartid_map(cpuid);
+ unsigned long hartid = cpuid_to_hartid_map(cpuid);
/*
* The hartid must be less than NR_CPUS to avoid out-of-bound access
@@ -27,7 +27,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
* spinwait booting is not the recommended approach for any platforms
* booting Linux in S-mode and can be disabled in the future.
*/
- if (hartid == INVALID_HARTID || hartid >= NR_CPUS)
+ if (hartid == INVALID_HARTID || hartid >= (unsigned long) NR_CPUS)
return;
/* Make sure tidle is updated */
--
2.25.1
It looks like when we moved nouveau over to using drm_dp_aux_init()
and registering its AUX bus during late connector registration, we totally
forgot to fix the failure codepath in nouveau_connector_create() - it
still assumes that drm_dp_aux_init() can fail (it can't).
So, let's fix that and also add a missing check to ensure that we've
properly allocated nv_connector->aux.name while we're at it.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Fixes: fd43ad9d47e7 ("drm/nouveau/kms/nv50-: Move AUX adapter reg to connector late register/early unregister")
Cc: <stable(a)vger.kernel.org> # v5.14+
---
drivers/gpu/drm/nouveau/nouveau_connector.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 22b83a6577eb..df83c4654e26 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -1361,13 +1361,11 @@ nouveau_connector_create(struct drm_device *dev,
snprintf(aux_name, sizeof(aux_name), "sor-%04x-%04x",
dcbe->hasht, dcbe->hashm);
nv_connector->aux.name = kstrdup(aux_name, GFP_KERNEL);
- drm_dp_aux_init(&nv_connector->aux);
- if (ret) {
- NV_ERROR(drm, "Failed to init AUX adapter for sor-%04x-%04x: %d\n",
- dcbe->hasht, dcbe->hashm, ret);
+ if (!nv_connector->aux.name) {
kfree(nv_connector);
- return ERR_PTR(ret);
+ return ERR_PTR(-ENOMEM);
}
+ drm_dp_aux_init(&nv_connector->aux);
fallthrough;
default:
funcs = &nouveau_connector_funcs;
--
2.35.3
Hi Greg,
Can you please consider taking the patch "KVM: x86: hyper-v: fix type
of valid_bank_mask" into stable?
Commit ea8c66fe8d8f4f93df941e52120a3512d7bf5128 upstream.
Thanks,
Yury
The patch titled
Subject: mm/page_alloc: always attempt to allocate at least one page during bulk allocation
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-page_alloc-always-attempt-to-allocate-at-least-one-page-during-bulk-allocation.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Mel Gorman <mgorman(a)techsingularity.net>
Subject: mm/page_alloc: always attempt to allocate at least one page during bulk allocation
Date: Thu, 26 May 2022 10:12:10 +0100
Peter Pavlisko reported the following problem on kernel bugzilla 216007.
When I try to extract an uncompressed tar archive (2.6 million
files, 760.3 GiB in size) on a newly created (empty) XFS file system,
after the first low tens of gigabytes extracted the process hangs in
iowait indefinitely. One CPU core is 100% occupied with iowait,
the other CPU core is idle (on a 2-core Intel Celeron G1610T).
It was bisected to c9fa563072e1 ("xfs: use alloc_pages_bulk_array() for
buffers") but XFS is only the messenger. The problem is that nothing is
waking kswapd to reclaim some pages at a time the PCP lists cannot be
refilled until some reclaim happens. The bulk allocator checks that there
are some pages in the array and the original intent was that a bulk
allocator did not necessarily need all the requested pages and it was best
to return as quickly as possible.
This was fine for the first user of the API but both NFS and XFS require
the requested number of pages be available before making progress. Both
could be adjusted to call the page allocator directly if a bulk allocation
fails but it puts a burden on users of the API. Adjust the semantics to
attempt at least one allocation via __alloc_pages() before returning so
kswapd is woken if necessary.
It was reported via bugzilla that the patch addressed the problem and that
the tar extraction completed successfully. This may also address bug
215975 but has yet to be confirmed.
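A condensed sketch of the semantic change in __alloc_pages_bulk()
(simplified from the hunk below):

	/* nr_populated: array slots already holding pages, including
	 *               any the caller pre-populated before this call.
	 * nr_account:   pages actually allocated by this call.
	 */
	while (nr_populated < nr_pages) {
		page = __rmqueue_pcplist(...);
		if (unlikely(!page)) {
			/* Old: give up if the array holds anything at
			 * all (!nr_populated), so a pre-populated array
			 * could return with zero new pages and kswapd
			 * never woken. New: fall back (!nr_account), so
			 * __alloc_pages() is attempted at least once. */
			if (!nr_account)
				goto failed_irq;
			break;
		}
		nr_account++;
		...
	}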
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216007
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215975
Link: https://lkml.kernel.org/r/20220526091210.GC3441@techsingularity.net
Fixes: 387ba26fb1cb ("mm/page_alloc: add a bulk page allocator")
Signed-off-by: Mel Gorman <mgorman(a)techsingularity.net>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <dchinner(a)redhat.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Jesper Dangaard Brouer <brouer(a)redhat.com>
Cc: Chuck Lever <chuck.lever(a)oracle.com>
Cc: <stable(a)vger.kernel.org> # v5.13+
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/page_alloc.c~mm-page_alloc-always-attempt-to-allocate-at-least-one-page-during-bulk-allocation
+++ a/mm/page_alloc.c
@@ -5324,8 +5324,8 @@ unsigned long __alloc_pages_bulk(gfp_t g
page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
pcp, pcp_list);
if (unlikely(!page)) {
- /* Try and get at least one page */
- if (!nr_populated)
+ /* Try and allocate at least one page */
+ if (!nr_account)
goto failed_irq;
break;
}
_
Patches currently in -mm which might be from mgorman(a)techsingularity.net are
mm-page_alloc-always-attempt-to-allocate-at-least-one-page-during-bulk-allocation.patch
mm-page_alloc-add-page-buddy_list-and-page-pcp_list.patch
mm-page_alloc-use-only-one-pcp-list-for-thp-sized-allocations.patch
mm-page_alloc-split-out-buddy-removal-code-from-rmqueue-into-separate-helper.patch
mm-page_alloc-protect-pcp-lists-with-a-spinlock.patch
hey Greg,
I'm proposing the following commits for stable, as they fix an
oops we're seeing in our testing[*]:
This is needed to 4.14.y -> 5.18.y:
1bbc21785b73 ACPI: sysfs: Fix BERT error region memory mapping
A dependency of the above, needed for 4.14.y -> 5.10.y
bdd56d7d8931 ACPI: sysfs: Make sparse happy about address space in use
-dann
[*] https://launchpad.net/bugs/1973153
Upon nfsd shutdown any pending DRC cache is freed. DRC cache use is
tracked via a percpu counter. In the current code the percpu counter
is destroyed before the cache is freed. If any pending cache entries
are still present, percpu_counter_add() is called on an already
destroyed percpu counter. This causes a kernel crash.
The solution is to destroy the percpu counter after the cache is freed.
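A minimal sketch of the ordering constraint (helper names are
illustrative, not the actual nfsd code):

	/* Broken order: */
	percpu_counter_destroy(&nn->counter);
	free_all_cache_entries(nn);	/* each free calls
					 * percpu_counter_add() on the
					 * destroyed counter -> crash */

	/* Fixed order: */
	free_all_cache_entries(nn);
	percpu_counter_destroy(&nn->counter);	/* nothing touches it
						 * afterwards */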
Fixes: e567b98ce9a4b ("nfsd: protect concurrent access to nfsd stats counters")
Signed-off-by: Julian Schroeder <jumaco(a)amazon.com>
---
fs/nfsd/nfscache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 0b3f12aa37ff..7da88bdc0d6c 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -206,7 +206,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
struct svc_cacherep *rp;
unsigned int i;
- nfsd_reply_cache_stats_destroy(nn);
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
for (i = 0; i < nn->drc_hashsize; i++) {
@@ -217,6 +216,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
rp, nn);
}
}
+ nfsd_reply_cache_stats_destroy(nn);
kvfree(nn->drc_hashtbl);
nn->drc_hashtbl = NULL;
--
2.32.0
From: Eric Dumazet <edumazet(a)google.com>
commit 190cc82489f46f9d88e73c81a47e14f80a791e1a upstream
RFC 6056 (Recommendations for Transport-Protocol Port Randomization)
provides a good summary of why source port selection needs extra care.
David Dworken reminded us that Linux implements Algorithm 3
as described in RFC 6056 section 3.3.3.
Quoting David :
In the context of the web, this creates an interesting info leak where
websites can count how many TCP connections a user's computer is
establishing over time. For example, this allows a website to count
exactly how many subresources a third party website loaded.
This also allows:
- Distinguishing between different users behind a VPN based on
distinct source port ranges.
- Tracking users over time across multiple networks.
- Covert communication channels between different browsers/browser
profiles running on the same computer
- Tracking what applications are running on a computer based on
the pattern of how fast source ports are getting incremented.
Section 3.3.4 describes an enhancement that reduces an
attacker's ability to use the basic information currently
stored in the shared 'u32 hint'.
This change also decreases the collision rate when
multiple applications need to connect() to
different destinations.
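A condensed sketch of the change (simplified from the hunks below):

	/* Before: a single hint shared by every connect():
	 *
	 *	offset = (hint + port_offset) % remaining;
	 *	...
	 *	hint += i + 2;
	 *
	 * After: 256 perturbation values, indexed by a hash derived
	 * from the destination, so the port progression observed for
	 * one destination reveals little about connections to others:
	 */
	index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
	offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
	...
	WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);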
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: David Dworken <ddworken(a)google.com>
Cc: Willem de Bruijn <willemb(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
[SG: Adjusted context]
Signed-off-by: Stefan Ghinea <stefan.ghinea(a)windriver.com>
---
net/ipv4/inet_hashtables.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1346e45cf8d1..0bc6549c38b1 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -587,6 +587,17 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
+/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
+ * Note that we use 32bit integers (vs RFC 'short integers')
+ * because 2^16 is not a multiple of num_ephemeral and this
+ * property might be used by clever attacker.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
+ * we use 256 instead to really give more isolation and
+ * privacy, this only consumes 1 KB of kernel memory.
+ */
+#define INET_TABLE_PERTURB_SHIFT 8
+static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
@@ -600,7 +611,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb;
u32 remaining, offset;
int ret, i, low, high;
- static u32 hint;
+ u32 index;
if (port) {
head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -625,7 +636,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- offset = (hint + port_offset) % remaining;
+ net_get_random_once(table_perturb, sizeof(table_perturb));
+ index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+
+ offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -678,7 +692,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
return -EADDRNOTAVAIL;
ok:
- hint += i + 2;
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
--
2.36.1
From: Magnus Karlsson <magnus.karlsson(a)intel.com>
Fix a crash in the zero-copy driver that occurs when it fails to
allocate buffers from user-space. This crash can easily be triggered
by a malicious program that does not provide any buffers in the fill
ring for the kernel to use.
Note that this bug does not exist in upstream since the batched buffer
allocation interface got introduced in 5.16 and replaced this code.
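The bug is purely at the pointer level; a minimal illustration (field
names are illustrative):

	struct xdp_buff **xdp = &rx_ring->xdp_bufs[i];	/* slot address,
							 * never NULL */

	*xdp = xsk_buff_alloc(rx_ring->xsk_pool);	/* may return NULL */
	if (!xdp)	/* old: tests the slot address, never trips */
		...
	if (!*xdp)	/* fixed: tests the allocation result */
		...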
Reported-by: Jeff Shaw <jeffrey.b.shaw(a)intel.com>
Tested-by: Jeff Shaw <jeffrey.b.shaw(a)intel.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson(a)intel.com>
---
drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 2b1873061912..5581747947e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -378,7 +378,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
do {
*xdp = xsk_buff_alloc(rx_ring->xsk_pool);
- if (!xdp) {
+ if (!*xdp) {
ok = false;
break;
}
base-commit: 9f43e3ac7e662f352f829077723fa0b92ccaded1
--
2.34.1
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9f46c187e2e680ecd9de7983e4d081c3391acc76 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Fri, 20 May 2022 13:48:11 -0400
Subject: [PATCH] KVM: x86/mmu: fix NULL pointer dereference on guest INVPCID
With shadow paging enabled, the INVPCID instruction results in a call
to kvm_mmu_invpcid_gva. If INVPCID is executed with CR0.PG=0, the
invlpg callback is not set and the result is a NULL pointer dereference.
Fix it trivially by checking for mmu->invlpg before every call.
There are other possibilities:
- check for CR0.PG, because KVM (like all Intel processors after P5)
flushes guest TLB on CR0.PG changes so that INVPCID/INVLPG are a
nop with paging disabled
- check for EFER.LMA, because KVM syncs and flushes when switching
MMU contexts outside of 64-bit mode
All of these are tricky, go for the simple solution. This is CVE-2022-1789.
Reported-by: Yongkang Jia <kangel(a)zju.edu.cn>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 56ebc4fb7f91..45e1573f8f1d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5470,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
uint i;
if (pcid == kvm_get_active_pcid(vcpu)) {
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->root.hpa);
tlb_flush = true;
}
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
}
Hi,
I would like to kindly request the inclusion of commit 24ef83f6e31d
("Input: goodix - fix spurious key release events") to the 5.4 stable
tree.
It fixes the spurious touches reported on an imx6dl board with Goodix
GT911 running kernel 5.4.
Thanks,
Fabio Estevam
Hi,
The firmware on some OEM laptops with AMD SOCs advertises sensors
connected to AMD SFH that aren't physically present. In 5.19 a commit
has gone in that detects this case and prevents the driver from
advertising these sensors to userspace. Having phantom sensors
advertised might not seem like a big deal, but AMD has observed that,
specifically for orientation sensors, the associated random garbage
data can cause userspace to interpret a screen rotation during resume
from suspend.
As GNOME has a daemon running that interprets these events, I've seen
first hand that it can cause the display to go upside down without a
lot of recourse other than command line tools or rebooting.
Can you please backport this commit to 5.15.y+ and later to fix this:
commit b5d7f43e97dabfa04a4be5ff027ce7da119332be ("HID: amd_sfh: Add support for sensor discovery")
Thanks,
The routine huge_pmd_unshare is passed a pointer to an address
associated with an area which may be unshared. If unshare is successful
this address is updated to 'optimize' callers iterating over huge page
addresses. For the optimization to work correctly, address should be
updated to the last huge page in the unmapped/unshared area. However,
in the common case where the passed address is PUD_SIZE aligned, the
address is incorrectly updated to the address of the preceding huge
page. That wastes CPU cycles as the unmapped/unshared range is scanned
twice.
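A worked example, assuming x86-64 sizes (PMD_SIZE = HPAGE_SIZE =
2 MiB, PUD_SIZE = 1 GiB, PTRS_PER_PTE = 512):

	*addr = 0x40000000	/* PUD-aligned */
	old: ALIGN(0x40000000, 1G) - 2M = 0x3fe00000
		/* the huge page *before* the unshared area */
	new: 0x40000000 | (1G - 2M) = 0x7fe00000
		/* the last huge page *inside* the area, so the
		 * caller's += PMD_SIZE step lands just past it */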
Cc: <stable(a)vger.kernel.org>
Fixes: 39dde65c9940 ("shared page table for hugetlb page")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
---
mm/hugetlb.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 01f0e2e5ab48..7c468ac1d069 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6755,7 +6755,14 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
pud_clear(pud);
put_page(virt_to_page(ptep));
mm_dec_nr_pmds(mm);
- *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+ /*
+ * This update of passed address optimizes loops sequentially
+ * processing addresses in increments of huge page size (PMD_SIZE
+ * in this case). By clearing the pud, a PUD_SIZE area is unmapped.
+ * Update address to the 'last page' in the cleared area so that
+ * calling loop can move to first page past this area.
+ */
+ *addr |= PUD_SIZE - PMD_SIZE;
return 1;
}
--
2.35.3
Resending this because the mail to stable(a)vger.kernel.org used a
wrong header field. Apologies for the duplicates.
On Thu, 19 May 2022 09:52:07 -0400
Sasha Levin <sashal(a)kernel.org> wrote:
> This is a note to let you know that I've just added the patch titled
>
> um: port_user: Improve error handling when port-helper is not found
>
> to the 5.17-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> um-port_user-improve-error-handling-when-port-helper.patch
> and it can be found in the queue-5.17 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
First, I should say that I'm not familiar with the process, so I'm
likely to be wrong on any number of things. Second, I'm the author of
this patch and I would like to see it included in the stable trees.
However, it appears to me that there is a problem with including just
this patch, as it depends on a previous patch which does not appear to
be applied[1].
> commit efc324ad7e7e1c92a8862bd71b2f5f8f15513304
> Author: Glenn Washburn <development(a)efficientek.com>
> Date: Thu Mar 3 01:53:32 2022 -0600
>
> um: port_user: Improve error handling when port-helper is not found
>
> [ Upstream commit 3cb5a7f167c620a8b0e38b0446df2e024d2243dc ]
>
> Check if port-helper exists and is executable. If not, write an error
> message to the kernel log with information to help the user diagnose the
> issue and exit with an error. If UML_PORT_HELPER was not set, write a
> message suggesting that the user set it. This makes it easier to understand
> why telneting to the UML instance is failing and what can be done to fix it.
>
> Signed-off-by: Glenn Washburn <development(a)efficientek.com>
> Signed-off-by: Richard Weinberger <richard(a)nod.at>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
> index 5b5b64cb1071..133ca7bf2d91 100644
> --- a/arch/um/drivers/port_user.c
> +++ b/arch/um/drivers/port_user.c
> @@ -5,6 +5,7 @@
>
> #include <stdio.h>
> #include <stdlib.h>
> +#include <string.h>
> #include <errno.h>
> #include <termios.h>
> #include <unistd.h>
> @@ -175,6 +176,17 @@ int port_connection(int fd, int *socket, int *pid_out)
> if (new < 0)
> return -errno;
>
> + err = os_access(argv[2], X_OK);
> + if (err < 0) {
> + printk(UM_KERN_ERR "port_connection : error accessing port-helper "
> + "executable at %s: %s\n", argv[2], strerror(-err));
> + if (env == NULL)
In the aforementioned patch that this patch depends on, "env" is
declared and set. Without it, I'd expect this to fail to compile. As
such, I may be wrong that the dependent patch was not already
included, because I'd expect there to have been a compile test prior
to this patch getting to this phase.
My suspicion is that the stable trees try not to include new
functionality, which the missing patch may have been considered to
add, and thus it was not included. If it's deemed undesirable to
include the missing patch, this "if" block can be removed. Although, I
think the missing patch is valuable enough to include.
The above goes for all the stable branches that this patch is set to be
included in.
Glenn
[1]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
> + printk(UM_KERN_ERR "Set UML_PORT_HELPER environment "
> + "variable to path to uml-utilities port-helper "
> + "binary\n");
> + goto out_close;
> + }
> +
> err = os_pipe(socket, 0, 0);
> if (err < 0)
> goto out_close;
On 5/20/2022 10:06 AM, Jan Beulich wrote:
> On 20.05.2022 15:33, Chuck Zmudzinski wrote:
>> On 5/20/2022 5:41 AM, Jan Beulich wrote:
>>> On 20.05.2022 10:30, Chuck Zmudzinski wrote:
>>>> On 5/20/2022 2:59 AM, Chuck Zmudzinski wrote:
>>>>> On 5/20/2022 2:05 AM, Jan Beulich wrote:
>>>>>> On 20.05.2022 06:43, Chuck Zmudzinski wrote:
>>>>>>> On 5/4/22 5:14 AM, Juergen Gross wrote:
>>>>>>>> On 04.05.22 10:31, Jan Beulich wrote:
>>>>>>>>> On 03.05.2022 15:22, Juergen Gross wrote:
>>>>>>>>>
>>>>>>>>> ... these uses there are several more. You say nothing on why
>>>>>>>>> those want
>>>>>>>>> leaving unaltered. When preparing my earlier patch I did inspect them
>>>>>>>>> and came to the conclusion that these all would also better
>>>>>>>>> observe the
>>>>>>>>> adjusted behavior (or else I couldn't have left pat_enabled() as the
>>>>>>>>> only predicate). In fact, as said in the description of my earlier
>>>>>>>>> patch, in
>>>>>>>>> my debugging I did find the use in i915_gem_object_pin_map() to be
>>>>>>>>> the
>>>>>>>>> problematic one, which you leave alone.
>>>>>>>> Oh, I missed that one, sorry.
>>>>>>> That is why your patch would not fix my Haswell unless
>>>>>>> it also touches i915_gem_object_pin_map() in
>>>>>>> drivers/gpu/drm/i915/gem/i915_gem_pages.c
>>>>>>>
>>>>>>>> I wanted to be rather defensive in my changes, but I agree at least
>>>>>>>> the
>>>>>>>> case in arch_phys_wc_add() might want to be changed, too.
>>>>>>> I think your approach needs to be more aggressive so it will fix
>>>>>>> all the known false negatives introduced by bdd8b6c98239
>>>>>>> such as the one in i915_gem_object_pin_map().
>>>>>>>
>>>>>>> I looked at Jan's approach and I think it would fix the issue
>>>>>>> with my Haswell as long as I don't use the nopat option. I
>>>>>>> really don't have a strong opinion on that question, but I
>>>>>>> think the nopat option as a Linux kernel option, as opposed
>>>>>>> to a hypervisor option, should only affect the kernel, and
>>>>>>> if the hypervisor provides the pat feature, then the kernel
>>>>>>> should not override that,
>>>>>> Hmm, why would the kernel not be allowed to override that? Such
>>>>>> an override would affect only the single domain where the
>>>>>> kernel runs; other domains could take their own decisions.
>>>>>>
>>>>>> Also, for the sake of completeness: "nopat" used when running on
>>>>>> bare metal has the same bad effect on system boot, so there
>>>>>> pretty clearly is an error cleanup issue in the i915 driver. But
>>>>>> that's orthogonal, and I expect the maintainers may not even care
>>>>>> (but tell us "don't do that then").
>>>> Actually I just did a test with the last official Debian kernel
>>>> build of Linux 5.16, that is, a kernel before bdd8b6c98239 was
>>>> applied. In fact, the nopat option does *not* break the i915 driver
>>>> in 5.16. That is, with the nopat option, the i915 driver loads
>>>> normally on both the bare metal and on the Xen hypervisor.
>>>> That means your presumption (and the presumption of
>>>> the author of bdd8b6c98239) that the "nopat" option was
>>>> being observed by the i915 driver is incorrect. Setting "nopat"
>>>> had no effect on my system with Linux 5.16. So after doing these
>>>> tests, I am against the aggressive approach of breaking the i915
>>>> driver with the "nopat" option because prior to bdd8b6c98239,
>>>> nopat did not break the i915 driver. Why break it now?
>>> Because that, in my understanding, is the purpose of "nopat"
>>> (not breaking the driver of course - that's a driver bug -, but
>>> having an effect on the driver).
>> I wouldn't call it a driver bug, but an incorrect configuration of the
>> kernel by the user. I presume X86_FEATURE_PAT is required by the
>> i915 driver
> The driver ought to work fine without PAT (and hence without being
> able to make WC mappings). It would use UC instead and be slow, but
> it ought to work.
>
>> and therefore the driver should refuse to disable
>> it if the user requests to disable it and instead warn the user that
>> the driver did not disable the feature, contrary to what the user
>> requested with the nopat option.
>>
>> In any case, my test did not verify that when nopat is set in Linux 5.16,
>> the thread takes the same code path as when nopat is not set,
>> so I am not totally sure that the reason nopat does not break the
>> i915 driver in 5.16 is that static_cpu_has(X86_FEATURE_PAT)
>> returns true even when nopat is set. I could test it with a custom
>> log message in 5.16 if that is necessary.
>>
>> Are you saying it was wrong for static_cpu_has(X86_FEATURE_PAT)
>> to return true in 5.16 when the user requests nopat?
> No, I'm not saying that. It was wrong for this construct to be used
> in the driver, which was fixed for 5.17 (and which had caused the
> regression I did observe, leading to the patch as a hopefully least
> bad option).
>
>> I think that is
>> just permitting a bad configuration to break the driver that a
>> well-written operating system should not allow. The i915 driver
>> was, in my opinion, correctly ignoring the nopat option in 5.16
>> because that option is not compatible with the hardware the
>> i915 driver is trying to initialize and setup at boot time. At least
>> that is my understanding now, but I will need to test it on 5.16
>> to be sure I understand it correctly.
>>
>> Also, AFAICT, your patch would break the driver when the nopat
>> option is set and only fix the regression introduced by bdd8b6c98239
>> when nopat is not set on my box, so your patch would
>> introduce a regression relative to Linux 5.16 and earlier for the
>> case when nopat is set on my box. I think your point would
>> be that it is not a regression if it is an incorrect user configuration.
> Again no - my view is that there's a separate, pre-existing issue
> in the driver which was uncovered by the change. This may be a
> perceived regression, but is imo different from a real one.
>
> Jan
Since it is a regression, I think for now bdd8b6c98239 should
be reverted and the fix backported to Linux 5.17 stable until
the underlying memory subsystem can provide the i915 driver
with an updated test for the PAT feature that also meets the
requirements of the author of bdd8b6c98239 without breaking
the i915 driver. The i915 driver relies on the memory subsystem
to provide it with an accurate test for the existence of
X86_FEATURE_PAT. I think your patch provides that more accurate
test so that bdd8b6c98239 could be re-applied when your patch is
committed. Juergen's patch would have to touch bdd8b6c98239
with new functions that probably have unknown and unintended
consequences, so I think your approach is also better in that regard.
As regards your patch, there is just a disagreement about how the
i915 driver should behave if nopat is set. I agree the i915 driver
could do a better job handling that case, at least with better error
logs.
Chuck
>
>> I respond by saying a well-written driver should refuse to honor
>> the incorrect configuration requested by the user and instead
>> warn the user that it did not honor the incorrect kernel option.
>>
>> I am only presuming what your patch would do on my box based
>> on what I learned about this problem from my debugging. I can
>> also test your patch on my box to verify that my understanding of
>> it is correct.
>>
>> I also have not yet verified Juergen's patch will not fix it, but
>> I am almost certain it will not unless it is expanded so it also
>> touches i915_gem_object_pin_map() with the fix. I plan to test
>> his patch, but expanded so it touches that function also.
>>
>> I also plan to test your patch with and without nopat and report the
>> results in the thread where you posted your patch. Hopefully
>> by tomorrow I will have the results.
>>
>> Chuck
>>
commit eadb2f47a3ced5c64b23b90fd2a3463f63726066 upstream.
KGDB and KDB allow read and write access to kernel memory, and thus
should be restricted during lockdown. An attacker with access to a
serial port (for example, via a hypervisor console, which some cloud
vendors provide over the network) could trigger the debugger so it is
important that the debugger respect the lockdown mode when/if it is
triggered.
Fix this by integrating lockdown into kdb's existing permissions
mechanism. Unfortunately kgdb does not have any permissions mechanism
(although it certainly could be added later) so, for now, kgdb is simply
and brutally disabled by immediately exiting the gdb stub without taking
any action.
For lockdowns established early in the boot (e.g. the normal case)
this should be fine, but on systems where kgdb has set breakpoints
before the lockdown is enacted, "bad things" will happen.
CVE: CVE-2022-21499
Co-developed-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Signed-off-by: Daniel Thompson <daniel.thompson(a)linaro.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
---
Notes:
Original patch did not backport cleanly. This backport is fixed up,
compile tested (on arm64) and side-by-side compared against the
original.
include/linux/security.h | 2 ++
kernel/debug/debug_core.c | 24 ++++++++++++++
kernel/debug/kdb/kdb_main.c | 62 ++++++++++++++++++++++++++++++++++--
security/lockdown/lockdown.c | 2 ++
4 files changed, 87 insertions(+), 3 deletions(-)
diff --git a/include/linux/security.h b/include/linux/security.h
index 3f6b8195ae9e..aa5c7141c8d1 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -118,10 +118,12 @@ enum lockdown_reason {
LOCKDOWN_MMIOTRACE,
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
+ LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
LOCKDOWN_BPF_READ,
+ LOCKDOWN_DBG_READ_KERNEL,
LOCKDOWN_PERF,
LOCKDOWN_TRACEFS,
LOCKDOWN_XMON_RW,
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 097ab02989f9..565987557ad8 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -56,6 +56,7 @@
#include <linux/vmacache.h>
#include <linux/rcupdate.h>
#include <linux/irq.h>
+#include <linux/security.h>
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
@@ -685,6 +686,29 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
continue;
kgdb_connected = 0;
} else {
+ /*
+ * This is a brutal way to interfere with the debugger
+ * and prevent gdb being used to poke at kernel memory.
+ * This could cause trouble if lockdown is applied when
+ * there is already an active gdb session. For now the
+ * answer is simply "don't do that". Typically lockdown
+ * *will* be applied before the debug core gets started
+ * so only developers using kgdb for fairly advanced
+ * early kernel debug can be biten by this. Hopefully
+ * they are sophisticated enough to take care of
+ * themselves, especially with help from the lockdown
+ * message printed on the console!
+ */
+ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
+ if (IS_ENABLED(CONFIG_KGDB_KDB)) {
+ /* Switch back to kdb if possible... */
+ dbg_kdb_mode = 1;
+ continue;
+ } else {
+ /* ... otherwise just bail */
+ break;
+ }
+ }
error = gdb_serial_stub(ks);
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 4567fe998c30..7c96bf9a6c2c 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/security.h>
#include "kdb_private.h"
#undef MODULE_PARAM_PREFIX
@@ -198,10 +199,62 @@ struct task_struct *kdb_curr_task(int cpu)
}
/*
- * Check whether the flags of the current command and the permissions
- * of the kdb console has allow a command to be run.
+ * Update the permissions flags (kdb_cmd_enabled) to match the
+ * current lockdown state.
+ *
+ * Within this function the calls to security_locked_down() are "lazy". We
+ * avoid calling them if the current value of kdb_cmd_enabled already excludes
+ * flags that might be subject to lockdown. Additionally we deliberately check
+ * the lockdown flags independently (even though read lockdown implies write
+ * lockdown) since that results in both simpler code and clearer messages to
+ * the user on first-time debugger entry.
+ *
+ * The permission masks during a read+write lockdown permits the following
+ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
+ *
+ * The INSPECT commands are not blocked during lockdown because they are
+ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
+ * forcing them to have no arguments) and lsmod. These commands do expose
+ * some kernel state but do not allow the developer seated at the console to
+ * choose what state is reported. SIGNAL and REBOOT should not be controversial,
+ * given these are allowed for root during lockdown already.
+ */
+static void kdb_check_for_lockdown(void)
+{
+ const int write_flags = KDB_ENABLE_MEM_WRITE |
+ KDB_ENABLE_REG_WRITE |
+ KDB_ENABLE_FLOW_CTRL;
+ const int read_flags = KDB_ENABLE_MEM_READ |
+ KDB_ENABLE_REG_READ;
+
+ bool need_to_lockdown_write = false;
+ bool need_to_lockdown_read = false;
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
+ need_to_lockdown_write =
+ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
+ need_to_lockdown_read =
+ security_locked_down(LOCKDOWN_DBG_READ_KERNEL);
+
+ /* De-compose KDB_ENABLE_ALL if required */
+ if (need_to_lockdown_write || need_to_lockdown_read)
+ if (kdb_cmd_enabled & KDB_ENABLE_ALL)
+ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;
+
+ if (need_to_lockdown_write)
+ kdb_cmd_enabled &= ~write_flags;
+
+ if (need_to_lockdown_read)
+ kdb_cmd_enabled &= ~read_flags;
+}
+
+/*
+ * Check whether the flags of the current command, the permissions of the kdb
+ * console and the lockdown state allow a command to be run.
*/
-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
bool no_args)
{
/* permissions comes from userspace so needs massaging slightly */
@@ -1188,6 +1241,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
kdb_curr_task(raw_smp_processor_id());
KDB_DEBUG_STATE("kdb_local 1", reason);
+
+ kdb_check_for_lockdown();
+
kdb_go_count = 0;
if (reason == KDB_REASON_DEBUG) {
/* special case below */
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 3f38583bed06..655a6edb5d7f 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -33,10 +33,12 @@ static const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
[LOCKDOWN_DEBUGFS] = "debugfs access",
[LOCKDOWN_XMON_WR] = "xmon write access",
+ [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+ [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
[LOCKDOWN_PERF] = "unsafe use of perf",
[LOCKDOWN_TRACEFS] = "use of tracefs",
[LOCKDOWN_XMON_RW] = "xmon read and write access",
base-commit: 04b092e4a01a3488e762897e2d29f85eda2c6a60
--
2.35.1
commit eadb2f47a3ced5c64b23b90fd2a3463f63726066 upstream.
KGDB and KDB allow read and write access to kernel memory, and thus
should be restricted during lockdown. An attacker with access to a
serial port (for example, via a hypervisor console, which some cloud
vendors provide over the network) could trigger the debugger so it is
important that the debugger respect the lockdown mode when/if it is
triggered.
Fix this by integrating lockdown into kdb's existing permissions
mechanism. Unfortunately kgdb does not have any permissions mechanism
(although it certainly could be added later) so, for now, kgdb is simply
and brutally disabled by immediately exiting the gdb stub without taking
any action.
For lockdowns established early in the boot (e.g. the normal case)
this should be fine, but on systems where kgdb has set breakpoints
before the lockdown is enacted, "bad things" will happen.
CVE: CVE-2022-21499
Co-developed-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Signed-off-by: Daniel Thompson <daniel.thompson(a)linaro.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
---
Notes:
Original patch did not backport cleanly. This backport is fixed up,
compile tested (on arm64) and side-by-side compared against the
original.
include/linux/security.h | 2 ++
kernel/debug/debug_core.c | 24 ++++++++++++++
kernel/debug/kdb/kdb_main.c | 62 +++++++++++++++++++++++++++++++++++--
security/security.c | 2 ++
4 files changed, 87 insertions(+), 3 deletions(-)
diff --git a/include/linux/security.h b/include/linux/security.h
index 35355429648e..330029ef7e89 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -121,10 +121,12 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
+ LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
LOCKDOWN_BPF_READ,
+ LOCKDOWN_DBG_READ_KERNEL,
LOCKDOWN_PERF,
LOCKDOWN_TRACEFS,
LOCKDOWN_XMON_RW,
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 8661eb2b1771..0f31b22abe8d 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -56,6 +56,7 @@
#include <linux/vmacache.h>
#include <linux/rcupdate.h>
#include <linux/irq.h>
+#include <linux/security.h>
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
@@ -756,6 +757,29 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
continue;
kgdb_connected = 0;
} else {
+ /*
+ * This is a brutal way to interfere with the debugger
+ * and prevent gdb being used to poke at kernel memory.
+ * This could cause trouble if lockdown is applied when
+ * there is already an active gdb session. For now the
+ * answer is simply "don't do that". Typically lockdown
+ * *will* be applied before the debug core gets started
+ * so only developers using kgdb for fairly advanced
+ * early kernel debug can be biten by this. Hopefully
+ * they are sophisticated enough to take care of
+ * themselves, especially with help from the lockdown
+ * message printed on the console!
+ */
+ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
+ if (IS_ENABLED(CONFIG_KGDB_KDB)) {
+ /* Switch back to kdb if possible... */
+ dbg_kdb_mode = 1;
+ continue;
+ } else {
+ /* ... otherwise just bail */
+ break;
+ }
+ }
error = gdb_serial_stub(ks);
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 930ac1b25ec7..4e09fab52faf 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/security.h>
#include "kdb_private.h"
#undef MODULE_PARAM_PREFIX
@@ -197,10 +198,62 @@ struct task_struct *kdb_curr_task(int cpu)
}
/*
- * Check whether the flags of the current command and the permissions
- * of the kdb console has allow a command to be run.
+ * Update the permissions flags (kdb_cmd_enabled) to match the
+ * current lockdown state.
+ *
+ * Within this function the calls to security_locked_down() are "lazy". We
+ * avoid calling them if the current value of kdb_cmd_enabled already excludes
+ * flags that might be subject to lockdown. Additionally we deliberately check
+ * the lockdown flags independently (even though read lockdown implies write
+ * lockdown) since that results in both simpler code and clearer messages to
+ * the user on first-time debugger entry.
+ *
+ * The permission masks during a read+write lockdown permits the following
+ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
+ *
+ * The INSPECT commands are not blocked during lockdown because they are
+ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
+ * forcing them to have no arguments) and lsmod. These commands do expose
+ * some kernel state but do not allow the developer seated at the console to
+ * choose what state is reported. SIGNAL and REBOOT should not be controversial,
+ * given these are allowed for root during lockdown already.
+ */
+static void kdb_check_for_lockdown(void)
+{
+ const int write_flags = KDB_ENABLE_MEM_WRITE |
+ KDB_ENABLE_REG_WRITE |
+ KDB_ENABLE_FLOW_CTRL;
+ const int read_flags = KDB_ENABLE_MEM_READ |
+ KDB_ENABLE_REG_READ;
+
+ bool need_to_lockdown_write = false;
+ bool need_to_lockdown_read = false;
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
+ need_to_lockdown_write =
+ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
+ need_to_lockdown_read =
+ security_locked_down(LOCKDOWN_DBG_READ_KERNEL);
+
+ /* De-compose KDB_ENABLE_ALL if required */
+ if (need_to_lockdown_write || need_to_lockdown_read)
+ if (kdb_cmd_enabled & KDB_ENABLE_ALL)
+ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;
+
+ if (need_to_lockdown_write)
+ kdb_cmd_enabled &= ~write_flags;
+
+ if (need_to_lockdown_read)
+ kdb_cmd_enabled &= ~read_flags;
+}
+
+/*
+ * Check whether the flags of the current command, the permissions of the kdb
+ * console and the lockdown state allow a command to be run.
*/
-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
bool no_args)
{
/* permissions comes from userspace so needs massaging slightly */
@@ -1194,6 +1247,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
kdb_curr_task(raw_smp_processor_id());
KDB_DEBUG_STATE("kdb_local 1", reason);
+
+ kdb_check_for_lockdown();
+
kdb_go_count = 0;
if (reason == KDB_REASON_DEBUG) {
/* special case below */
diff --git a/security/security.c b/security/security.c
index d9d42d64f89f..360706cdabab 100644
--- a/security/security.c
+++ b/security/security.c
@@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_DEBUGFS] = "debugfs access",
[LOCKDOWN_XMON_WR] = "xmon write access",
[LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
+ [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+ [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
[LOCKDOWN_PERF] = "unsafe use of perf",
[LOCKDOWN_TRACEFS] = "use of tracefs",
[LOCKDOWN_XMON_RW] = "xmon read and write access",
base-commit: c204ee3350ebbc4e2ab108cbce7afc0cac1c407d
--
2.35.1
[ Upstream commit ae66fb2ba6c3dcaf8b9612b65aa949a1a4bed150 ]
RFC 8684 section 3.7 describes several opportunities for a MPTCP
connection to "fall back" to regular TCP early in the connection
process, before it has been confirmed that MPTCP options can be
successfully propagated on all SYN, SYN/ACK, and data packets. If a peer
acknowledges the first received data packet with a regular TCP header
(no MPTCP options), fallback is allowed.
If the recipient of that first data packet finds a MPTCP DSS checksum
error, this provides an opportunity to fail gracefully with a TCP
fallback rather than resetting the connection (as might happen if a
checksum failure were detected later).
This commit modifies the checksum failure code to attempt fallback on
the initial subflow of a MPTCP connection, only if it's a failure in the
first data mapping. In cases where the peer initiates the connection,
requests checksums, is the first to send data, and the peer is sending
incorrect checksums (see
https://github.com/multipath-tcp/mptcp_net-next/issues/275), this allows
the connection to proceed as TCP rather than reset.
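A condensed restatement of when the fixed code allows fallback rather
than reset (see subflow_can_fallback() in the diff below):

	/* Fall back to TCP instead of resetting iff this is the
	 * initial subflow (not an MP_JOIN) and either:
	 *   - checksums are enabled and no mapping has passed
	 *     validation yet (i.e. the very first one failed), or
	 *   - checksums are disabled and the subflow is not yet
	 *     fully established.
	 * Otherwise the error remains fatal (MP_FAIL / reset).
	 */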
Cc: <stable(a)vger.kernel.org> # 5.17.x
Cc: <stable(a)vger.kernel.org> # 5.15.x
Fixes: dd8bcd1768ff ("mptcp: validate the data checksum")
Acked-by: Paolo Abeni <pabeni(a)redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
[mathew.j.martineau: backport: Resolved bitfield conflict in protocol.h]
Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com>
---
This patch is already in 5.17.10-rc1 and 5.15.42-rc1, but involves a
context dependency on upstream commit 4cf86ae84c71 which I have
requested to be dropped from the stable queues.
I'm posting this backport without the protocol.h conflict to
(hopefully?) make it easier for the stable maintainers to drop
4cf86ae84c71.
For context see https://lore.kernel.org/stable/fa953ec-288f-7715-c6fb-47a222e85270@linux.in…
Thanks,
Mat
---
net/mptcp/protocol.h | 3 ++-
net/mptcp/subflow.c | 21 ++++++++++++++++++---
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index aec767ee047a..46b343a0b17e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -442,7 +442,8 @@ struct mptcp_subflow_context {
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
- stale : 1; /* unable to snd/rcv data, do not use for xmit */
+ stale : 1, /* unable to snd/rcv data, do not use for xmit */
+ valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 651f01d13191..8d5ddf8e3ef7 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -913,11 +913,14 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
subflow->map_data_csum);
if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
- subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+ if (subflow->mp_join || subflow->valid_csum_seen) {
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+ }
return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
}
+ subflow->valid_csum_seen = 1;
return MAPPING_OK;
}
@@ -1099,6 +1102,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
}
}
+static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ if (subflow->mp_join)
+ return false;
+ else if (READ_ONCE(msk->csum_enabled))
+ return !subflow->valid_csum_seen;
+ else
+ return !subflow->fully_established;
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1176,7 +1191,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
return true;
}
- if (subflow->mp_join || subflow->fully_established) {
+ if (!subflow_can_fallback(subflow)) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
--
2.36.1
I'm announcing the release of the 5.17.11 kernel.
All users of the 5.17 kernel series must upgrade.
The updated 5.17.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.17.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 +-
net/mptcp/protocol.h | 3 ++-
net/mptcp/subflow.c | 21 ++++++++++++++++++---
3 files changed, 21 insertions(+), 5 deletions(-)
Greg Kroah-Hartman (1):
Linux 5.17.11
Mat Martineau (1):
mptcp: Do TCP fallback on early DSS checksum failure
I'm announcing the release of the 5.15.43 kernel.
All users of the 5.15 kernel series must upgrade.
The updated 5.15.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.15.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 +-
net/mptcp/protocol.h | 3 ++-
net/mptcp/subflow.c | 21 ++++++++++++++++++---
3 files changed, 21 insertions(+), 5 deletions(-)
Greg Kroah-Hartman (1):
Linux 5.15.43
Mat Martineau (1):
mptcp: Do TCP fallback on early DSS checksum failure
This is the start of the stable review cycle for the 4.9.316 release.
There are 25 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 25 May 2022 16:56:55 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.316-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.9.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.9.316-rc1
Yang Yingliang <yangyingliang(a)huawei.com>
net: stmmac: fix missing pci_disable_device() on error in stmmac_pci_probe()
Yang Yingliang <yangyingliang(a)huawei.com>
ethernet: tulip: fix missing pci_disable_device() on error in tulip_init_one()
Felix Fietkau <nbd(a)nbd.name>
mac80211: fix rx reordering with non explicit / psmp ack policy
Gleb Chesnokov <Chesnokov.G(a)raidix.com>
scsi: qla2xxx: Fix missed DMA unmap for aborted commands
Thomas Richter <tmricht(a)linux.ibm.com>
perf bench numa: Address compiler error on s390
Kevin Mitchell <kevmitch(a)arista.com>
igb: skip phy status check where unavailable
Ard Biesheuvel <ardb(a)kernel.org>
ARM: 9197/1: spectre-bhb: fix loop8 sequence for Thumb2
Ard Biesheuvel <ardb(a)kernel.org>
ARM: 9196/1: spectre-bhb: enable for Cortex-A15
Jiasheng Jiang <jiasheng(a)iscas.ac.cn>
net: af_key: add check for pfkey_broadcast in function pfkey_process
Duoming Zhou <duoming(a)zju.edu.cn>
NFC: nci: fix sleep in atomic context bugs caused by nci_skb_alloc
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
net/qla3xxx: Fix a test in ql_reset_work()
Zixuan Fu <r33s3n6(a)gmail.com>
net: vmxnet3: fix possible NULL pointer dereference in vmxnet3_rq_cleanup()
Zixuan Fu <r33s3n6(a)gmail.com>
net: vmxnet3: fix possible use-after-free bugs in vmxnet3_rq_alloc_rx_buf()
Hangyu Hua <hbh25y(a)gmail.com>
drm/dp/mst: fix a possible memory leak in fetch_monitor_name()
Peter Zijlstra <peterz(a)infradead.org>
perf: Fix sys_perf_event_open() race against self
Takashi Iwai <tiwai(a)suse.de>
ALSA: wavefront: Proper check of get_user() error
Ulf Hansson <ulf.hansson(a)linaro.org>
mmc: core: Default to generic_cmd6_time as timeout in __mmc_switch()
Ulf Hansson <ulf.hansson(a)linaro.org>
mmc: block: Use generic_cmd6_time when modifying INAND_CMD38_ARG_EXT_CSD
Ulf Hansson <ulf.hansson(a)linaro.org>
mmc: core: Specify timeouts for BKOPS and CACHE_FLUSH for eMMC
linyujun <linyujun809(a)huawei.com>
ARM: 9191/1: arm/stacktrace, kasan: Silence KASAN warnings in unwind_frame()
Jakob Koschel <jakobkoschel(a)gmail.com>
drbd: remove usage of list iterator variable after loop
Xiaoke Wang <xkernel.wang(a)foxmail.com>
MIPS: lantiq: check the return value of kzalloc()
Jeff LaBundy <jeff(a)labundy.com>
Input: add bounds checking to input_set_capability()
David Gow <davidgow(a)google.com>
um: Cleanup syscall_handler_t definition/cast, fix warning
Willy Tarreau <w(a)1wt.eu>
floppy: use a statically allocated error counter
-------------
Diffstat:
Makefile | 4 +--
arch/arm/kernel/entry-armv.S | 2 +-
arch/arm/kernel/stacktrace.c | 10 +++---
arch/arm/mm/proc-v7-bugs.c | 1 +
arch/mips/lantiq/falcon/sysctrl.c | 2 ++
arch/mips/lantiq/xway/gptu.c | 2 ++
arch/mips/lantiq/xway/sysctrl.c | 46 +++++++++++++++---------
arch/x86/um/shared/sysdep/syscalls_64.h | 5 ++-
drivers/block/drbd/drbd_main.c | 7 ++--
drivers/block/floppy.c | 19 +++++-----
drivers/gpu/drm/drm_dp_mst_topology.c | 1 +
drivers/input/input.c | 19 ++++++++++
drivers/mmc/card/block.c | 6 ++--
drivers/mmc/core/core.c | 5 ++-
drivers/mmc/core/mmc_ops.c | 9 ++---
drivers/net/ethernet/dec/tulip/tulip_core.c | 5 ++-
drivers/net/ethernet/intel/igb/igb_main.c | 3 +-
drivers/net/ethernet/qlogic/qla3xxx.c | 3 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +--
drivers/net/vmxnet3/vmxnet3_drv.c | 6 ++++
drivers/scsi/qla2xxx/qla_target.c | 3 ++
kernel/events/core.c | 14 ++++++++
net/key/af_key.c | 6 ++--
net/mac80211/rx.c | 3 +-
net/nfc/nci/data.c | 2 +-
net/nfc/nci/hci.c | 4 +--
sound/isa/wavefront/wavefront_synth.c | 3 +-
tools/perf/bench/numa.c | 2 +-
28 files changed, 134 insertions(+), 62 deletions(-)
#regzbot introduced v5.17.3..v5.17.4
#regzbot introduced: 001828fb3084379f3c3e228b905223c50bc237f9
Hello
Since 5.17.4 my laptop no longer resumes from suspend. At resume, the
symptoms vary:
- either the laptop freezes;
- or the screen stays blank;
- or the screen is OK but the mouse is frozen;
- or the display lags, with repeated logs in dmesg:
[ 228.275492] [drm] Fence fallback timer expired on ring gfx
[ 228.395466] [drm:amdgpu_dm_atomic_commit_tail] *ERROR* Waiting for fences
timed out!
[ 228.779490] [drm] Fence fallback timer expired on ring gfx
[ 229.283484] [drm] Fence fallback timer expired on ring sdma0
[ 229.283485] [drm] Fence fallback timer expired on ring gfx
[ 229.787487] [drm] Fence fallback timer expired on ring gfx
...
I've bisected the problem.
Please note this laptop has always behaved strangely on suspend:
the first suspend request always fails (this has never been fixed, and got
in our way in the past when diagnosing another regression, the touchpad
not resuming). The screen goes blank, and pressing the power button brings
it back, which seems to reset it. After that, all suspend/resume cycles
work fine.
Since 5.17.4, it is no longer possible to get the laptop working again
after the first suspend failure.
HW: HP Pavilion / Ryzen 4600H with integrated AMD graphics + NVIDIA 1650 Ti
(turned off with an ACPI call to save battery; I am not using the NVIDIA
driver).
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eadb2f47a3ced5c64b23b90fd2a3463f63726066 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson(a)linaro.org>
Date: Mon, 23 May 2022 19:11:02 +0100
Subject: [PATCH] lockdown: also lock down previous kgdb use
KGDB and KDB allow read and write access to kernel memory, and thus
should be restricted during lockdown. An attacker with access to a
serial port (for example, via a hypervisor console, which some cloud
vendors provide over the network) could trigger the debugger so it is
important that the debugger respect the lockdown mode when/if it is
triggered.
Fix this by integrating lockdown into kdb's existing permissions
mechanism. Unfortunately kgdb does not have any permissions mechanism
(although it certainly could be added later) so, for now, kgdb is simply
and brutally disabled by immediately exiting the gdb stub without taking
any action.
For lockdowns established early in the boot (e.g. the normal case)
this should be fine, but on systems where kgdb has set breakpoints before
the lockdown is enacted then "bad things" will happen.
CVE: CVE-2022-21499
Co-developed-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Signed-off-by: Daniel Thompson <daniel.thompson(a)linaro.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/include/linux/security.h b/include/linux/security.h
index 25b3ef71f495..7fc4e9f49f54 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -121,10 +121,12 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
+ LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
LOCKDOWN_BPF_READ_KERNEL,
+ LOCKDOWN_DBG_READ_KERNEL,
LOCKDOWN_PERF,
LOCKDOWN_TRACEFS,
LOCKDOWN_XMON_RW,
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index da06a5553835..7beceb447211 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -53,6 +53,7 @@
#include <linux/vmacache.h>
#include <linux/rcupdate.h>
#include <linux/irq.h>
+#include <linux/security.h>
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
@@ -752,6 +753,29 @@ cpu_master_loop:
continue;
kgdb_connected = 0;
} else {
+ /*
+ * This is a brutal way to interfere with the debugger
+ * and prevent gdb being used to poke at kernel memory.
+ * This could cause trouble if lockdown is applied when
+ * there is already an active gdb session. For now the
+ * answer is simply "don't do that". Typically lockdown
+ * *will* be applied before the debug core gets started
+ * so only developers using kgdb for fairly advanced
+ * early kernel debug can be bitten by this. Hopefully
+ * they are sophisticated enough to take care of
+ * themselves, especially with help from the lockdown
+ * message printed on the console!
+ */
+ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
+ if (IS_ENABLED(CONFIG_KGDB_KDB)) {
+ /* Switch back to kdb if possible... */
+ dbg_kdb_mode = 1;
+ continue;
+ } else {
+ /* ... otherwise just bail */
+ break;
+ }
+ }
error = gdb_serial_stub(ks);
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 0852a537dad4..ead4da947127 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/security.h>
#include "kdb_private.h"
#undef MODULE_PARAM_PREFIX
@@ -166,10 +167,62 @@ struct task_struct *kdb_curr_task(int cpu)
}
/*
- * Check whether the flags of the current command and the permissions
- * of the kdb console has allow a command to be run.
+ * Update the permissions flags (kdb_cmd_enabled) to match the
+ * current lockdown state.
+ *
+ * Within this function the calls to security_locked_down() are "lazy". We
+ * avoid calling them if the current value of kdb_cmd_enabled already excludes
+ * flags that might be subject to lockdown. Additionally we deliberately check
+ * the lockdown flags independently (even though read lockdown implies write
+ * lockdown) since that results in both simpler code and clearer messages to
+ * the user on first-time debugger entry.
+ *
+ * The permission masks during a read+write lockdown permits the following
+ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
+ *
+ * The INSPECT commands are not blocked during lockdown because they are
+ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
+ * forcing them to have no arguments) and lsmod. These commands do expose
+ * some kernel state but do not allow the developer seated at the console to
+ * choose what state is reported. SIGNAL and REBOOT should not be controversial,
+ * given these are allowed for root during lockdown already.
+ */
+static void kdb_check_for_lockdown(void)
+{
+ const int write_flags = KDB_ENABLE_MEM_WRITE |
+ KDB_ENABLE_REG_WRITE |
+ KDB_ENABLE_FLOW_CTRL;
+ const int read_flags = KDB_ENABLE_MEM_READ |
+ KDB_ENABLE_REG_READ;
+
+ bool need_to_lockdown_write = false;
+ bool need_to_lockdown_read = false;
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
+ need_to_lockdown_write =
+ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
+ need_to_lockdown_read =
+ security_locked_down(LOCKDOWN_DBG_READ_KERNEL);
+
+ /* De-compose KDB_ENABLE_ALL if required */
+ if (need_to_lockdown_write || need_to_lockdown_read)
+ if (kdb_cmd_enabled & KDB_ENABLE_ALL)
+ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;
+
+ if (need_to_lockdown_write)
+ kdb_cmd_enabled &= ~write_flags;
+
+ if (need_to_lockdown_read)
+ kdb_cmd_enabled &= ~read_flags;
+}
+
+/*
+ * Check whether the flags of the current command, the permissions of the kdb
+ * console and the lockdown state allow a command to be run.
*/
-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
bool no_args)
{
/* permissions comes from userspace so needs massaging slightly */
@@ -1180,6 +1233,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
kdb_curr_task(raw_smp_processor_id());
KDB_DEBUG_STATE("kdb_local 1", reason);
+
+ kdb_check_for_lockdown();
+
kdb_go_count = 0;
if (reason == KDB_REASON_DEBUG) {
/* special case below */
diff --git a/security/security.c b/security/security.c
index b7cf5cbfdc67..aaf6566deb9f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_DEBUGFS] = "debugfs access",
[LOCKDOWN_XMON_WR] = "xmon write access",
[LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
+ [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
[LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM",
+ [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
[LOCKDOWN_PERF] = "unsafe use of perf",
[LOCKDOWN_TRACEFS] = "use of tracefs",
[LOCKDOWN_XMON_RW] = "xmon read and write access",
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eadb2f47a3ced5c64b23b90fd2a3463f63726066 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson(a)linaro.org>
Date: Mon, 23 May 2022 19:11:02 +0100
Subject: [PATCH] lockdown: also lock down previous kgdb use
KGDB and KDB allow read and write access to kernel memory, and thus
should be restricted during lockdown. An attacker with access to a
serial port (for example, via a hypervisor console, which some cloud
vendors provide over the network) could trigger the debugger so it is
important that the debugger respect the lockdown mode when/if it is
triggered.
Fix this by integrating lockdown into kdb's existing permissions
mechanism. Unfortunately kgdb does not have any permissions mechanism
(although it certainly could be added later) so, for now, kgdb is simply
and brutally disabled by immediately exiting the gdb stub without taking
any action.
For lockdowns established early in the boot (e.g. the normal case)
this should be fine, but on systems where kgdb has set breakpoints before
the lockdown is enacted then "bad things" will happen.
CVE: CVE-2022-21499
Co-developed-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Signed-off-by: Daniel Thompson <daniel.thompson(a)linaro.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/include/linux/security.h b/include/linux/security.h
index 25b3ef71f495..7fc4e9f49f54 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -121,10 +121,12 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
+ LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
LOCKDOWN_BPF_READ_KERNEL,
+ LOCKDOWN_DBG_READ_KERNEL,
LOCKDOWN_PERF,
LOCKDOWN_TRACEFS,
LOCKDOWN_XMON_RW,
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index da06a5553835..7beceb447211 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -53,6 +53,7 @@
#include <linux/vmacache.h>
#include <linux/rcupdate.h>
#include <linux/irq.h>
+#include <linux/security.h>
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
@@ -752,6 +753,29 @@ cpu_master_loop:
continue;
kgdb_connected = 0;
} else {
+ /*
+ * This is a brutal way to interfere with the debugger
+ * and prevent gdb being used to poke at kernel memory.
+ * This could cause trouble if lockdown is applied when
+ * there is already an active gdb session. For now the
+ * answer is simply "don't do that". Typically lockdown
+ * *will* be applied before the debug core gets started
+ * so only developers using kgdb for fairly advanced
+ * early kernel debug can be bitten by this. Hopefully
+ * they are sophisticated enough to take care of
+ * themselves, especially with help from the lockdown
+ * message printed on the console!
+ */
+ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
+ if (IS_ENABLED(CONFIG_KGDB_KDB)) {
+ /* Switch back to kdb if possible... */
+ dbg_kdb_mode = 1;
+ continue;
+ } else {
+ /* ... otherwise just bail */
+ break;
+ }
+ }
error = gdb_serial_stub(ks);
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 0852a537dad4..ead4da947127 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/security.h>
#include "kdb_private.h"
#undef MODULE_PARAM_PREFIX
@@ -166,10 +167,62 @@ struct task_struct *kdb_curr_task(int cpu)
}
/*
- * Check whether the flags of the current command and the permissions
- * of the kdb console has allow a command to be run.
+ * Update the permissions flags (kdb_cmd_enabled) to match the
+ * current lockdown state.
+ *
+ * Within this function the calls to security_locked_down() are "lazy". We
+ * avoid calling them if the current value of kdb_cmd_enabled already excludes
+ * flags that might be subject to lockdown. Additionally we deliberately check
+ * the lockdown flags independently (even though read lockdown implies write
+ * lockdown) since that results in both simpler code and clearer messages to
+ * the user on first-time debugger entry.
+ *
+ * The permission masks during a read+write lockdown permits the following
+ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
+ *
+ * The INSPECT commands are not blocked during lockdown because they are
+ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
+ * forcing them to have no arguments) and lsmod. These commands do expose
+ * some kernel state but do not allow the developer seated at the console to
+ * choose what state is reported. SIGNAL and REBOOT should not be controversial,
+ * given these are allowed for root during lockdown already.
+ */
+static void kdb_check_for_lockdown(void)
+{
+ const int write_flags = KDB_ENABLE_MEM_WRITE |
+ KDB_ENABLE_REG_WRITE |
+ KDB_ENABLE_FLOW_CTRL;
+ const int read_flags = KDB_ENABLE_MEM_READ |
+ KDB_ENABLE_REG_READ;
+
+ bool need_to_lockdown_write = false;
+ bool need_to_lockdown_read = false;
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
+ need_to_lockdown_write =
+ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
+ need_to_lockdown_read =
+ security_locked_down(LOCKDOWN_DBG_READ_KERNEL);
+
+ /* De-compose KDB_ENABLE_ALL if required */
+ if (need_to_lockdown_write || need_to_lockdown_read)
+ if (kdb_cmd_enabled & KDB_ENABLE_ALL)
+ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;
+
+ if (need_to_lockdown_write)
+ kdb_cmd_enabled &= ~write_flags;
+
+ if (need_to_lockdown_read)
+ kdb_cmd_enabled &= ~read_flags;
+}
+
+/*
+ * Check whether the flags of the current command, the permissions of the kdb
+ * console and the lockdown state allow a command to be run.
*/
-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
bool no_args)
{
/* permissions comes from userspace so needs massaging slightly */
@@ -1180,6 +1233,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
kdb_curr_task(raw_smp_processor_id());
KDB_DEBUG_STATE("kdb_local 1", reason);
+
+ kdb_check_for_lockdown();
+
kdb_go_count = 0;
if (reason == KDB_REASON_DEBUG) {
/* special case below */
diff --git a/security/security.c b/security/security.c
index b7cf5cbfdc67..aaf6566deb9f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_DEBUGFS] = "debugfs access",
[LOCKDOWN_XMON_WR] = "xmon write access",
[LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
+ [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
[LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM",
+ [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
[LOCKDOWN_PERF] = "unsafe use of perf",
[LOCKDOWN_TRACEFS] = "use of tracefs",
[LOCKDOWN_XMON_RW] = "xmon read and write access",
commit 7e0815b3e09986d2fe651199363e135b9358132a upstream.
When a XEN_HVM guest uses the XEN PIRQ/Eventchannel mechanism, then
PCI/MSI[-X] masking is solely controlled by the hypervisor, but contrary to
XEN_PV guests this does not disable PCI/MSI[-X] masking in the PCI/MSI
layer.
This can lead to a situation where the PCI/MSI layer masks an MSI[-X]
interrupt and the hypervisor grants the write despite the fact that it
already requested the interrupt. As a consequence, interrupt delivery on the
affected device never happens.
Set pci_msi_ignore_mask to prevent that like it's done for XEN_PV guests
already.
Fixes: 809f9267bbab ("xen: map MSIs into pirqs")
Reported-by: Jeremi Piotrowski <jpiotrowski(a)linux.microsoft.com>
Reported-by: Dusty Mabe <dustymabe(a)redhat.com>
Reported-by: Salvatore Bonaccorso <carnil(a)debian.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Noah Meyerhans <noahm(a)debian.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/87tuaduxj5.ffs@tglx
[nmeyerha(a)amazon.com: backported to 4.14]
Signed-off-by: Noah Meyerhans <nmeyerha(a)amazon.com>
---
arch/x86/pci/xen.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index c4b3646bd04c..7135f35f9de7 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -442,6 +442,11 @@ void __init xen_msi_init(void)
x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+ /*
+ * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely
+ * controlled by the hypervisor.
+ */
+ pci_msi_ignore_mask = 1;
}
#endif
--
2.25.1
From: Jitao Shi <jitao.shi(a)mediatek.com>
To comply with the panel sequence, hold the MIPI signal at LP00 before the
DCS command transmission, and only pull the MIPI signal high from LP00 to
LP11 right before the DCS command transmission starts.
The normal panel timing is:
(1) pp1800 DC pull up
(2) avdd & avee AC pull high
(3) lcm_reset pull high -> pull low -> pull high
(4) Pull MIPI signal high (LP11) -> initial code -> send video data (HS mode)
The power-off sequence is reversed.
If DSI is not in command mode, DSI will pull the MIPI signal high in the
mtk_output_dsi_enable function. The delay in the lane_ready function is the
reaction time of dsi_rx after the MIPI signal is pulled up.
Fixes: 2dd8075d2185 ("drm/mediatek: mtk_dsi: Use the drm_panel_bridge API")
Cc: <stable(a)vger.kernel.org> # 5.10.x: b255d51e3967: sched: Modify dsi funcs to atomic operations
Cc: <stable(a)vger.kernel.org> # 5.10.x: 72c69c977502: sched: Separate poweron/poweroff from enable/disable and define new funcs
Cc: <stable(a)vger.kernel.org> # 5.10.x
Signed-off-by: Jitao Shi <jitao.shi(a)mediatek.com>
Signed-off-by: Xinlei Lee <xinlei.lee(a)mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>
---
drivers/gpu/drm/mediatek/mtk_dsi.c | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index d9a6b928dba8..25e84d9426bf 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -203,6 +203,7 @@ struct mtk_dsi {
struct mtk_phy_timing phy_timing;
int refcount;
bool enabled;
+ bool lanes_ready;
u32 irq_data;
wait_queue_head_t irq_wait_queue;
const struct mtk_dsi_driver_data *driver_data;
@@ -661,18 +662,11 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi)
mtk_dsi_reset_engine(dsi);
mtk_dsi_phy_timconfig(dsi);
- mtk_dsi_rxtx_control(dsi);
- usleep_range(30, 100);
- mtk_dsi_reset_dphy(dsi);
mtk_dsi_ps_control_vact(dsi);
mtk_dsi_set_vm_cmd(dsi);
mtk_dsi_config_vdo_timing(dsi);
mtk_dsi_set_interrupt_enable(dsi);
- mtk_dsi_clk_ulp_mode_leave(dsi);
- mtk_dsi_lane0_ulp_mode_leave(dsi);
- mtk_dsi_clk_hs_mode(dsi, 0);
-
return 0;
err_disable_engine_clk:
clk_disable_unprepare(dsi->engine_clk);
@@ -701,6 +695,23 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
clk_disable_unprepare(dsi->digital_clk);
phy_power_off(dsi->phy);
+
+ dsi->lanes_ready = false;
+}
+
+static void mtk_dsi_lane_ready(struct mtk_dsi *dsi)
+{
+ if (!dsi->lanes_ready) {
+ dsi->lanes_ready = true;
+ mtk_dsi_rxtx_control(dsi);
+ usleep_range(30, 100);
+ mtk_dsi_reset_dphy(dsi);
+ mtk_dsi_clk_ulp_mode_leave(dsi);
+ mtk_dsi_lane0_ulp_mode_leave(dsi);
+ mtk_dsi_clk_hs_mode(dsi, 0);
+ msleep(20);
+ /* The reaction time after pulling up the mipi signal for dsi_rx */
+ }
}
static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
@@ -708,6 +719,7 @@ static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
if (dsi->enabled)
return;
+ mtk_dsi_lane_ready(dsi);
mtk_dsi_set_mode(dsi);
mtk_dsi_clk_hs_mode(dsi, 1);
@@ -1017,6 +1029,8 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host,
if (MTK_DSI_HOST_IS_READ(msg->type))
irq_flag |= LPRX_RD_RDY_INT_FLAG;
+ mtk_dsi_lane_ready(dsi);
+
ret = mtk_dsi_host_send_cmd(dsi, msg, irq_flag);
if (ret)
goto restore_dsi_mode;
--
2.18.0
The patch titled
Subject: hugetlb: fix huge_pmd_unshare address update
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
hugetlb-fix-huge_pmd_unshare-address-update.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlb: fix huge_pmd_unshare address update
Date: Tue, 24 May 2022 13:50:03 -0700
The routine huge_pmd_unshare() is passed a pointer to an address
associated with an area which may be unshared. If unshare is successful
this address is updated to 'optimize' callers iterating over huge page
addresses. For the optimization to work correctly, address should be
updated to the last huge page in the unmapped/unshared area. However, in
the common case where the passed address is PUD_SIZE aligned, the address
is incorrectly updated to the address of the preceding huge page. That
wastes CPU cycles as the unmapped/unshared range is scanned twice.
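As a worked example of the arithmetic, here is a standalone user-space
sketch of the two update formulas, assuming the common x86-64 values
PMD_SIZE = 2 MiB and PUD_SIZE = 1 GiB (the macros below are simplified
stand-ins, not the kernel's definitions):

#include <stdio.h>

#define PMD_SIZE (2UL << 20)   /* one shared huge page: 2 MiB */
#define PUD_SIZE (1UL << 30)   /* one unshared PUD area: 1 GiB */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* Common case: the passed address is already PUD_SIZE aligned. */
	unsigned long addr = 4 * PUD_SIZE;

	/* Old formula: ALIGN() of an already-aligned address is a no-op,
	 * so this steps back onto the huge page *preceding* the area. */
	unsigned long old_addr = ALIGN(addr, PUD_SIZE) - PMD_SIZE;

	/* New formula: ORs in the offset of the last huge page, landing
	 * on the last page *inside* the unshared area as intended. */
	unsigned long new_addr = addr | (PUD_SIZE - PMD_SIZE);

	printf("old: %#lx (before the area)\n", old_addr);
	printf("new: %#lx (last page in the area)\n", new_addr);
	return 0;
}

With addr = 0x100000000, the old formula yields 0xffe00000 (below the
area), while the new one yields 0x13fe00000, the last 2 MiB page of the
cleared 1 GiB range, so the calling loop skips the area in one step.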
Link: https://lkml.kernel.org/r/20220524205003.126184-1-mike.kravetz@oracle.com
Fixes: 39dde65c9940 ("shared page table for hugetlb page")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
--- a/mm/hugetlb.c~hugetlb-fix-huge_pmd_unshare-address-update
+++ a/mm/hugetlb.c
@@ -6562,7 +6562,14 @@ int huge_pmd_unshare(struct mm_struct *m
pud_clear(pud);
put_page(virt_to_page(ptep));
mm_dec_nr_pmds(mm);
- *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+ /*
+ * This update of passed address optimizes loops sequentially
+ * processing addresses in increments of huge page size (PMD_SIZE
+ * in this case). By clearing the pud, a PUD_SIZE area is unmapped.
+ * Update address to the 'last page' in the cleared area so that
+ * calling loop can move to first page past this area.
+ */
+ *addr |= PUD_SIZE - PMD_SIZE;
return 1;
}
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
hugetlb-fix-huge_pmd_unshare-address-update.patch
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9f46c187e2e680ecd9de7983e4d081c3391acc76 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Fri, 20 May 2022 13:48:11 -0400
Subject: [PATCH] KVM: x86/mmu: fix NULL pointer dereference on guest INVPCID
With shadow paging enabled, the INVPCID instruction results in a call
to kvm_mmu_invpcid_gva. If INVPCID is executed with CR0.PG=0, the
invlpg callback is not set and the result is a NULL pointer dereference.
Fix it trivially by checking for mmu->invlpg before every call.
There are other possibilities:
- check for CR0.PG, because KVM (like all Intel processors after P5)
flushes guest TLB on CR0.PG changes so that INVPCID/INVLPG are a
nop with paging disabled
- check for EFER.LMA, because KVM syncs and flushes when switching
MMU contexts outside of 64-bit mode
All of these are tricky, go for the simple solution. This is CVE-2022-1789.
Reported-by: Yongkang Jia <kangel(a)zju.edu.cn>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 56ebc4fb7f91..45e1573f8f1d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5470,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
uint i;
if (pcid == kvm_get_active_pcid(vcpu)) {
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->root.hpa);
tlb_flush = true;
}
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
}
This is the start of the stable review cycle for the 4.19.245 release.
There are 44 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 25 May 2022 16:56:55 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.245-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.245-rc1
David Howells <dhowells(a)redhat.com>
afs: Fix afs_getattr() to refetch file status if callback break occurred
Linus Torvalds <torvalds(a)linux-foundation.org>
Reinstate some of "swiotlb: rework "fix info leak with DMA_FROM_DEVICE""
Halil Pasic <pasic(a)linux.ibm.com>
swiotlb: fix info leak with DMA_FROM_DEVICE
Grant Grundler <grundler(a)chromium.org>
net: atlantic: verify hw_head_ lies within TX buffer ring
Yang Yingliang <yangyingliang(a)huawei.com>
net: stmmac: fix missing pci_disable_device() on error in stmmac_pci_probe()
Yang Yingliang <yangyingliang(a)huawei.com>
ethernet: tulip: fix missing pci_disable_device() on error in tulip_init_one()
Felix Fietkau <nbd(a)nbd.name>
mac80211: fix rx reordering with non explicit / psmp ack policy
Gleb Chesnokov <Chesnokov.G(a)raidix.com>
scsi: qla2xxx: Fix missed DMA unmap for aborted commands
Thomas Richter <tmricht(a)linux.ibm.com>
perf bench numa: Address compiler error on s390
Uwe Kleine-König <u.kleine-koenig(a)pengutronix.de>
gpio: mvebu/pwm: Refuse requests with inverted polarity
Haibo Chen <haibo.chen(a)nxp.com>
gpio: gpio-vf610: do not touch other bits when set the target bit
Andrew Lunn <andrew(a)lunn.ch>
net: bridge: Clear offload_fwd_mark when passing frame up bridge interface.
Kevin Mitchell <kevmitch(a)arista.com>
igb: skip phy status check where unavailable
Ard Biesheuvel <ardb(a)kernel.org>
ARM: 9197/1: spectre-bhb: fix loop8 sequence for Thumb2
Ard Biesheuvel <ardb(a)kernel.org>
ARM: 9196/1: spectre-bhb: enable for Cortex-A15
Jiasheng Jiang <jiasheng(a)iscas.ac.cn>
net: af_key: add check for pfkey_broadcast in function pfkey_process
Maxim Mikityanskiy <maximmi(a)nvidia.com>
net/mlx5e: Properly block LRO when XDP is enabled
Duoming Zhou <duoming(a)zju.edu.cn>
NFC: nci: fix sleep in atomic context bugs caused by nci_skb_alloc
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
net/qla3xxx: Fix a test in ql_reset_work()
Codrin Ciubotariu <codrin.ciubotariu(a)microchip.com>
clk: at91: generated: consider range when calculating best rate
Zixuan Fu <r33s3n6(a)gmail.com>
net: vmxnet3: fix possible NULL pointer dereference in vmxnet3_rq_cleanup()
Zixuan Fu <r33s3n6(a)gmail.com>
net: vmxnet3: fix possible use-after-free bugs in vmxnet3_rq_alloc_rx_buf()
Paolo Abeni <pabeni(a)redhat.com>
net/sched: act_pedit: sanitize shift argument before usage
Harini Katakam <harini.katakam(a)xilinx.com>
net: macb: Increment rx bd head after allocating skb and buffer
Ulf Hansson <ulf.hansson(a)linaro.org>
mmc: core: Default to generic_cmd6_time as timeout in __mmc_switch()
Ulf Hansson <ulf.hansson(a)linaro.org>
mmc: block: Use generic_cmd6_time when modifying INAND_CMD38_ARG_EXT_CSD
Ulf Hansson <ulf.hansson(a)linaro.org>
mmc: core: Specify timeouts for BKOPS and CACHE_FLUSH for eMMC
Ulf Hansson <ulf.hansson(a)linaro.org>
mmc: core: Cleanup BKOPS support
Hangyu Hua <hbh25y(a)gmail.com>
drm/dp/mst: fix a possible memory leak in fetch_monitor_name()
Ondrej Mosnacek <omosnace(a)redhat.com>
crypto: qcom-rng - fix infinite loop on requests not multiple of WORD_SZ
Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
PCI/PM: Avoid putting Elo i2 PCIe Ports in D3cold
Al Viro <viro(a)zeniv.linux.org.uk>
Fix double fget() in vhost_net_set_backend()
Peter Zijlstra <peterz(a)infradead.org>
perf: Fix sys_perf_event_open() race against self
Takashi Iwai <tiwai(a)suse.de>
ALSA: wavefront: Proper check of get_user() error
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix lockdep warnings during disk space reclamation
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix lockdep warnings in page operations for btree nodes
linyujun <linyujun809(a)huawei.com>
ARM: 9191/1: arm/stacktrace, kasan: Silence KASAN warnings in unwind_frame()
Jakob Koschel <jakobkoschel(a)gmail.com>
drbd: remove usage of list iterator variable after loop
Xiaoke Wang <xkernel.wang(a)foxmail.com>
MIPS: lantiq: check the return value of kzalloc()
Zheng Yongjun <zhengyongjun3(a)huawei.com>
crypto: stm32 - fix reference leak in stm32_crc_remove
Zheng Yongjun <zhengyongjun3(a)huawei.com>
Input: stmfts - fix reference leak in stmfts_input_open
Jeff LaBundy <jeff(a)labundy.com>
Input: add bounds checking to input_set_capability()
David Gow <davidgow(a)google.com>
um: Cleanup syscall_handler_t definition/cast, fix warning
Willy Tarreau <w(a)1wt.eu>
floppy: use a statically allocated error counter
-------------
Diffstat:
Makefile | 4 +-
arch/arm/kernel/entry-armv.S | 2 +-
arch/arm/kernel/stacktrace.c | 10 +-
arch/arm/mm/proc-v7-bugs.c | 1 +
arch/mips/lantiq/falcon/sysctrl.c | 2 +
arch/mips/lantiq/xway/gptu.c | 2 +
arch/mips/lantiq/xway/sysctrl.c | 46 +++---
arch/x86/um/shared/sysdep/syscalls_64.h | 5 +-
drivers/block/drbd/drbd_main.c | 7 +-
drivers/block/floppy.c | 17 +--
drivers/clk/at91/clk-generated.c | 4 +
drivers/crypto/qcom-rng.c | 1 +
drivers/crypto/stm32/stm32_crc32.c | 4 +-
drivers/gpio/gpio-mvebu.c | 3 +
drivers/gpio/gpio-vf610.c | 8 +-
drivers/gpu/drm/drm_dp_mst_topology.c | 1 +
drivers/input/input.c | 19 +++
drivers/input/touchscreen/stmfts.c | 8 +-
drivers/mmc/core/block.c | 8 +-
drivers/mmc/core/card.h | 6 +-
drivers/mmc/core/mmc.c | 6 -
drivers/mmc/core/mmc_ops.c | 110 ++++----------
drivers/mmc/core/mmc_ops.h | 3 +-
.../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 +
drivers/net/ethernet/cadence/macb_main.c | 2 +-
drivers/net/ethernet/dec/tulip/tulip_core.c | 5 +-
drivers/net/ethernet/intel/igb/igb_main.c | 3 +-
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +
drivers/net/ethernet/qlogic/qla3xxx.c | 3 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +-
drivers/net/vmxnet3/vmxnet3_drv.c | 6 +
drivers/pci/pci.c | 10 ++
drivers/scsi/qla2xxx/qla_target.c | 3 +
drivers/vhost/net.c | 15 +-
fs/afs/inode.c | 14 +-
fs/nilfs2/btnode.c | 23 ++-
fs/nilfs2/btnode.h | 1 +
fs/nilfs2/btree.c | 27 ++--
fs/nilfs2/dat.c | 4 +-
fs/nilfs2/gcinode.c | 7 +-
fs/nilfs2/inode.c | 159 +++++++++++++++++++--
fs/nilfs2/mdt.c | 43 ++++--
fs/nilfs2/mdt.h | 6 +-
fs/nilfs2/nilfs.h | 16 +--
fs/nilfs2/page.c | 7 +-
fs/nilfs2/segment.c | 9 +-
fs/nilfs2/super.c | 5 +-
kernel/dma/swiotlb.c | 12 +-
kernel/events/core.c | 14 ++
net/bridge/br_input.c | 7 +
net/key/af_key.c | 6 +-
net/mac80211/rx.c | 3 +-
net/nfc/nci/data.c | 2 +-
net/nfc/nci/hci.c | 4 +-
net/sched/act_pedit.c | 4 +
sound/isa/wavefront/wavefront_synth.c | 3 +-
tools/perf/bench/numa.c | 2 +-
57 files changed, 483 insertions(+), 237 deletions(-)
commit 7e0815b3e09986d2fe651199363e135b9358132a upstream.
When a XEN_HVM guest uses the XEN PIRQ/Eventchannel mechanism, then
PCI/MSI[-X] masking is solely controlled by the hypervisor, but contrary to
XEN_PV guests this does not disable PCI/MSI[-X] masking in the PCI/MSI
layer.
This can lead to a situation where the PCI/MSI layer masks an MSI[-X]
interrupt and the hypervisor grants the write despite the fact that it
already requested the interrupt. As a consequence, interrupt delivery on the
affected device never happens.
Set pci_msi_ignore_mask to prevent that like it's done for XEN_PV guests
already.
Fixes: 809f9267bbab ("xen: map MSIs into pirqs")
Reported-by: Jeremi Piotrowski <jpiotrowski(a)linux.microsoft.com>
Reported-by: Dusty Mabe <dustymabe(a)redhat.com>
Reported-by: Salvatore Bonaccorso <carnil(a)debian.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Noah Meyerhans <noahm(a)debian.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/87tuaduxj5.ffs@tglx
[nmeyerha(a)amazon.com: backported to 5.4]
Signed-off-by: Noah Meyerhans <nmeyerha(a)amazon.com>
---
arch/x86/pci/xen.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 5c11ae66b5d8..9cf8f5417e7f 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -442,6 +442,11 @@ void __init xen_msi_init(void)
x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+ /*
+ * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely
+ * controlled by the hypervisor.
+ */
+ pci_msi_ignore_mask = 1;
}
#endif
--
2.25.1
commit 7e0815b3e09986d2fe651199363e135b9358132a upstream.
When a XEN_HVM guest uses the XEN PIRQ/Eventchannel mechanism, then
PCI/MSI[-X] masking is solely controlled by the hypervisor, but contrary to
XEN_PV guests this does not disable PCI/MSI[-X] masking in the PCI/MSI
layer.
This can lead to a situation where the PCI/MSI layer masks an MSI[-X]
interrupt and the hypervisor grants the write despite the fact that it
already requested the interrupt. As a consequence, interrupt delivery on the
affected device never happens.
Set pci_msi_ignore_mask to prevent that like it's done for XEN_PV guests
already.
Fixes: 809f9267bbab ("xen: map MSIs into pirqs")
Reported-by: Jeremi Piotrowski <jpiotrowski(a)linux.microsoft.com>
Reported-by: Dusty Mabe <dustymabe(a)redhat.com>
Reported-by: Salvatore Bonaccorso <carnil(a)debian.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Noah Meyerhans <noahm(a)debian.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/87tuaduxj5.ffs@tglx
[nmeyerha(a)amazon.com: backported to 4.19]
Signed-off-by: Noah Meyerhans <nmeyerha(a)amazon.com>
---
arch/x86/pci/xen.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 22da9bfd8a45..bacf8d988f65 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -441,6 +441,11 @@ void __init xen_msi_init(void)
x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+ /*
+ * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely
+ * controlled by the hypervisor.
+ */
+ pci_msi_ignore_mask = 1;
}
#endif
--
2.25.1
From: Eric Dumazet <edumazet(a)google.com>
commit 190cc82489f46f9d88e73c81a47e14f80a791e1a upstream
RFC 6056 (Recommendations for Transport-Protocol Port Randomization)
provides a good summary of why source port selection needs extra care.
David Dworken reminded us that Linux implements Algorithm 3
as described in RFC 6056 3.3.3
Quoting David:
In the context of the web, this creates an interesting info leak where
websites can count how many TCP connections a user's computer is
establishing over time. For example, this allows a website to count
exactly how many subresources a third party website loaded.
This also allows:
- Distinguishing between different users behind a VPN based on
distinct source port ranges.
- Tracking users over time across multiple networks.
- Covert communication channels between different browsers/browser
profiles running on the same computer
- Tracking what applications are running on a computer based on
the pattern of how fast source ports are getting incremented.
Section 3.3.4 describes an enhancement that reduces an
attacker's ability to use the basic information currently
stored in the shared 'u32 hint'.
This change also decreases collision rate when
multiple applications need to connect() to
different destinations.
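For orientation, a minimal sketch of the Algorithm 4 (double-hash)
selection the commit moves to. F() and G() below are illustrative
placeholder hashes, not the kernel's, and the free-port probing and
parity details are omitted:

#include <stdint.h>

#define TABLE_PERTURB_SIZE 256  /* the patch uses 1 << 8 buckets */

static uint32_t table_perturb[TABLE_PERTURB_SIZE];

/* F() and G() stand in for two independent keyed hashes over the
 * connection tuple, as in RFC 6056 3.3.4. */
static uint32_t F(uint32_t tuple) { return tuple * 2654435761u; }
static uint32_t G(uint32_t tuple) { return tuple ^ 0x9e3779b9u; }

static uint16_t pick_ephemeral_port(uint32_t tuple, uint16_t low, uint16_t high)
{
	uint32_t num_ephemeral = (uint32_t)(high - low) + 1;
	uint32_t index = G(tuple) % TABLE_PERTURB_SIZE;
	uint32_t offset = (table_perturb[index] + F(tuple)) % num_ephemeral;

	/* A real implementation probes from 'offset' until a free port
	 * is found; on success only this bucket's counter advances, so
	 * unrelated destinations no longer share one global hint. */
	table_perturb[index] += 2;
	return (uint16_t)(low + offset);
}

Because the perturbation state is split across 256 buckets keyed by a
second hash, an observer of one destination's port sequence learns far
less about connections to other destinations than with the single hint.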
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: David Dworken <ddworken(a)google.com>
Cc: Willem de Bruijn <willemb(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
[SG: Adjusted context]
Signed-off-by: Stefan Ghinea <stefan.ghinea(a)windriver.com>
---
net/ipv4/inet_hashtables.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c96a5871b49d..da9537ab3b98 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -714,6 +714,17 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
+/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
+ * Note that we use 32bit integers (vs RFC 'short integers')
+ * because 2^16 is not a multiple of num_ephemeral and this
+ * property might be used by clever attacker.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
+ * we use 256 instead to really give more isolation and
+ * privacy, this only consumes 1 KB of kernel memory.
+ */
+#define INET_TABLE_PERTURB_SHIFT 8
+static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
@@ -727,7 +738,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb;
u32 remaining, offset;
int ret, i, low, high;
- static u32 hint;
+ u32 index;
if (port) {
head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -752,7 +763,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- offset = (hint + port_offset) % remaining;
+ net_get_random_once(table_perturb, sizeof(table_perturb));
+ index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+
+ offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -805,7 +819,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
return -EADDRNOTAVAIL;
ok:
- hint += i + 2;
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
--
2.36.1
From: Eric Dumazet <edumazet(a)google.com>
commit 190cc82489f46f9d88e73c81a47e14f80a791e1a upstream
RFC 6056 (Recommendations for Transport-Protocol Port Randomization)
provides a good summary of why source port selection needs extra care.
David Dworken reminded us that Linux implements Algorithm 3
as described in RFC 6056 3.3.3
Quoting David:
In the context of the web, this creates an interesting info leak where
websites can count how many TCP connections a user's computer is
establishing over time. For example, this allows a website to count
exactly how many subresources a third party website loaded.
This also allows:
- Distinguishing between different users behind a VPN based on
distinct source port ranges.
- Tracking users over time across multiple networks.
- Covert communication channels between different browsers/browser
profiles running on the same computer
- Tracking what applications are running on a computer based on
the pattern of how fast source ports are getting incremented.
Section 3.3.4 describes an enhancement that reduces an
attacker's ability to use the basic information currently
stored in the shared 'u32 hint'.
This change also decreases collision rate when
multiple applications need to connect() to
different destinations.
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: David Dworken <ddworken(a)google.com>
Cc: Willem de Bruijn <willemb(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Stefan Ghinea <stefan.ghinea(a)windriver.com>
---
net/ipv4/inet_hashtables.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index cbbeb0eea0c3..dbfcefc264d6 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -671,6 +671,17 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
+/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
+ * Note that we use 32bit integers (vs RFC 'short integers')
+ * because 2^16 is not a multiple of num_ephemeral and this
+ * property might be used by clever attacker.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
+ * we use 256 instead to really give more isolation and
+ * privacy, this only consumes 1 KB of kernel memory.
+ */
+#define INET_TABLE_PERTURB_SHIFT 8
+static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
@@ -684,8 +695,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb;
u32 remaining, offset;
int ret, i, low, high;
- static u32 hint;
int l3mdev;
+ u32 index;
if (port) {
head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -712,7 +723,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- offset = (hint + port_offset) % remaining;
+ net_get_random_once(table_perturb, sizeof(table_perturb));
+ index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+
+ offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -766,7 +780,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
return -EADDRNOTAVAIL;
ok:
- hint += i + 2;
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
--
2.36.1
From: Eric Dumazet <edumazet(a)google.com>
commit 190cc82489f46f9d88e73c81a47e14f80a791e1a upstream
RFC 6056 (Recommendations for Transport-Protocol Port Randomization)
provides a good summary of why source port selection needs extra care.
David Dworken reminded us that Linux implements Algorithm 3
as described in RFC 6056 3.3.3
Quoting David:
In the context of the web, this creates an interesting info leak where
websites can count how many TCP connections a user's computer is
establishing over time. For example, this allows a website to count
exactly how many subresources a third party website loaded.
This also allows:
- Distinguishing between different users behind a VPN based on
distinct source port ranges.
- Tracking users over time across multiple networks.
- Covert communication channels between different browsers/browser
profiles running on the same computer
- Tracking what applications are running on a computer based on
the pattern of how fast source ports are getting incremented.
Section 3.3.4 describes an enhancement that reduces an
attacker's ability to use the basic information currently
stored in the shared 'u32 hint'.
This change also decreases collision rate when
multiple applications need to connect() to
different destinations.
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: David Dworken <ddworken(a)google.com>
Cc: Willem de Bruijn <willemb(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Stefan Ghinea <stefan.ghinea(a)windriver.com>
---
net/ipv4/inet_hashtables.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 915b8e1bd9ef..3beaf9e84cf2 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -722,6 +722,17 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
+/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
+ * Note that we use 32bit integers (vs RFC 'short integers')
+ * because 2^16 is not a multiple of num_ephemeral and this
+ * property might be used by clever attacker.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
+ * we use 256 instead to really give more isolation and
+ * privacy, this only consumes 1 KB of kernel memory.
+ */
+#define INET_TABLE_PERTURB_SHIFT 8
+static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
@@ -735,8 +746,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb;
u32 remaining, offset;
int ret, i, low, high;
- static u32 hint;
int l3mdev;
+ u32 index;
if (port) {
head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -763,7 +774,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- offset = (hint + port_offset) % remaining;
+ net_get_random_once(table_perturb, sizeof(table_perturb));
+ index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+
+ offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -817,7 +831,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
return -EADDRNOTAVAIL;
ok:
- hint += i + 2;
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
--
2.36.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9f46c187e2e680ecd9de7983e4d081c3391acc76 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Fri, 20 May 2022 13:48:11 -0400
Subject: [PATCH] KVM: x86/mmu: fix NULL pointer dereference on guest INVPCID
With shadow paging enabled, the INVPCID instruction results in a call
to kvm_mmu_invpcid_gva. If INVPCID is executed with CR0.PG=0, the
invlpg callback is not set and the result is a NULL pointer dereference.
Fix it trivially by checking for mmu->invlpg before every call.
There are other possibilities:
- check for CR0.PG, because KVM (like all Intel processors after P5)
flushes guest TLB on CR0.PG changes so that INVPCID/INVLPG are a
nop with paging disabled
- check for EFER.LMA, because KVM syncs and flushes when switching
MMU contexts outside of 64-bit mode
All of these are tricky, go for the simple solution. This is CVE-2022-1789.
Reported-by: Yongkang Jia <kangel(a)zju.edu.cn>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 56ebc4fb7f91..45e1573f8f1d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5470,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
uint i;
if (pcid == kvm_get_active_pcid(vcpu)) {
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->root.hpa);
tlb_flush = true;
}
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9f46c187e2e680ecd9de7983e4d081c3391acc76 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini(a)redhat.com>
Date: Fri, 20 May 2022 13:48:11 -0400
Subject: [PATCH] KVM: x86/mmu: fix NULL pointer dereference on guest INVPCID
With shadow paging enabled, the INVPCID instruction results in a call
to kvm_mmu_invpcid_gva. If INVPCID is executed with CR0.PG=0, the
invlpg callback is not set and the result is a NULL pointer dereference.
Fix it trivially by checking for mmu->invlpg before every call.
There are other possibilities:
- check for CR0.PG, because KVM (like all Intel processors after P5)
flushes guest TLB on CR0.PG changes so that INVPCID/INVLPG are a
nop with paging disabled
- check for EFER.LMA, because KVM syncs and flushes when switching
MMU contexts outside of 64-bit mode
All of these are tricky, go for the simple solution. This is CVE-2022-1789.
Reported-by: Yongkang Jia <kangel(a)zju.edu.cn>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 56ebc4fb7f91..45e1573f8f1d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5470,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
uint i;
if (pcid == kvm_get_active_pcid(vcpu)) {
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->root.hpa);
tlb_flush = true;
}
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
}
The bug is here:
if (!p)
return ret;
The list iterator value 'p' will *always* be set and non-NULL by
list_for_each_entry(), so it is incorrect to assume that the iterator
value will be NULL if the list is empty or no element is found.
To fix the bug, use a new variable 'iter' as the list iterator, while
keeping the old variable 'p' as a dedicated pointer to the found element.
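For illustration, the following minimal userspace sketch (hand-rolled list
macros in the style of the kernel's <linux/list.h>, GCC typeof required; not
the kernel code itself) shows why the iterator is never NULL after an
exhausted loop, and how the dedicated found-pointer fixes it:

	#include <stdio.h>
	#include <stddef.h>

	struct list_head { struct list_head *next, *prev; };

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))
	#define list_entry(ptr, type, member) container_of(ptr, type, member)
	#define list_for_each_entry(pos, head, member)				\
		for (pos = list_entry((head)->next, typeof(*pos), member);	\
		     &pos->member != (head);					\
		     pos = list_entry(pos->member.next, typeof(*pos), member))

	struct item { int id; struct list_head list; };

	int main(void)
	{
		struct list_head head = { &head, &head };	/* empty list */
		struct item *p = NULL, *iter, *q;

		/* Broken pattern: after an exhausted loop, 'q' is NOT NULL;
		 * it holds a bogus pointer computed from &head itself. */
		list_for_each_entry(q, &head, list)
			if (q->id == 42)
				break;
		printf("q after empty loop: %p (bogus, not NULL)\n", (void *)q);

		/* Fixed pattern: 'p' stays NULL unless a match was found. */
		list_for_each_entry(iter, &head, list)
			if (iter->id == 42) {
				p = iter;
				break;
			}
		printf("p after empty loop: %p\n", (void *)p);
		return 0;
	}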
Cc: stable(a)vger.kernel.org
Fixes: dfaa973ae9605 ("KVM: PPC: Book3S HV: In H_SVM_INIT_DONE, migrate remaining normal-GFNs to secure-GFNs")
Signed-off-by: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
---
arch/powerpc/kvm/book3s_hv_uvmem.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index e414ca44839f..0cb20ee6a632 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -360,13 +360,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
struct kvm *kvm, unsigned long *gfn)
{
- struct kvmppc_uvmem_slot *p;
+ struct kvmppc_uvmem_slot *p = NULL, *iter;
bool ret = false;
unsigned long i;
- list_for_each_entry(p, &kvm->arch.uvmem_pfns, list)
- if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns)
+ list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list)
+ if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) {
+ p = iter;
break;
+ }
if (!p)
return ret;
/*
--
2.17.1
Hello my dear,
I am sending this mail praying it will get to you in a good condition
of health, since I myself am in a very critical health condition, in
which I sleep every night without knowing if I may be alive to see the
next day. I bring peace and love to you. It is by the grace of God
that I had no choice but to do what is lawful and right in the sight
of God, for eternal life, and in the sight of man, as a witness of
God's mercy and glory upon my life. I am Mrs. Jackie Grayson, a widow.
I have been suffering from a brain tumor for a long time; it has
defied all forms of medical treatment, and right now I have only a few
months to live, according to medical experts.
The situation has gotten complicated recently with my inability to
hear properly; I am communicating with you with the help of the chief
nurse here in the hospital. From all indications my condition is
really deteriorating, and it is quite obvious that, according to my
doctors, I may not live much longer, because this illness has reached
a very bad stage. I plead that you will not expose or betray the trust
and confidence that I am about to repose in you for the mutual benefit
of the orphans and the less privileged. I have some funds I inherited
from my late husband, the sum of $11,500,000.00. Knowing my condition,
I decided to donate this fund to you, believing that you will utilize
it the way I am going to instruct herein.
I need you to assist me and reclaim this money and use it for charity
work, for orphanages, and to give justice and help to the poor, the
needy and widows, as the Lord says in Jeremiah 22:15-16, and also, if
possible, to build schools for the less privileged that will be named
after my late husband, and to promote the word of God and ensure that
the house of God is maintained. I do not want a situation where this
money will be used in an ungodly manner; that is why I am taking this
decision. I am not afraid of death, for I know where I am going. I
accept this decision because I do not have any child who will inherit
this money after I die. Please, I want your sincere and urgent answer,
to know if you will be able to execute this project for the glory of
God, and I will give you more information on how the fund will be
transferred to your bank account. May the grace, peace, love and the
truth in the Word of God be with you and all those that you love and
care for.
I am waiting for your immediate reply.
Respectfully,
Mrs. Jackie Grayson,
writing from the hospital.
May God bless you.
The journal no-space deadlock has been reported from time to time. Such
a deadlock can happen in the following situation.
When all journal buckets are fully filled by active jsets under heavy
write I/O load, the cache set registration (after a reboot) will load
all active jsets and insert them into the btree again (which is
called journal replay). If a journaled bkey is inserted into a btree
node and results in a btree node split, a new journal request might be
triggered. For example, if the btree grows one more level after the node
split, the root node record in the cache device super block will be
updated by bch_journal_meta() from bch_btree_set_root(). But since there
is no space left in the journal buckets, the journal replay has to wait
for a new journal bucket to be reclaimed, which can only happen after at
least one journal bucket has been replayed. This is one example of how
the journal no-space deadlock happens.
The solution to avoid the deadlock is to reserve 1 journal bucket at
run time, and only permit the reserved journal bucket to be used during
the cache set registration procedure for things like journal replay. Then
the journal space will never be completely filled, and there is no chance
for the journal no-space deadlock to happen anymore.
This patch adds a new member "bool do_reserve" in struct journal. It is
initialized to 0 (false) when struct journal is allocated, and set to
1 (true) by bch_journal_space_reserve() when all initialization is done
in run_cache_set(). At run time, when journal_reclaim() tries to
allocate a new journal bucket, free_journal_buckets() is called to check
whether there are enough free journal buckets to use. If there is only
1 free journal bucket and journal->do_reserve is 1 (true), the last
bucket is reserved and free_journal_buckets() will return 0 to indicate
no free journal bucket. Then journal_reclaim() gives up, and tries again
next time to see whether there is a free journal bucket to allocate. By
this method, there is always 1 journal bucket reserved at run time.
During the cache set registration, journal->do_reserve is 0 (false), so
the reserved journal bucket can be used to avoid the no-space deadlock.
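To make the ring arithmetic concrete, below is a minimal standalone
userspace sketch of the free-bucket computation described above
(illustrative model with plain parameters, not the real bcache
structures):

	#include <stdio.h>

	/*
	 * Journal buckets form a ring of 'nbuckets' slots and 'cur_idx'
	 * chases 'discard_idx'. One slot is always kept so cur_idx never
	 * catches up with discard_idx, and one more is held back once
	 * 'do_reserve' is set after run_cache_set() finishes.
	 */
	static unsigned int model_free_journal_buckets(unsigned int nbuckets,
						       unsigned int cur_idx,
						       unsigned int discard_idx,
						       unsigned int do_reserve)
	{
		unsigned int n;

		/* Handle wrap-around; nbuckets need not be a power of 2 */
		if (cur_idx >= discard_idx)
			n = nbuckets + discard_idx - cur_idx;
		else
			n = discard_idx - cur_idx;

		if (n > (1 + do_reserve))
			return n - (1 + do_reserve);

		return 0;	/* at or below the floor: report no space */
	}

	int main(void)
	{
		/* 8-slot ring, writer at index 6, discard cursor at index 2 */
		printf("%u\n", model_free_journal_buckets(8, 6, 2, 0)); /* 3 */
		printf("%u\n", model_free_journal_buckets(8, 6, 2, 1)); /* 2 */
		return 0;
	}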
Reported-by: Nikhil Kshirsagar <nkshirsagar(a)gmail.com>
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: stable(a)vger.kernel.org
---
drivers/md/bcache/journal.c | 31 ++++++++++++++++++++++++++-----
drivers/md/bcache/journal.h | 2 ++
drivers/md/bcache/super.c | 1 +
3 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index df5347ea450b..e5da469a4235 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -405,6 +405,11 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
return ret;
}
+void bch_journal_space_reserve(struct journal *j)
+{
+ j->do_reserve = true;
+}
+
/* Journalling */
static void btree_flush_write(struct cache_set *c)
@@ -621,12 +626,30 @@ static void do_journal_discard(struct cache *ca)
}
}
+static unsigned int free_journal_buckets(struct cache_set *c)
+{
+ struct journal *j = &c->journal;
+ struct cache *ca = c->cache;
+ struct journal_device *ja = &c->cache->journal;
+ unsigned int n;
+
+ /* In case njournal_buckets is not power of 2 */
+ if (ja->cur_idx >= ja->discard_idx)
+ n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx;
+ else
+ n = ja->discard_idx - ja->cur_idx;
+
+ if (n > (1 + j->do_reserve))
+ return n - (1 + j->do_reserve);
+
+ return 0;
+}
+
static void journal_reclaim(struct cache_set *c)
{
struct bkey *k = &c->journal.key;
struct cache *ca = c->cache;
uint64_t last_seq;
- unsigned int next;
struct journal_device *ja = &ca->journal;
atomic_t p __maybe_unused;
@@ -649,12 +672,10 @@ static void journal_reclaim(struct cache_set *c)
if (c->journal.blocks_free)
goto out;
- next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
- /* No space available on this device */
- if (next == ja->discard_idx)
+ if (!free_journal_buckets(c))
goto out;
- ja->cur_idx = next;
+ ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
k->ptr[0] = MAKE_PTR(0,
bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
ca->sb.nr_this_dev);
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index f2ea34d5f431..cd316b4a1e95 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -105,6 +105,7 @@ struct journal {
spinlock_t lock;
spinlock_t flush_write_lock;
bool btree_flushing;
+ bool do_reserve;
/* used when waiting because the journal was full */
struct closure_waitlist wait;
struct closure io;
@@ -182,5 +183,6 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list);
void bch_journal_free(struct cache_set *c);
int bch_journal_alloc(struct cache_set *c);
+void bch_journal_space_reserve(struct journal *j);
#endif /* _BCACHE_JOURNAL_H */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bf3de149d3c9..2bb55278d22d 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2128,6 +2128,7 @@ static int run_cache_set(struct cache_set *c)
flash_devs_run(c);
+ bch_journal_space_reserve(&c->journal);
set_bit(CACHE_SET_RUNNING, &c->flags);
return 0;
err:
--
2.35.3