From: Linus Torvalds <torvalds(a)linux-foundation.org>
commit 901c7280ca0d5e2b4a8929fbe0bfb007ac2a6544 upstream.
Halil Pasic points out [1] that, rather than the full revert of that
commit (revert in bddac7c1e02b), a partial revert that only reverts the
problematic case, but still keeps some of the cleanups, is probably
better.
And that partial revert [2] had already been verified by Oleksandr
Natalenko to also fix the issue, I had just missed that in the long
discussion.
So let's reinstate the cleanups from commit aa6f8dcbab47 ("swiotlb:
rework "fix info leak with DMA_FROM_DEVICE""), and effectively only
revert the part that caused problems.
Link: https://lore.kernel.org/all/20220328013731.017ae3e3.pasic@linux.ibm.com/ [1]
Link: https://lore.kernel.org/all/20220324055732.GB12078@lst.de/ [2]
Link: https://lore.kernel.org/all/4386660.LvFx2qVVIh@natalenko.name/ [3]
Suggested-by: Halil Pasic <pasic(a)linux.ibm.com>
Tested-by: Oleksandr Natalenko <oleksandr(a)natalenko.name>
Cc: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
[OP: backport to 5.4: adjusted context]
Signed-off-by: Ovidiu Panait <ovidiu.panait(a)windriver.com>
---
This is part of CVE-2022-0854 patchset:
[1] ddbd89deb7d3 ("swiotlb: fix info leak with DMA_FROM_DEVICE")
[2] 901c7280ca0d ("Reinstate some of "swiotlb: rework "fix info leak with DMA_FROM_DEVICE""")
[1] is already present in 5.4-stable.
[2] is present in 5.17/5.16/5.15, but not in 5.10 and 5.4 branches.
Documentation/DMA-attributes.txt | 10 ----------
include/linux/dma-mapping.h | 8 --------
kernel/dma/swiotlb.c | 13 ++++++++-----
3 files changed, 8 insertions(+), 23 deletions(-)
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index 7193505a98ca..8f8d97f65d73 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -156,13 +156,3 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged
subsystem that the buffer is fully accessible at the elevated privilege
level (and ideally inaccessible or at least read-only at the
lesser-privileged levels).
-
-DMA_ATTR_PRIVILEGED
--------------------
-
-Some advanced peripherals such as remote processors and GPUs perform
-accesses to DMA buffers in both privileged "supervisor" and unprivileged
-"user" modes. This attribute is used to indicate to the DMA-mapping
-subsystem that the buffer is fully accessible at the elevated privilege
-level (and ideally inaccessible or at least read-only at the
-lesser-privileged levels).
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index da90f20e11c1..4d450672b7d6 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -70,14 +70,6 @@
*/
#define DMA_ATTR_PRIVILEGED (1UL << 9)
-/*
- * This is a hint to the DMA-mapping subsystem that the device is expected
- * to overwrite the entire mapped size, thus the caller does not require any
- * of the previous buffer contents to be preserved. This allows
- * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers.
- */
-#define DMA_ATTR_OVERWRITE (1UL << 10)
-
/*
* A dma_addr_t can hold any valid DMA or bus address for the platform.
* It can be given to a device to use as a DMA source or target. A CPU cannot
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f17b771856d1..913cb71198af 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -571,11 +571,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
*/
for (i = 0; i < nslots; i++)
io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE ||
- dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
+ /*
+ * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
+ * to the tlb buffer, if we knew for sure the device will
+ * overwrite the entire current content. But we don't. Thus
+ * unconditional bounce may prevent leaking swiotlb content (i.e.
+ * kernel memory) to user-space.
+ */
+ swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
return tlb_addr;
}
--
2.36.1
This bug is marked as fixed by commit:
net: core: netlink: add helper refcount dec and lock function
net: sched: add helper function to take reference to Qdisc
net: sched: extend Qdisc with rcu
net: sched: rename qdisc_destroy() to qdisc_put()
net: sched: use Qdisc rcu API instead of relying on rtnl lock
But I can't find it in any tested tree for more than 90 days.
Is it a correct commit? Please update it by replying:
#syz fix: exact-commit-title
Until then the bug is still considered open and
new crashes with the same signature are ignored.
Because the scheduler allocates only the optimal bandwidth for FS ISOC
endpoints, this may actually not be enough and can cause data transfer
errors, so come up with an estimate that is no less than the worst-case
bandwidth used for any one mframe, but may be an over-estimate.
Fixes: 451d3912586a ("usb: xhci-mtk: update fs bus bandwidth by bw_budget_table")
Cc: stable(a)vger.kernel.org
Signed-off-by: Chunfeng Yun <chunfeng.yun(a)mediatek.com>
---
drivers/usb/host/xhci-mtk-sch.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c
index f3139ce7b0a9..953d2cd1d4cc 100644
--- a/drivers/usb/host/xhci-mtk-sch.c
+++ b/drivers/usb/host/xhci-mtk-sch.c
@@ -464,7 +464,7 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset)
*/
for (j = 0; j < sch_ep->num_budget_microframes; j++) {
k = XHCI_MTK_BW_INDEX(base + j);
- tmp = tt->fs_bus_bw[k] + sch_ep->bw_budget_table[j];
+ tmp = tt->fs_bus_bw[k] + sch_ep->bw_cost_per_microframe;
if (tmp > FS_PAYLOAD_MAX)
return -ESCH_BW_OVERFLOW;
}
@@ -538,19 +538,17 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset)
static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used)
{
struct mu3h_sch_tt *tt = sch_ep->sch_tt;
+ int bw_updated;
u32 base;
- int i, j, k;
+ int i, j;
+
+ bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1);
for (i = 0; i < sch_ep->num_esit; i++) {
base = sch_ep->offset + i * sch_ep->esit;
- for (j = 0; j < sch_ep->num_budget_microframes; j++) {
- k = XHCI_MTK_BW_INDEX(base + j);
- if (used)
- tt->fs_bus_bw[k] += sch_ep->bw_budget_table[j];
- else
- tt->fs_bus_bw[k] -= sch_ep->bw_budget_table[j];
- }
+ for (j = 0; j < sch_ep->num_budget_microframes; j++)
+ tt->fs_bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated;
}
if (used)
--
2.18.0
The VBT send packet port selection was never updated for ICL+ where the
2nd link is on port B instead of port C as in VLV+ DSI.
First, single link DSI needs to use the configured port instead of
relying on the VBT sequence block port. Remove the hard-coded port C
check here and make it generic. For reference, see commit f915084edc5a
("drm/i915: Changes related to the sequence port no for") for the
original VLV specific fix.
Second, the sequence block port number is either 0 or 1, where 1
indicates the 2nd link. Remove the hard-coded port C here for 2nd
link. (This could be a "find second set bit" on DSI ports, but just
check the two possible options.)
Third, sanity check the result with a warning to avoid a NULL pointer
dereference.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5984
Cc: stable(a)vger.kernel.org # v4.19+
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 33 +++++++++++++-------
1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f370e9c4350d..dd24aef925f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -125,9 +125,25 @@ struct i2c_adapter_lookup {
#define ICL_GPIO_DDPA_CTRLCLK_2 8
#define ICL_GPIO_DDPA_CTRLDATA_2 9
-static enum port intel_dsi_seq_port_to_port(u8 port)
+static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
+ u8 seq_port)
{
- return port ? PORT_C : PORT_A;
+ /*
+ * If single link DSI is being used on any port, the VBT sequence block
+ * send packet apparently always has 0 for the port. Just use the port
+ * we have configured, and ignore the sequence block port.
+ */
+ if (hweight8(intel_dsi->ports) == 1)
+ return ffs(intel_dsi->ports) - 1;
+
+ if (seq_port) { /* 2nd link; ->ports is a bitmask of (1 << port), so test bits, not raw enum values */
+ if (intel_dsi->ports & (1 << PORT_B))
+ return PORT_B;
+ else if (intel_dsi->ports & (1 << PORT_C))
+ return PORT_C;
+ }
+
+ return PORT_A;
}
static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
@@ -149,15 +165,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
seq_port = (flags >> MIPI_PORT_SHIFT) & 3;
- /* For DSI single link on Port A & C, the seq_port value which is
- * parsed from Sequence Block#53 of VBT has been set to 0
- * Now, read/write of packets for the DSI single link on Port A and
- * Port C will based on the DVO port from VBT block 2.
- */
- if (intel_dsi->ports == (1 << PORT_C))
- port = PORT_C;
- else
- port = intel_dsi_seq_port_to_port(seq_port);
+ port = intel_dsi_seq_port_to_port(intel_dsi, seq_port);
+
+ if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port]))
+ goto out;
dsi_device = intel_dsi->dsi_hosts[port]->device;
if (!dsi_device) {
--
2.30.2
Dzień dobry,
czy rozważali Państwo rozwój kwalifikacji językowych swoich pracowników?
Opracowaliśmy kursy językowe dla różnych branż, w których koncentrujemy się na podniesieniu poziomu słownictwa i jakości komunikacji wykorzystując autorską metodę, stworzoną specjalnie dla wymagającego biznesu.
Niestandardowy kurs on-line, dopasowany do profilu firmy i obszarów świadczonych usług, w szybkim czasie przyniesie efekty, które zwiększą komfort i jakość pracy, rozwijając możliwości biznesowe.
Zdalne szkolenie językowe to m.in. zajęcia z native speakerami, które w szybkim czasie nauczą pracowników rozmawiać za pomocą jasnego i zwięzłego języka Business English.
Czy mógłbym przedstawić więcej szczegółów i opowiedzieć jak działamy?
Pozdrawiam
Krzysztof Maj
The original x86 sev_alloc() only called set_memory_decrypted() on
memory returned by alloc_pages_node(), so the page order calculation
fell out of that logic. However, the common dma-direct code has several
potential allocators, not all of which are guaranteed to round up the
underlying allocation to a power-of-two size, so carrying over that
calculation for the encryption/decryption size was a mistake. Fix it by
rounding to a *number* of pages, rather than an order.
Until recently there was an even worse interaction with DMA_DIRECT_REMAP
where we could have ended up decrypting part of the next adjacent
vmalloc area, only averted by no architecture actually supporting both
configs at once. Don't ask how I found that one out...
CC: stable(a)vger.kernel.org
Fixes: c10f07aa27da ("dma/direct: Handle force decryption for DMA coherent buffers in common code")
Signed-off-by: Robin Murphy <robin.murphy(a)arm.com>
---
kernel/dma/direct.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 9743c6ccce1a..09d78aa40466 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -79,7 +79,7 @@ static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size)
{
if (!force_dma_unencrypted(dev))
return 0;
- return set_memory_decrypted((unsigned long)vaddr, 1 << get_order(size));
+ return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size));
}
static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size)
@@ -88,7 +88,7 @@ static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size)
if (!force_dma_unencrypted(dev))
return 0;
- ret = set_memory_encrypted((unsigned long)vaddr, 1 << get_order(size));
+ ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size));
if (ret)
pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n");
return ret;
--
2.35.3.dirty
The journal no-space deadlock has been reported from time to time. Such a
deadlock can happen in the following situation.
When all journal buckets are fully filled by active jsets under heavy
write I/O load, the cache set registration (after a reboot) will load
all active jsets and insert them into the btree again (which is
called journal replay). If a journaled bkey is inserted into a btree
node and results in a btree node split, a new journal request might be
triggered. For example, if the btree grows one more level after the node
split, then the root node record in the cache device super block will be
updated by bch_journal_meta() from bch_btree_set_root(). But there is no
space in the journal buckets, so the journal replay has to wait for a new
journal bucket to be reclaimed after at least one journal bucket is
replayed. This is one example of how the journal no-space deadlock happens.
The solution to avoid the deadlock is to reserve 1 journal bucket at
run time, and only permit the reserved journal bucket to be used during
the cache set registration procedure for things like journal replay. Then
the journal space will never be fully filled, and there is no chance for
the journal no-space deadlock to happen anymore.
This patch adds a new member "bool do_reserve" in struct journal, it is
initialized to 0 (false) when struct journal is allocated, and set to
1 (true) by bch_journal_space_reserve() when all initialization is done in
run_cache_set(). At run time, when journal_reclaim() tries to
allocate a new journal bucket, free_journal_buckets() is called to check
whether there are enough free journal buckets to use. If there is only
1 free journal bucket and journal->do_reserve is 1 (true), the last
bucket is reserved and free_journal_buckets() will return 0 to indicate
no free journal bucket. Then journal_reclaim() will give up, and try
next time to see whether there is a free journal bucket to allocate. By
this method, there is always 1 journal bucket reserved at run time.
During the cache set registration, journal->do_reserve is 0 (false), so
the reserved journal bucket can be used to avoid the no-space deadlock.
Reported-by: Nikhil Kshirsagar <nkshirsagar(a)gmail.com>
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: stable(a)vger.kernel.org
---
drivers/md/bcache/journal.c | 31 ++++++++++++++++++++++++++-----
drivers/md/bcache/journal.h | 2 ++
drivers/md/bcache/super.c | 1 +
3 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index df5347ea450b..e5da469a4235 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -405,6 +405,11 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
return ret;
}
+void bch_journal_space_reserve(struct journal *j)
+{
+ j->do_reserve = true; /* free_journal_buckets() now holds back one extra bucket */
+}
+
/* Journalling */
static void btree_flush_write(struct cache_set *c)
@@ -621,12 +626,30 @@ static void do_journal_discard(struct cache *ca)
}
}
+static unsigned int free_journal_buckets(struct cache_set *c)
+{
+ struct journal *j = &c->journal;
+ struct cache *ca = c->cache;
+ struct journal_device *ja = &c->cache->journal;
+ unsigned int n;
+
+ /* Distance from cur_idx forward to discard_idx, wrapped by hand in case njournal_buckets is not power of 2 */
+ if (ja->cur_idx >= ja->discard_idx)
+ n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx;
+ else
+ n = ja->discard_idx - ja->cur_idx;
+
+ if (n > (1 + j->do_reserve)) /* 1 bucket is never handed out, plus 1 more while do_reserve is set */
+ return n - (1 + j->do_reserve);
+
+ return 0;
+}
+
static void journal_reclaim(struct cache_set *c)
{
struct bkey *k = &c->journal.key;
struct cache *ca = c->cache;
uint64_t last_seq;
- unsigned int next;
struct journal_device *ja = &ca->journal;
atomic_t p __maybe_unused;
@@ -649,12 +672,10 @@ static void journal_reclaim(struct cache_set *c)
if (c->journal.blocks_free)
goto out;
- next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
- /* No space available on this device */
- if (next == ja->discard_idx)
+ if (!free_journal_buckets(c))
goto out;
- ja->cur_idx = next;
+ ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
k->ptr[0] = MAKE_PTR(0,
bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
ca->sb.nr_this_dev);
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index f2ea34d5f431..cd316b4a1e95 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -105,6 +105,7 @@ struct journal {
spinlock_t lock;
spinlock_t flush_write_lock;
bool btree_flushing;
+ bool do_reserve;
/* used when waiting because the journal was full */
struct closure_waitlist wait;
struct closure io;
@@ -182,5 +183,6 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list);
void bch_journal_free(struct cache_set *c);
int bch_journal_alloc(struct cache_set *c);
+void bch_journal_space_reserve(struct journal *j);
#endif /* _BCACHE_JOURNAL_H */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bf3de149d3c9..2bb55278d22d 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2128,6 +2128,7 @@ static int run_cache_set(struct cache_set *c)
flash_devs_run(c);
+ bch_journal_space_reserve(&c->journal);
set_bit(CACHE_SET_RUNNING, &c->flags);
return 0;
err:
--
2.35.3