[ Upstream commit bdcb8aa434c6d36b5c215d02a9ef07551be25a37 ]
In gfs2_put_super(), whether withdrawn or not, the quota should
be cleaned up by gfs2_quota_cleanup().
Otherwise, struct gfs2_sbd will be freed before gfs2_qd_dealloc (rcu
callback) has run for all gfs2_quota_data objects, resulting in
use-after-free.
Also, gfs2_destroy_threads() and gfs2_quota_cleanup() is already called
by gfs2_make_fs_ro(), so in gfs2_put_super(), after calling
gfs2_make_fs_ro(), there is no need to call them again.
The origin of a cherry-pick conflict is the (relevant) code block added in
commit f66af88e3321 ("gfs2: Stop using gfs2_make_fs_ro for withdraw")
There are no references to gfs2_withdrawn() nor gfs2_destroy_threads() in
gfs2_put_super(), so we can simply call gfs2_quota_cleanup() in a new else
block as bdcb8aa434c6 achieves.
Else braces were used for consistency with the if block.
Sponsor: 21SoftWare LLC
Signed-off-by: Clayton Casciato <majortomtosourcecontrol(a)gmail.com>
---
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 302d1e43d701..6107cd680176 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -591,6 +591,8 @@ static void gfs2_put_super(struct super_block *sb)
if (!sb_rdonly(sb)) {
gfs2_make_fs_ro(sdp);
+ } else {
+ gfs2_quota_cleanup(sdp);
}
WARN_ON(gfs2_withdrawing(sdp));
When trying to allocate a hugepage with none reserved ones free, it may
be allowed in case a number of overcommit hugepages was configured (using
/proc/sys/vm/nr_overcommit_hugepages) and that number wasn't reached. This
allows for a behavior of having extra hugepages allocated dynamically, if
there're resources for it. Some sysadmins even prefer not reserving any
hugepages and setting a big number of overcommit hugepages.
But while attempting to allocate overcommit hugepages in a multi node
system (either NUMA or mempolicy/cpuset) said allocations might randomly
fail even when there're resources available for the allocation.
This happens due allowed_mems_nr() only accounting for the number of free hugepages
in the nodes the current process belongs to and the surplus hugepage allocation is
done so it can be allocated in any node. In case one or more of the requested
surplus hugepages are allocated in a different node, the whole allocation will fail
due allowed_mems_nr() returning a lower value.
So allocate surplus hugepages in one of the nodes the current process belongs to.
Easy way to reproduce this issue is to use a 2+ NUMA nodes system:
# echo 0 >/proc/sys/vm/nr_hugepages
# echo 1 >/proc/sys/vm/nr_overcommit_hugepages
# numactl -m0 ./tools/testing/selftests/mm/map_hugetlb 2
Repeating the execution of map_hugetlb test application will eventually fail
when the hugepage ends up allocated in a different node.
v2: - attempt to make the description more clear
- prevent unitialized usage of folio in case current process isn't part of any
nodes with memory
Cc: Vishal Moola <vishal.moola(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Aristeu Rozanski <aris(a)redhat.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Signed-off-by: Aristeu Rozanski <aris(a)ruivo.org>
---
mm/hugetlb.c | 47 ++++++++++++++++++++++++++++-------------------
1 file changed, 28 insertions(+), 19 deletions(-)
--- upstream.orig/mm/hugetlb.c 2024-06-20 13:42:25.699568114 -0400
+++ upstream/mm/hugetlb.c 2024-07-01 16:48:53.693298053 -0400
@@ -2618,6 +2618,23 @@ struct folio *alloc_hugetlb_folio_nodema
return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid, nmask);
}
+static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
+{
+#ifdef CONFIG_NUMA
+ struct mempolicy *mpol = get_task_policy(current);
+
+ /*
+ * Only enforce MPOL_BIND policy which overlaps with cpuset policy
+ * (from policy_nodemask) specifically for hugetlb case
+ */
+ if (mpol->mode == MPOL_BIND &&
+ (apply_policy_zone(mpol, gfp_zone(gfp)) &&
+ cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
+ return &mpol->nodes;
+#endif
+ return NULL;
+}
+
/*
* Increase the hugetlb pool such that it can accommodate a reservation
* of size 'delta'.
@@ -2631,6 +2648,8 @@ static int gather_surplus_pages(struct h
long i;
long needed, allocated;
bool alloc_ok = true;
+ int node;
+ nodemask_t *mbind_nodemask = policy_mbind_nodemask(htlb_alloc_mask(h));
lockdep_assert_held(&hugetlb_lock);
needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
@@ -2645,8 +2664,15 @@ allocated = 0;
retry:
spin_unlock_irq(&hugetlb_lock);
for (i = 0; i < needed; i++) {
- folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
- NUMA_NO_NODE, NULL);
+ folio = NULL;
+ for_each_node_mask(node, cpuset_current_mems_allowed) {
+ if (!mbind_nodemask || node_isset(node, *mbind_nodemask)) {
+ folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
+ node, NULL);
+ if (folio)
+ break;
+ }
+ }
if (!folio) {
alloc_ok = false;
break;
@@ -4876,23 +4902,6 @@ default_hstate_max_huge_pages = 0;
}
__setup("default_hugepagesz=", default_hugepagesz_setup);
-static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
-{
-#ifdef CONFIG_NUMA
- struct mempolicy *mpol = get_task_policy(current);
-
- /*
- * Only enforce MPOL_BIND policy which overlaps with cpuset policy
- * (from policy_nodemask) specifically for hugetlb case
- */
- if (mpol->mode == MPOL_BIND &&
- (apply_policy_zone(mpol, gfp_zone(gfp)) &&
- cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
- return &mpol->nodes;
-#endif
- return NULL;
-}
-
static unsigned int allowed_mems_nr(struct hstate *h)
{
int node;
[ Upstream commit bdcb8aa434c6d36b5c215d02a9ef07551be25a37 ]
In gfs2_put_super(), whether withdrawn or not, the quota should
be cleaned up by gfs2_quota_cleanup().
Otherwise, struct gfs2_sbd will be freed before gfs2_qd_dealloc (rcu
callback) has run for all gfs2_quota_data objects, resulting in
use-after-free.
Also, gfs2_destroy_threads() and gfs2_quota_cleanup() is already called
by gfs2_make_fs_ro(), so in gfs2_put_super(), after calling
gfs2_make_fs_ro(), there is no need to call them again.
Backport notes:
The origin of a cherry-pick conflict is the (relevant) code block added in
commit f66af88e3321 ("gfs2: Stop using gfs2_make_fs_ro for withdraw")
There are no references to gfs2_withdrawn() nor gfs2_destroy_threads() in
gfs2_put_super(), so simply call gfs2_quota_cleanup() in a new else block
as bdcb8aa434c6 achieves.
Use else braces for consistency with the if block.
Reported-by: syzbot+29c47e9e51895928698c(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=29c47e9e51895928698c
Signed-off-by: Juntong Deng <juntong.deng(a)outlook.com>
Signed-off-by: Andreas Gruenbacher <agruenba(a)redhat.com>
Signed-off-by: Clayton Casciato <majortomtosourcecontrol(a)gmail.com>
---
v1 -> v2:
Remove invalid tag
Add upstream commit's tags
Use current mailing list for GFS2
Use branch fragment instead of Git tag in subject
Differentiate upstream commit body and backport notes
Make body more imperative
Sponsor: 21SoftWare LLC
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 302d1e43d701..6107cd680176 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -591,6 +591,8 @@ static void gfs2_put_super(struct super_block *sb)
if (!sb_rdonly(sb)) {
gfs2_make_fs_ro(sdp);
+ } else {
+ gfs2_quota_cleanup(sdp);
}
WARN_ON(gfs2_withdrawing(sdp));
Current ata_gen_passthru_sense() code performs two actions:
1. Generates sense data based on the ATA 'status' and ATA 'error' fields.
2. Populates "ATA Status Return sense data descriptor" / "Fixed format
sense data" with ATA taskfile fields.
The problem is that #1 generates sense data even when a valid sense data
is already present (ATA_QCFLAG_SENSE_VALID is set). Factoring out #2 into
a separate function allows us to generate sense data only when there is
no valid sense data (ATA_QCFLAG_SENSE_VALID is not set).
As a bonus, we can now delete a FIXME comment in atapi_qc_complete()
which states that we don't want to translate taskfile registers into
sense descriptors for ATAPI.
Cc: stable(a)vger.kernel.org # 4.19+
Reviewed-by: Hannes Reinecke <hare(a)suse.de>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Igor Pylypiv <ipylypiv(a)google.com>
---
drivers/ata/libata-scsi.c | 158 +++++++++++++++++++++-----------------
1 file changed, 86 insertions(+), 72 deletions(-)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index a9e44ad4c2de..26b1263f5c7c 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -230,6 +230,80 @@ void ata_scsi_set_sense_information(struct ata_device *dev,
SCSI_SENSE_BUFFERSIZE, information);
}
+/**
+ * ata_scsi_set_passthru_sense_fields - Set ATA fields in sense buffer
+ * @qc: ATA PASS-THROUGH command.
+ *
+ * Populates "ATA Status Return sense data descriptor" / "Fixed format
+ * sense data" with ATA taskfile fields.
+ *
+ * LOCKING:
+ * None.
+ */
+static void ata_scsi_set_passthru_sense_fields(struct ata_queued_cmd *qc)
+{
+ struct scsi_cmnd *cmd = qc->scsicmd;
+ struct ata_taskfile *tf = &qc->result_tf;
+ unsigned char *sb = cmd->sense_buffer;
+
+ if ((sb[0] & 0x7f) >= 0x72) {
+ unsigned char *desc;
+ u8 len;
+
+ /* descriptor format */
+ len = sb[7];
+ desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
+ if (!desc) {
+ if (SCSI_SENSE_BUFFERSIZE < len + 14)
+ return;
+ sb[7] = len + 14;
+ desc = sb + 8 + len;
+ }
+ desc[0] = 9;
+ desc[1] = 12;
+ /*
+ * Copy registers into sense buffer.
+ */
+ desc[2] = 0x00;
+ desc[3] = tf->error;
+ desc[5] = tf->nsect;
+ desc[7] = tf->lbal;
+ desc[9] = tf->lbam;
+ desc[11] = tf->lbah;
+ desc[12] = tf->device;
+ desc[13] = tf->status;
+
+ /*
+ * Fill in Extend bit, and the high order bytes
+ * if applicable.
+ */
+ if (tf->flags & ATA_TFLAG_LBA48) {
+ desc[2] |= 0x01;
+ desc[4] = tf->hob_nsect;
+ desc[6] = tf->hob_lbal;
+ desc[8] = tf->hob_lbam;
+ desc[10] = tf->hob_lbah;
+ }
+ } else {
+ /* Fixed sense format */
+ sb[0] |= 0x80;
+ sb[3] = tf->error;
+ sb[4] = tf->status;
+ sb[5] = tf->device;
+ sb[6] = tf->nsect;
+ if (tf->flags & ATA_TFLAG_LBA48) {
+ sb[8] |= 0x80;
+ if (tf->hob_nsect)
+ sb[8] |= 0x40;
+ if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
+ sb[8] |= 0x20;
+ }
+ sb[9] = tf->lbal;
+ sb[10] = tf->lbam;
+ sb[11] = tf->lbah;
+ }
+}
+
static void ata_scsi_set_invalid_field(struct ata_device *dev,
struct scsi_cmnd *cmd, u16 field, u8 bit)
{
@@ -837,10 +911,8 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
* ata_gen_passthru_sense - Generate check condition sense block.
* @qc: Command that completed.
*
- * This function is specific to the ATA descriptor format sense
- * block specified for the ATA pass through commands. Regardless
- * of whether the command errored or not, return a sense
- * block. Copy all controller registers into the sense
+ * This function is specific to the ATA pass through commands.
+ * Regardless of whether the command errored or not, return a sense
* block. If there was no error, we get the request from an ATA
* passthrough command, so we use the following sense data:
* sk = RECOVERED ERROR
@@ -875,63 +947,6 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
*/
scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
}
-
- if ((sb[0] & 0x7f) >= 0x72) {
- unsigned char *desc;
- u8 len;
-
- /* descriptor format */
- len = sb[7];
- desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
- if (!desc) {
- if (SCSI_SENSE_BUFFERSIZE < len + 14)
- return;
- sb[7] = len + 14;
- desc = sb + 8 + len;
- }
- desc[0] = 9;
- desc[1] = 12;
- /*
- * Copy registers into sense buffer.
- */
- desc[2] = 0x00;
- desc[3] = tf->error;
- desc[5] = tf->nsect;
- desc[7] = tf->lbal;
- desc[9] = tf->lbam;
- desc[11] = tf->lbah;
- desc[12] = tf->device;
- desc[13] = tf->status;
-
- /*
- * Fill in Extend bit, and the high order bytes
- * if applicable.
- */
- if (tf->flags & ATA_TFLAG_LBA48) {
- desc[2] |= 0x01;
- desc[4] = tf->hob_nsect;
- desc[6] = tf->hob_lbal;
- desc[8] = tf->hob_lbam;
- desc[10] = tf->hob_lbah;
- }
- } else {
- /* Fixed sense format */
- sb[0] |= 0x80;
- sb[3] = tf->error;
- sb[4] = tf->status;
- sb[5] = tf->device;
- sb[6] = tf->nsect;
- if (tf->flags & ATA_TFLAG_LBA48) {
- sb[8] |= 0x80;
- if (tf->hob_nsect)
- sb[8] |= 0x40;
- if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
- sb[8] |= 0x20;
- }
- sb[9] = tf->lbal;
- sb[10] = tf->lbam;
- sb[11] = tf->lbah;
- }
}
/**
@@ -1634,6 +1649,8 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
u8 *cdb = cmd->cmnd;
int need_sense = (qc->err_mask != 0) &&
!(qc->flags & ATA_QCFLAG_SENSE_VALID);
+ int need_passthru_sense = (qc->err_mask != 0) ||
+ (qc->flags & ATA_QCFLAG_SENSE_VALID);
/* For ATA pass thru (SAT) commands, generate a sense block if
* user mandated it or if there's an error. Note that if we
@@ -1645,13 +1662,16 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
* asc,ascq = ATA PASS-THROUGH INFORMATION AVAILABLE
*/
if (((cdb[0] == ATA_16) || (cdb[0] == ATA_12)) &&
- ((cdb[2] & 0x20) || need_sense))
- ata_gen_passthru_sense(qc);
- else if (need_sense)
+ ((cdb[2] & 0x20) || need_passthru_sense)) {
+ if (!(qc->flags & ATA_QCFLAG_SENSE_VALID))
+ ata_gen_passthru_sense(qc);
+ ata_scsi_set_passthru_sense_fields(qc);
+ } else if (need_sense) {
ata_gen_ata_sense(qc);
- else
+ } else {
/* Keep the SCSI ML and status byte, clear host byte. */
cmd->result &= 0x0000ffff;
+ }
ata_qc_done(qc);
}
@@ -2590,14 +2610,8 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
/* handle completion from EH */
if (unlikely(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID)) {
- if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
- /* FIXME: not quite right; we don't want the
- * translation of taskfile registers into a
- * sense descriptors, since that's only
- * correct for ATA, not ATAPI
- */
+ if (!(qc->flags & ATA_QCFLAG_SENSE_VALID))
ata_gen_passthru_sense(qc);
- }
/* SCSI EH automatically locks door if sdev->locked is
* set. Sometimes door lock request continues to
--
2.45.2.803.g4e1b14247a-goog
SAT-5 revision 8 specification removed the text about the ANSI INCITS
431-2007 compliance which was requiring SCSI/ATA Translation (SAT) to
return descriptor format sense data for the ATA PASS-THROUGH commands
regardless of the setting of the D_SENSE bit.
Let's honour the D_SENSE bit for CK_COND=1 commands that had no error.
Kernel already honours the D_SENSE bit when creating the sense buffer
for commands that had an error.
SAT-5 revision 7
================
12.2.2.8 Fixed format sense data
Table 212 shows the fields returned in the fixed format sense data
(see SPC-5) for ATA PASS-THROUGH commands. SATLs compliant with ANSI
INCITS 431-2007, SCSI/ATA Translation (SAT) return descriptor format
sense data for the ATA PASS-THROUGH commands regardless of the setting
of the D_SENSE bit.
SAT-5 revision 8
================
12.2.2.8 Fixed format sense data
Table 211 shows the fields returned in the fixed format sense data
(see SPC-5) for ATA PASS-THROUGH commands.
Cc: stable(a)vger.kernel.org # 4.19+
Reported-by: Niklas Cassel <cassel(a)kernel.org>
Closes: https://lore.kernel.org/linux-ide/Zn1WUhmLglM4iais@ryzen.lan
Signed-off-by: Igor Pylypiv <ipylypiv(a)google.com>
---
drivers/ata/libata-scsi.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 26b1263f5c7c..ace6b009e7ff 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -941,11 +941,8 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
&sense_key, &asc, &ascq);
ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
} else {
- /*
- * ATA PASS-THROUGH INFORMATION AVAILABLE
- * Always in descriptor format sense.
- */
- scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
+ /* ATA PASS-THROUGH INFORMATION AVAILABLE */
+ ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D);
}
}
--
2.45.2.803.g4e1b14247a-goog
Current ata_gen_passthru_sense() code performs two actions:
1. Generates sense data based on the ATA 'status' and ATA 'error' fields.
2. Populates "ATA Status Return sense data descriptor" / "Fixed format
sense data" with ATA taskfile fields.
The problem is that #1 generates sense data even when a valid sense data
is already present (ATA_QCFLAG_SENSE_VALID is set). Factoring out #2 into
a separate function allows us to generate sense data only when there is
no valid sense data (ATA_QCFLAG_SENSE_VALID is not set).
As a bonus, we can now delete a FIXME comment in atapi_qc_complete()
which states that we don't want to translate taskfile registers into
sense descriptors for ATAPI.
Cc: stable(a)vger.kernel.org
Reviewed-by: Hannes Reinecke <hare(a)suse.de>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Igor Pylypiv <ipylypiv(a)google.com>
---
drivers/ata/libata-scsi.c | 158 +++++++++++++++++++++-----------------
1 file changed, 86 insertions(+), 72 deletions(-)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index a9e44ad4c2de..26b1263f5c7c 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -230,6 +230,80 @@ void ata_scsi_set_sense_information(struct ata_device *dev,
SCSI_SENSE_BUFFERSIZE, information);
}
+/**
+ * ata_scsi_set_passthru_sense_fields - Set ATA fields in sense buffer
+ * @qc: ATA PASS-THROUGH command.
+ *
+ * Populates "ATA Status Return sense data descriptor" / "Fixed format
+ * sense data" with ATA taskfile fields.
+ *
+ * LOCKING:
+ * None.
+ */
+static void ata_scsi_set_passthru_sense_fields(struct ata_queued_cmd *qc)
+{
+ struct scsi_cmnd *cmd = qc->scsicmd;
+ struct ata_taskfile *tf = &qc->result_tf;
+ unsigned char *sb = cmd->sense_buffer;
+
+ if ((sb[0] & 0x7f) >= 0x72) {
+ unsigned char *desc;
+ u8 len;
+
+ /* descriptor format */
+ len = sb[7];
+ desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
+ if (!desc) {
+ if (SCSI_SENSE_BUFFERSIZE < len + 14)
+ return;
+ sb[7] = len + 14;
+ desc = sb + 8 + len;
+ }
+ desc[0] = 9;
+ desc[1] = 12;
+ /*
+ * Copy registers into sense buffer.
+ */
+ desc[2] = 0x00;
+ desc[3] = tf->error;
+ desc[5] = tf->nsect;
+ desc[7] = tf->lbal;
+ desc[9] = tf->lbam;
+ desc[11] = tf->lbah;
+ desc[12] = tf->device;
+ desc[13] = tf->status;
+
+ /*
+ * Fill in Extend bit, and the high order bytes
+ * if applicable.
+ */
+ if (tf->flags & ATA_TFLAG_LBA48) {
+ desc[2] |= 0x01;
+ desc[4] = tf->hob_nsect;
+ desc[6] = tf->hob_lbal;
+ desc[8] = tf->hob_lbam;
+ desc[10] = tf->hob_lbah;
+ }
+ } else {
+ /* Fixed sense format */
+ sb[0] |= 0x80;
+ sb[3] = tf->error;
+ sb[4] = tf->status;
+ sb[5] = tf->device;
+ sb[6] = tf->nsect;
+ if (tf->flags & ATA_TFLAG_LBA48) {
+ sb[8] |= 0x80;
+ if (tf->hob_nsect)
+ sb[8] |= 0x40;
+ if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
+ sb[8] |= 0x20;
+ }
+ sb[9] = tf->lbal;
+ sb[10] = tf->lbam;
+ sb[11] = tf->lbah;
+ }
+}
+
static void ata_scsi_set_invalid_field(struct ata_device *dev,
struct scsi_cmnd *cmd, u16 field, u8 bit)
{
@@ -837,10 +911,8 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
* ata_gen_passthru_sense - Generate check condition sense block.
* @qc: Command that completed.
*
- * This function is specific to the ATA descriptor format sense
- * block specified for the ATA pass through commands. Regardless
- * of whether the command errored or not, return a sense
- * block. Copy all controller registers into the sense
+ * This function is specific to the ATA pass through commands.
+ * Regardless of whether the command errored or not, return a sense
* block. If there was no error, we get the request from an ATA
* passthrough command, so we use the following sense data:
* sk = RECOVERED ERROR
@@ -875,63 +947,6 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
*/
scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
}
-
- if ((sb[0] & 0x7f) >= 0x72) {
- unsigned char *desc;
- u8 len;
-
- /* descriptor format */
- len = sb[7];
- desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
- if (!desc) {
- if (SCSI_SENSE_BUFFERSIZE < len + 14)
- return;
- sb[7] = len + 14;
- desc = sb + 8 + len;
- }
- desc[0] = 9;
- desc[1] = 12;
- /*
- * Copy registers into sense buffer.
- */
- desc[2] = 0x00;
- desc[3] = tf->error;
- desc[5] = tf->nsect;
- desc[7] = tf->lbal;
- desc[9] = tf->lbam;
- desc[11] = tf->lbah;
- desc[12] = tf->device;
- desc[13] = tf->status;
-
- /*
- * Fill in Extend bit, and the high order bytes
- * if applicable.
- */
- if (tf->flags & ATA_TFLAG_LBA48) {
- desc[2] |= 0x01;
- desc[4] = tf->hob_nsect;
- desc[6] = tf->hob_lbal;
- desc[8] = tf->hob_lbam;
- desc[10] = tf->hob_lbah;
- }
- } else {
- /* Fixed sense format */
- sb[0] |= 0x80;
- sb[3] = tf->error;
- sb[4] = tf->status;
- sb[5] = tf->device;
- sb[6] = tf->nsect;
- if (tf->flags & ATA_TFLAG_LBA48) {
- sb[8] |= 0x80;
- if (tf->hob_nsect)
- sb[8] |= 0x40;
- if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
- sb[8] |= 0x20;
- }
- sb[9] = tf->lbal;
- sb[10] = tf->lbam;
- sb[11] = tf->lbah;
- }
}
/**
@@ -1634,6 +1649,8 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
u8 *cdb = cmd->cmnd;
int need_sense = (qc->err_mask != 0) &&
!(qc->flags & ATA_QCFLAG_SENSE_VALID);
+ int need_passthru_sense = (qc->err_mask != 0) ||
+ (qc->flags & ATA_QCFLAG_SENSE_VALID);
/* For ATA pass thru (SAT) commands, generate a sense block if
* user mandated it or if there's an error. Note that if we
@@ -1645,13 +1662,16 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
* asc,ascq = ATA PASS-THROUGH INFORMATION AVAILABLE
*/
if (((cdb[0] == ATA_16) || (cdb[0] == ATA_12)) &&
- ((cdb[2] & 0x20) || need_sense))
- ata_gen_passthru_sense(qc);
- else if (need_sense)
+ ((cdb[2] & 0x20) || need_passthru_sense)) {
+ if (!(qc->flags & ATA_QCFLAG_SENSE_VALID))
+ ata_gen_passthru_sense(qc);
+ ata_scsi_set_passthru_sense_fields(qc);
+ } else if (need_sense) {
ata_gen_ata_sense(qc);
- else
+ } else {
/* Keep the SCSI ML and status byte, clear host byte. */
cmd->result &= 0x0000ffff;
+ }
ata_qc_done(qc);
}
@@ -2590,14 +2610,8 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
/* handle completion from EH */
if (unlikely(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID)) {
- if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
- /* FIXME: not quite right; we don't want the
- * translation of taskfile registers into a
- * sense descriptors, since that's only
- * correct for ATA, not ATAPI
- */
+ if (!(qc->flags & ATA_QCFLAG_SENSE_VALID))
ata_gen_passthru_sense(qc);
- }
/* SCSI EH automatically locks door if sdev->locked is
* set. Sometimes door lock request continues to
--
2.45.2.803.g4e1b14247a-goog
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 9eee5330656bf92f51cb1f09b2dc9f8cf975b3d1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024070120-undergo-stipulate-9aae@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
9eee5330656b ("PCI/MSI: Fix UAF in msi_capability_init")
b12d0bec385b ("PCI/MSI: Move pci_disable_msi() to api.c")
c93fd5266cff ("PCI/MSI: Move mask and unmask helpers to msi.h")
a474d3fbe287 ("PCI/MSI: Get rid of PCI_MSI_IRQ_DOMAIN")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9eee5330656bf92f51cb1f09b2dc9f8cf975b3d1 Mon Sep 17 00:00:00 2001
From: Mostafa Saleh <smostafa(a)google.com>
Date: Mon, 24 Jun 2024 20:37:28 +0000
Subject: [PATCH] PCI/MSI: Fix UAF in msi_capability_init
KFENCE reports the following UAF:
BUG: KFENCE: use-after-free read in __pci_enable_msi_range+0x2c0/0x488
Use-after-free read at 0x0000000024629571 (in kfence-#12):
__pci_enable_msi_range+0x2c0/0x488
pci_alloc_irq_vectors_affinity+0xec/0x14c
pci_alloc_irq_vectors+0x18/0x28
kfence-#12: 0x0000000008614900-0x00000000e06c228d, size=104, cache=kmalloc-128
allocated by task 81 on cpu 7 at 10.808142s:
__kmem_cache_alloc_node+0x1f0/0x2bc
kmalloc_trace+0x44/0x138
msi_alloc_desc+0x3c/0x9c
msi_domain_insert_msi_desc+0x30/0x78
msi_setup_msi_desc+0x13c/0x184
__pci_enable_msi_range+0x258/0x488
pci_alloc_irq_vectors_affinity+0xec/0x14c
pci_alloc_irq_vectors+0x18/0x28
freed by task 81 on cpu 7 at 10.811436s:
msi_domain_free_descs+0xd4/0x10c
msi_domain_free_locked.part.0+0xc0/0x1d8
msi_domain_alloc_irqs_all_locked+0xb4/0xbc
pci_msi_setup_msi_irqs+0x30/0x4c
__pci_enable_msi_range+0x2a8/0x488
pci_alloc_irq_vectors_affinity+0xec/0x14c
pci_alloc_irq_vectors+0x18/0x28
Descriptor allocation done in:
__pci_enable_msi_range
msi_capability_init
msi_setup_msi_desc
msi_insert_msi_desc
msi_domain_insert_msi_desc
msi_alloc_desc
...
Freed in case of failure in __msi_domain_alloc_locked()
__pci_enable_msi_range
msi_capability_init
pci_msi_setup_msi_irqs
msi_domain_alloc_irqs_all_locked
msi_domain_alloc_locked
__msi_domain_alloc_locked => fails
msi_domain_free_locked
...
That failure propagates back to pci_msi_setup_msi_irqs() in
msi_capability_init() which accesses the descriptor for unmasking in the
error exit path.
Cure it by copying the descriptor and using the copy for the error exit path
unmask operation.
[ tglx: Massaged change log ]
Fixes: bf6e054e0e3f ("genirq/msi: Provide msi_device_populate/destroy_sysfs()")
Suggested-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Mostafa Saleh <smostafa(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Bjorn Heelgas <bhelgaas(a)google.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240624203729.1094506-1-smostafa@google.com
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index c5625dd9bf49..3a45879d85db 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -352,7 +352,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
struct irq_affinity *affd)
{
struct irq_affinity_desc *masks = NULL;
- struct msi_desc *entry;
+ struct msi_desc *entry, desc;
int ret;
/* Reject multi-MSI early on irq domain enabled architectures */
@@ -377,6 +377,12 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
/* All MSIs are unmasked by default; mask them all */
entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
pci_msi_mask(entry, msi_multi_mask(entry));
+ /*
+ * Copy the MSI descriptor for the error path because
+ * pci_msi_setup_msi_irqs() will free it for the hierarchical
+ * interrupt domain case.
+ */
+ memcpy(&desc, entry, sizeof(desc));
/* Configure MSI capability structure */
ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
@@ -396,7 +402,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
goto unlock;
err:
- pci_msi_unmask(entry, msi_multi_mask(entry));
+ pci_msi_unmask(&desc, msi_multi_mask(&desc));
pci_free_msi_irqs(dev);
fail:
dev->msi_enabled = 0;