When an event on a child inode is sent to the parent inode mark and
the parent inode mark was not marked with FAN_EVENT_ON_CHILD, the
event will not be delivered to the listener process. However, if the
same process also has a mount mark, the event will be delivered
regardless of the mount mark mask.
This behavior is incorrect in the case where the mount mark mask does
not contain the specific event type. For example, the process adds
a mark on a directory with mask FAN_MODIFY (without FAN_EVENT_ON_CHILD)
and a mount mark with mask FAN_CLOSE_NOWRITE (without FAN_ONDIR).
A modify event on a file inside that directory (and inside that mount)
should not create a FAN_MODIFY event, because neither of the marks
requested that event on the file.
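To make the example concrete, here is a minimal userspace sketch of
that setup (the paths and the lack of error handling are illustrative
only; the LTP test [2] below is the actual reproducer):

#include <fcntl.h>
#include <sys/fanotify.h>

int main(void)
{
	int fd = fanotify_init(FAN_CLASS_NOTIF, O_RDONLY);

	if (fd < 0)
		return 1;

	/* Inode mark on the directory: FAN_MODIFY, no FAN_EVENT_ON_CHILD */
	fanotify_mark(fd, FAN_MARK_ADD, FAN_MODIFY, AT_FDCWD, "/mnt/dir");

	/* Mount mark: FAN_CLOSE_NOWRITE only, no FAN_ONDIR */
	fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
		      FAN_CLOSE_NOWRITE, AT_FDCWD, "/mnt");

	/* A modify of /mnt/dir/file must not be reported as FAN_MODIFY */
	return 0;
}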
Fixes: 1968f5eed54c ("fanotify: use both marks when possible")
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Amir Goldstein <amir73il(a)gmail.com>
---
fs/notify/fanotify/fanotify.c | 34 +++++++++++++++-------------------
1 file changed, 15 insertions(+), 19 deletions(-)
Jan,
While working on the super block mark patches [1], I stumbled upon
this bug. I figured I might as well send the fix out now.
I have written an LTP test [2] to reproduce it.
Thanks,
Amir.
[1] https://github.com/amir73il/linux/commits/fanotify_sb
[2] https://github.com/amir73il/ltp/commits/fanotify_sb
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 6702a6a0bbb5..e0e6a9d627df 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
u32 event_mask,
const void *data, int data_type)
{
- __u32 marks_mask, marks_ignored_mask;
+ __u32 marks_mask = 0, marks_ignored_mask = 0;
const struct path *path = data;
pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p"
@@ -108,24 +108,20 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
!d_can_lookup(path->dentry))
return false;
- if (inode_mark && vfsmnt_mark) {
- marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
- marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
- } else if (inode_mark) {
- /*
- * if the event is for a child and this inode doesn't care about
- * events on the child, don't send it!
- */
- if ((event_mask & FS_EVENT_ON_CHILD) &&
- !(inode_mark->mask & FS_EVENT_ON_CHILD))
- return false;
- marks_mask = inode_mark->mask;
- marks_ignored_mask = inode_mark->ignored_mask;
- } else if (vfsmnt_mark) {
- marks_mask = vfsmnt_mark->mask;
- marks_ignored_mask = vfsmnt_mark->ignored_mask;
- } else {
- BUG();
+ /*
+ * if the event is for a child and this inode doesn't care about
+ * events on the child, don't send it!
+ */
+ if (inode_mark &&
+ (!(event_mask & FS_EVENT_ON_CHILD) ||
+ (inode_mark->mask & FS_EVENT_ON_CHILD))) {
+ marks_mask |= inode_mark->mask;
+ marks_ignored_mask |= inode_mark->ignored_mask;
+ }
+
+ if (vfsmnt_mark) {
+ marks_mask |= vfsmnt_mark->mask;
+ marks_ignored_mask |= vfsmnt_mark->ignored_mask;
}
if (d_is_dir(path->dentry) &&
--
2.7.4
Presently the dt_cpu_ftrs restore_cpu will only add bits to the LPCR
for secondaries, but some bits must be removed (e.g., UPRT for HPT).
Not clearing these bits on secondaries causes checkstops when booting
with disable_radix.
restore_cpu cannot simply set LPCR, because it is also called by the
idle wakeup code, which relies on opal_slw_set_reg to restore the
value of LPCR, at least on P8, which does not save LPCR to the stack
in the idle code.
Fix this by including a mask of bits to clear from LPCR as well, which
is used by restore_cpu.
This is a little messy now, but it's a minimal fix that can be
backported. Longer term, the idle SPR save/restore code can be
reworked to avoid calls to restore_cpu entirely; then restore_cpu
would be able to set LPCR unconditionally to match the boot processor
environment.
Fixes: 5a61ef74f269f ("powerpc/64s: Support new device tree binding for discovering CPU features")
Cc: stable(a)vger.kernel.org # v4.12+
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
---
I tested this fix and it boots a POWER9 with disable_radix, where
previously it checkstopped. Deeper idle states seem to work too,
after they're enabled with a firmware override.
arch/powerpc/kernel/dt_cpu_ftrs.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 11a3a4fed3fb..ed7605d8fd2d 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -83,6 +83,7 @@ static int hv_mode;
static struct {
u64 lpcr;
+ u64 lpcr_clear;
u64 hfscr;
u64 fscr;
} system_registers;
@@ -91,6 +92,8 @@ static void (*init_pmu_registers)(void);
static void __restore_cpu_cpufeatures(void)
{
+ u64 lpcr;
+
/*
* LPCR is restored by the power on engine already. It can be changed
* after early init e.g., by radix enable, and we have no unified API
@@ -103,8 +106,10 @@ static void __restore_cpu_cpufeatures(void)
* The best we can do to accommodate secondary boot and idle restore
* for now is "or" LPCR with existing.
*/
-
- mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= system_registers.lpcr;
+ lpcr &= ~system_registers.lpcr_clear;
+ mtspr(SPRN_LPCR, lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
@@ -324,8 +329,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
+ system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR);
lpcr = mfspr(SPRN_LPCR);
- lpcr &= ~LPCR_ISL;
+ lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
--
2.16.3
The following situation leads to deadlock:
[task 1]                           [task 2]                           [task 3]
kill_fasync()                      mm_update_next_owner()             copy_process()
 spin_lock_irqsave(&fa->fa_lock)    read_lock(&tasklist_lock)          write_lock_irq(&tasklist_lock)
  send_sigio()                     <IRQ>                               ...
   read_lock(&fown->lock)           kill_fasync()                      ...
    read_lock(&tasklist_lock)        spin_lock_irqsave(&fa->fa_lock)   ...
Task 1 can't acquire the read-locked tasklist_lock, since task 3 has
already expressed its wish to take the lock exclusively. Task 2 holds
the lock for reading, but from the interrupt handler it can't take
the spin lock. The patch makes queued_read_lock_slowpath() give task 1
the same priority as if it were an interrupt handler, taking the lock
despite task 3 waiting for it, and this prevents the deadlock.
There seems to be no better way to detect such situations; also, in
general it's not good for readers with interrupts disabled to wait so
long, since read_lock may nest with other locks and delay the system.
Signed-off-by: Kirill Tkhai <ktkhai(a)virtuozzo.com>
---
kernel/locking/qrwlock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index c7471c3fb798..d15df85de8f5 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -32,7 +32,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
/*
* Readers come here when they cannot get the lock without waiting
*/
- if (unlikely(in_interrupt())) {
+ if (unlikely(irqs_disabled())) {
/*
* Readers in interrupt context will get the lock immediately
* if the writer is just waiting (not holding the lock yet),
Commit e39a97353e53 modified __scsi_error_from_host_byte() such that
it translates DID_OK into BLK_STS_OK. However,
the description of that commit is wrong: it mentions that commit
2a842acab109 introduced a bug in __scsi_error_from_host_byte()
although that commit did not change the behavior of that function.
Additionally, commit e39a97353e53 introduced a severe bug: it causes
commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE
to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte()
by only translating good status values into BLK_STS_OK.
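To illustrate the check (the constants and the result word layout
follow the traditional SCSI result encoding; this sketch is not part
of the patch):

#include <stdio.h>

#define DID_OK		0x00	/* host byte: no host-side error */
#define DRIVER_SENSE	0x08	/* driver byte: sense data available */

int main(void)
{
	/* An LLD sets DRIVER_SENSE without SAM_STAT_CHECK_CONDITION */
	unsigned int result = (DRIVER_SENSE << 24) | (DID_OK << 16);

	/* The status byte alone looks good ... */
	printf("status byte:     0x%02x\n", result & 0xff);

	/* ... but the rest of the word is not clean, so the command
	 * must not be completed with BLK_STS_OK. */
	printf("non-status bits: 0x%08x\n", result & ~0xffU);
	return 0;
}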
Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()")
Reported-by: Damien Le Moal <damien.lemoal(a)wdc.com>
Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com>
Cc: Hannes Reinecke <hare(a)suse.com>
Cc: Douglas Gilbert <dgilbert(a)interlog.com>
Cc: Damien Le Moal <damien.lemoal(a)wdc.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: stable(a)vger.kernel.org
---
Changes compared to v1:
- Modified __scsi_error_from_host_byte() such that it again returns
BLK_STS_OK for CONDITION MET and other result codes that represent
success.
drivers/scsi/scsi_lib.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 74a39db57d49..1496b34af409 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
{
switch (host_byte(result)) {
case DID_OK:
- return BLK_STS_OK;
+ /*
+ * Also check the other bytes than the status byte in result
+ * to handle the case when a SCSI LLD sets result to
+ * DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION.
+ */
+ return scsi_status_is_good(result) && (result & ~0xff) == 0 ?
+ BLK_STS_OK : BLK_STS_IOERR;
case DID_TRANSPORT_FAILFAST:
return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:
--
2.16.2
Enabling virtually mapped kernel stacks breaks the thunderx_zip
driver. On compression or decompression the executing CPU hangs in
an endless loop. The reason for this is the driver's use of __pa(),
which no longer works for an address that is not part of the 1:1
mapping.
The zip driver allocates a result struct on the stack and needs
to tell the hardware the physical address within this struct
that is used to signal the completion of the request.
As the hardware gets the wrong address after the broken __pa
conversion, it writes to an arbitrary address. The zip driver then
waits forever for the completion byte to contain a non-zero value.
Allocating the result struct from 1:1 mapped memory resolves this
bug.
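As a hedged illustration of the constraint behind the change (a
generic kernel-style sketch, not thunderx-specific code;
is_vmalloc_addr() is only used here to demonstrate the point):

#include <linux/mm.h>
#include <linux/slab.h>

/*
 * With CONFIG_VMAP_STACK the kernel stack lives in the vmalloc area,
 * so __pa()/virt_to_phys() on an on-stack result struct yields a
 * bogus physical address.  kzalloc() returns memory from the linear
 * (1:1) mapping, for which the conversion is valid.
 */
static void *alloc_dma_visible(size_t size)
{
	void *buf = kzalloc(size, GFP_KERNEL);

	/* An on-stack buffer could trip this on a VMAP_STACK kernel */
	WARN_ON(buf && is_vmalloc_addr(buf));
	return buf;
}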
Signed-off-by: Jan Glauber <jglauber(a)cavium.com>
Reviewed-by: Robert Richter <rrichter(a)cavium.com>
Cc: stable <stable(a)vger.kernel.org> # 4.14
---
drivers/crypto/cavium/zip/zip_crypto.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/crypto/cavium/zip/zip_crypto.c b/drivers/crypto/cavium/zip/zip_crypto.c
index 8df4d26..2fc9b03 100644
--- a/drivers/crypto/cavium/zip/zip_crypto.c
+++ b/drivers/crypto/cavium/zip/zip_crypto.c
@@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
struct zip_kernel_ctx *zip_ctx)
{
struct zip_operation *zip_ops = NULL;
- struct zip_state zip_state;
+ struct zip_state *zip_state;
struct zip_device *zip = NULL;
int ret;
@@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
- memset(&zip_state, 0, sizeof(struct zip_state));
+ zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
+ if (!zip_state)
+ return -ENOMEM;
+
zip_ops = &zip_ctx->zip_comp;
zip_ops->input_len = slen;
zip_ops->output_len = *dlen;
memcpy(zip_ops->input, src, slen);
- ret = zip_deflate(zip_ops, &zip_state, zip);
+ ret = zip_deflate(zip_ops, zip_state, zip);
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+ kfree(zip_state);
return ret;
}
@@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
struct zip_kernel_ctx *zip_ctx)
{
struct zip_operation *zip_ops = NULL;
- struct zip_state zip_state;
+ struct zip_state *zip_state;
struct zip_device *zip = NULL;
int ret;
@@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
- memset(&zip_state, 0, sizeof(struct zip_state));
+ zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
+ if (!zip_state)
+ return -ENOMEM;
+
zip_ops = &zip_ctx->zip_decomp;
memcpy(zip_ops->input, src, slen);
@@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
zip_ops->input_len = slen;
zip_ops->output_len = *dlen;
- ret = zip_inflate(zip_ops, &zip_state, zip);
+ ret = zip_inflate(zip_ops, zip_state, zip);
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+ kfree(zip_state);
return ret;
}
--
2.7.4