The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d6ef1f194b7569af8b8397876dc9ab07649d63cb Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel(a)suse.de>
Date: Tue, 17 Apr 2018 15:27:16 +0200
Subject: [PATCH] x86/mm: Prevent kernel Oops in PTDUMP code with HIGHPTE=y
The walk_pte_level() function just uses __va to get the virtual address of
the PTE page, but that breaks when the PTE page is not in the direct
mapping with HIGHPTE=y.
The result is an unhandled kernel paging request at some random address
when accessing the current_kernel or current_user file.
Use the correct API to access PTE pages.
Fixes: fe770bf0310d ('x86: clean up the page table dumper and add 32-bit support')
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Cc: jgross(a)suse.com
Cc: JBeulich(a)suse.com
Cc: hpa(a)zytor.com
Cc: aryabinin(a)virtuozzo.com
Cc: kirill.shutemov(a)linux.intel.com
Link: https://lkml.kernel.org/r/1523971636-4137-1-git-send-email-joro@8bytes.org
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 62a7e9f65dec..cc7ff5957194 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
@@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
pgprotval_t eff_in, unsigned long P)
{
int i;
- pte_t *start;
+ pte_t *pte;
pgprotval_t prot, eff;
- start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- prot = pte_flags(*start);
- eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+ pte = pte_offset_map(&addr, st->current_address);
+ prot = pte_flags(*pte);
+ eff = effective_prot(eff_in, prot);
note_page(m, st, __pgprot(prot), eff, 5);
- start++;
+ pte_unmap(pte);
}
}
#ifdef CONFIG_KASAN
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d6ef1f194b7569af8b8397876dc9ab07649d63cb Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel(a)suse.de>
Date: Tue, 17 Apr 2018 15:27:16 +0200
Subject: [PATCH] x86/mm: Prevent kernel Oops in PTDUMP code with HIGHPTE=y
The walk_pte_level() function just uses __va to get the virtual address of
the PTE page, but that breaks when the PTE page is not in the direct
mapping with HIGHPTE=y.
The result is an unhandled kernel paging request at some random address
when accessing the current_kernel or current_user file.
Use the correct API to access PTE pages.
Fixes: fe770bf0310d ('x86: clean up the page table dumper and add 32-bit support')
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Cc: jgross(a)suse.com
Cc: JBeulich(a)suse.com
Cc: hpa(a)zytor.com
Cc: aryabinin(a)virtuozzo.com
Cc: kirill.shutemov(a)linux.intel.com
Link: https://lkml.kernel.org/r/1523971636-4137-1-git-send-email-joro@8bytes.org
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 62a7e9f65dec..cc7ff5957194 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
@@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
pgprotval_t eff_in, unsigned long P)
{
int i;
- pte_t *start;
+ pte_t *pte;
pgprotval_t prot, eff;
- start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- prot = pte_flags(*start);
- eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+ pte = pte_offset_map(&addr, st->current_address);
+ prot = pte_flags(*pte);
+ eff = effective_prot(eff_in, prot);
note_page(m, st, __pgprot(prot), eff, 5);
- start++;
+ pte_unmap(pte);
}
}
#ifdef CONFIG_KASAN
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d6ef1f194b7569af8b8397876dc9ab07649d63cb Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel(a)suse.de>
Date: Tue, 17 Apr 2018 15:27:16 +0200
Subject: [PATCH] x86/mm: Prevent kernel Oops in PTDUMP code with HIGHPTE=y
The walk_pte_level() function just uses __va to get the virtual address of
the PTE page, but that breaks when the PTE page is not in the direct
mapping with HIGHPTE=y.
The result is an unhandled kernel paging request at some random address
when accessing the current_kernel or current_user file.
Use the correct API to access PTE pages.
Fixes: fe770bf0310d ('x86: clean up the page table dumper and add 32-bit support')
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Cc: jgross(a)suse.com
Cc: JBeulich(a)suse.com
Cc: hpa(a)zytor.com
Cc: aryabinin(a)virtuozzo.com
Cc: kirill.shutemov(a)linux.intel.com
Link: https://lkml.kernel.org/r/1523971636-4137-1-git-send-email-joro@8bytes.org
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 62a7e9f65dec..cc7ff5957194 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
@@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
pgprotval_t eff_in, unsigned long P)
{
int i;
- pte_t *start;
+ pte_t *pte;
pgprotval_t prot, eff;
- start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- prot = pte_flags(*start);
- eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+ pte = pte_offset_map(&addr, st->current_address);
+ prot = pte_flags(*pte);
+ eff = effective_prot(eff_in, prot);
note_page(m, st, __pgprot(prot), eff, 5);
- start++;
+ pte_unmap(pte);
}
}
#ifdef CONFIG_KASAN
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d6ef1f194b7569af8b8397876dc9ab07649d63cb Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel(a)suse.de>
Date: Tue, 17 Apr 2018 15:27:16 +0200
Subject: [PATCH] x86/mm: Prevent kernel Oops in PTDUMP code with HIGHPTE=y
The walk_pte_level() function just uses __va to get the virtual address of
the PTE page, but that breaks when the PTE page is not in the direct
mapping with HIGHPTE=y.
The result is an unhandled kernel paging request at some random address
when accessing the current_kernel or current_user file.
Use the correct API to access PTE pages.
Fixes: fe770bf0310d ('x86: clean up the page table dumper and add 32-bit support')
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Cc: jgross(a)suse.com
Cc: JBeulich(a)suse.com
Cc: hpa(a)zytor.com
Cc: aryabinin(a)virtuozzo.com
Cc: kirill.shutemov(a)linux.intel.com
Link: https://lkml.kernel.org/r/1523971636-4137-1-git-send-email-joro@8bytes.org
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 62a7e9f65dec..cc7ff5957194 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
@@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
pgprotval_t eff_in, unsigned long P)
{
int i;
- pte_t *start;
+ pte_t *pte;
pgprotval_t prot, eff;
- start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- prot = pte_flags(*start);
- eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+ pte = pte_offset_map(&addr, st->current_address);
+ prot = pte_flags(*pte);
+ eff = effective_prot(eff_in, prot);
note_page(m, st, __pgprot(prot), eff, 5);
- start++;
+ pte_unmap(pte);
}
}
#ifdef CONFIG_KASAN
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ff30b89e0ab71115cbad6ae10a58bd83fe18b41f Mon Sep 17 00:00:00 2001
From: Long Li <longli(a)microsoft.com>
Date: Tue, 17 Apr 2018 12:17:10 -0700
Subject: [PATCH] cifs: smbd: Dump SMB packet when configured
When sending through SMB Direct, also dump the packet in SMB send path.
Also fixed a typo in debug message.
Signed-off-by: Long Li <longli(a)microsoft.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Steve French <smfrench(a)gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber(a)redhat.com>
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index d611ed0537fd..87817ddcc096 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1028,7 +1028,7 @@ static int smbd_post_send(struct smbd_connection *info,
for (i = 0; i < request->num_sge; i++) {
log_rdma_send(INFO,
"rdma_request sge[%d] addr=%llu length=%u\n",
- i, request->sge[0].addr, request->sge[0].length);
+ i, request->sge[i].addr, request->sge[i].length);
ib_dma_sync_single_for_device(
info->id->device,
request->sge[i].addr,
@@ -2139,6 +2139,10 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
goto done;
}
+ cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen);
+ for (i = 0; i < rqst->rq_nvec-1; i++)
+ dump_smb(iov[i].iov_base, iov[i].iov_len);
+
remaining_data_length = buflen;
log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
Hi,
We are running kernel 4.4.0-22 and the patch below does not seem to be present in the mpt3sas driver. Can you please confirm?
As a reminder the patch was related to a Security Erase ATA command that requires a very long timeout like 100 minutes or more and the drive retains a busy status. And the driver should not try to send other commands or reset the drive.
Thanks,
Igor Rybak
CTO
MediaClone Inc
6900 Canby Ave Ste 107
Reseda, CA 91335
USA
+1-818-654-6286
________________________________________
From: Sreekanth Reddy [sreekanth.reddy(a)broadcom.com]
Sent: Thursday, November 10, 2016 8:38 PM
To: Andrey Grodzovsky
Cc: PDL-MPT-FUSIONLINUX; Igor Rybak; Ezra Kohavi; linux-scsi(a)vger.kernel.org; Sathya Prakash; Chaitra P B; Suganath Prabu Subramani; Hannes Reinecke; stable(a)vger.kernel.org
Subject: Re: [PATCH] [SCSI] mpt3sas: Fix secure erase premature termination (v4)
On Thu, Nov 10, 2016 at 8:05 PM, Andrey Grodzovsky <andrey2805(a)gmail.com> wrote:
> Problem:
> This is a work around for a bug with LSI Fusion MPT SAS2 when
> pefroming secure erase. Due to the very long time the operation
> takes commands issued during the erase will time out and will trigger
> execution of abort hook. Even though the abort hook is called for
> the specific command which timed out this leads to entire device halt
> (scsi_state terminated) and premature termination of the secured erase.
>
> Fix:
> Set device state to busy while erase in progress to reject any incoming
> commands until the erase is done. The device is blocked any way during
> this time and cannot execute any other command.
> More data and logs can be found here -
> https://drive.google.com/file/d/0B9ocOHYHbbS1Q3VMdkkzeWFkTjg/view
>
> v2: Update according to example patch by Hannes Reinecke to apply
> the blocking logic to any ATA 12/16 command.
>
> v3: Use SCSI commands opcodes definitions instead of value and
> correct identation.
>
> v4: Fix checkpath errors and warning.
>
> Signed-off-by: Andrey Grodzovsky <andrey2805(a)gmail.com>
> Cc: <linux-scsi(a)vger.kernel.org>
> Cc: Sathya Prakash <sathya.prakash(a)broadcom.com>
> Cc: Chaitra P B <chaitra.basappa(a)broadcom.com>
> Cc: Suganath Prabu Subramani <suganath-prabu.subramani(a)broadcom.com>
> Cc: Sreekanth Reddy <Sreekanth.Reddy(a)broadcom.com>
> Cc: Hannes Reinecke <hare(a)suse.de>
> Cc: <stable(a)vger.kernel.org>
Acked-by: Sreekanth Reddy <Sreekanth.Reddy(a)broadcom.com>
> ---
> drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> index 5a97e32..c032319 100644
> --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
> @@ -3500,6 +3500,10 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
> SAM_STAT_CHECK_CONDITION;
> }
>
> +static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
> +{
> + return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
> +}
>
> /**
> * _scsih_qcmd - main scsi request entry point
> @@ -3528,6 +3532,14 @@ _scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
> scsi_print_command(scmd);
> #endif
>
> + /**
> + * Lock the device for any subsequent command until
> + * command is done.
> + */
> + if (ata_12_16_cmd(scmd))
> + scsi_internal_device_block(scmd->device);
> +
> +
> sas_device_priv_data = scmd->device->hostdata;
> if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
> scmd->result = DID_NO_CONNECT << 16;
> @@ -4062,6 +4074,10 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
> if (scmd == NULL)
> return 1;
>
> + if (ata_12_16_cmd(scmd))
> + scsi_internal_device_unblock(scmd->device, SDEV_RUNNING);
> +
> +
> mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
>
> if (mpi_reply == NULL) {
> --
> 2.1.4
>
From: Evan Wang <xswang(a)marvell.com>
There maybe some special operations needed when stop engine
for AHCI IP of different vendors, so it is necessary to override
ahci_stop_engine() when stop the engine like overriding start_engine().
Evan Wang (2):
libahci: Allow drivers to override stop_engine
ata: ahci: mvebu: override ahci_stop_engine for mvebu AHCI
drivers/ata/ahci.c | 6 ++---
drivers/ata/ahci.h | 7 ++++++
drivers/ata/ahci_mvebu.c | 56 +++++++++++++++++++++++++++++++++++++++++++++
drivers/ata/ahci_qoriq.c | 2 +-
drivers/ata/ahci_xgene.c | 4 ++--
drivers/ata/libahci.c | 20 +++++++++-------
drivers/ata/sata_highbank.c | 2 +-
7 files changed, 82 insertions(+), 15 deletions(-)
--
1.9.1
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 021ad274d7dc31611d4f47f7dd4ac7a224526f30 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui(a)microsoft.com>
Date: Thu, 15 Mar 2018 14:20:53 +0000
Subject: [PATCH] PCI: hv: Serialize the present and eject work items
When we hot-remove the device, we first receive a PCI_EJECT message and
then receive a PCI_BUS_RELATIONS message with bus_rel->device_count == 0.
The first message is offloaded to hv_eject_device_work(), and the second
is offloaded to pci_devices_present_work(). Both the paths can be running
list_del(&hpdev->list_entry), causing general protection fault, because
system_wq can run them concurrently.
The patch eliminates the race condition.
Since access to present/eject work items is serialized, we do not need the
hbus->enum_sem anymore, so remove it.
Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs")
Link: https://lkml.kernel.org/r/KL1P15301MB00064DA6B4D221123B5241CFBFD70@KL1P1530…
Tested-by: Adrian Suhov <v-adsuho(a)microsoft.com>
Tested-by: Chris Valean <v-chvale(a)microsoft.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
[lorenzo.pieralisi(a)arm.com: squashed semaphore removal patch]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi(a)arm.com>
Reviewed-by: Michael Kelley <mikelley(a)microsoft.com>
Acked-by: Haiyang Zhang <haiyangz(a)microsoft.com>
Cc: <stable(a)vger.kernel.org> # v4.6+
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Jack Morgenstein <jackm(a)mellanox.com>
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 2faf38eab785..b7fd5c157d73 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -447,7 +447,6 @@ struct hv_pcibus_device {
spinlock_t device_list_lock; /* Protect lists below */
void __iomem *cfg_addr;
- struct semaphore enum_sem;
struct list_head resources_for_children;
struct list_head children;
@@ -461,6 +460,8 @@ struct hv_pcibus_device {
struct retarget_msi_interrupt retarget_msi_interrupt_params;
spinlock_t retarget_msi_interrupt_lock;
+
+ struct workqueue_struct *wq;
};
/*
@@ -1590,12 +1591,8 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
* It must also treat the omission of a previously observed device as
* notification that the device no longer exists.
*
- * Note that this function is a work item, and it may not be
- * invoked in the order that it was queued. Back to back
- * updates of the list of present devices may involve queuing
- * multiple work items, and this one may run before ones that
- * were sent later. As such, this function only does something
- * if is the last one in the queue.
+ * Note that this function is serialized with hv_eject_device_work(),
+ * because both are pushed to the ordered workqueue hbus->wq.
*/
static void pci_devices_present_work(struct work_struct *work)
{
@@ -1616,11 +1613,6 @@ static void pci_devices_present_work(struct work_struct *work)
INIT_LIST_HEAD(&removed);
- if (down_interruptible(&hbus->enum_sem)) {
- put_hvpcibus(hbus);
- return;
- }
-
/* Pull this off the queue and process it if it was the last one. */
spin_lock_irqsave(&hbus->device_list_lock, flags);
while (!list_empty(&hbus->dr_list)) {
@@ -1637,7 +1629,6 @@ static void pci_devices_present_work(struct work_struct *work)
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
if (!dr) {
- up(&hbus->enum_sem);
put_hvpcibus(hbus);
return;
}
@@ -1724,7 +1715,6 @@ static void pci_devices_present_work(struct work_struct *work)
break;
}
- up(&hbus->enum_sem);
put_hvpcibus(hbus);
kfree(dr);
}
@@ -1770,7 +1760,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
get_hvpcibus(hbus);
- schedule_work(&dr_wrk->wrk);
+ queue_work(hbus->wq, &dr_wrk->wrk);
}
/**
@@ -1848,7 +1838,7 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
get_pcichild(hpdev, hv_pcidev_ref_pnp);
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
get_hvpcibus(hpdev->hbus);
- schedule_work(&hpdev->wrk);
+ queue_work(hpdev->hbus->wq, &hpdev->wrk);
}
/**
@@ -2461,13 +2451,18 @@ static int hv_pci_probe(struct hv_device *hdev,
spin_lock_init(&hbus->config_lock);
spin_lock_init(&hbus->device_list_lock);
spin_lock_init(&hbus->retarget_msi_interrupt_lock);
- sema_init(&hbus->enum_sem, 1);
init_completion(&hbus->remove_event);
+ hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
+ hbus->sysdata.domain);
+ if (!hbus->wq) {
+ ret = -ENOMEM;
+ goto free_bus;
+ }
ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
hv_pci_onchannelcallback, hbus);
if (ret)
- goto free_bus;
+ goto destroy_wq;
hv_set_drvdata(hdev, hbus);
@@ -2536,6 +2531,8 @@ static int hv_pci_probe(struct hv_device *hdev,
hv_free_config_window(hbus);
close:
vmbus_close(hdev->channel);
+destroy_wq:
+ destroy_workqueue(hbus->wq);
free_bus:
free_page((unsigned long)hbus);
return ret;
@@ -2615,6 +2612,7 @@ static int hv_pci_remove(struct hv_device *hdev)
irq_domain_free_fwnode(hbus->sysdata.fwnode);
put_hvpcibus(hbus);
wait_for_completion(&hbus->remove_event);
+ destroy_workqueue(hbus->wq);
free_page((unsigned long)hbus);
return 0;
}
The current rescind processing code will not correctly handle
the case where the host immediately rescinds a channel that has
been offerred. In this case, we could be blocked in the open call and
since the channel is rescinded, the host will not respond and we could
be blocked forever in the vmbus open call.i Fix this problem.
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
(cherry picked from commit 7fa32e5ec28b1609abc0b797b58267f725fc3964)
Signed-off-by: Mohammed Gamal <mgamal(a)redhat.com>
---
drivers/hv/channel.c | 10 ++++++++--
drivers/hv/channel_mgmt.c | 7 ++++---
include/linux/hyperv.h | 1 +
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 1c967f5..1d58986 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -659,22 +659,28 @@ void vmbus_close(struct vmbus_channel *channel)
*/
return;
}
- mutex_lock(&vmbus_connection.channel_mutex);
/*
* Close all the sub-channels first and then close the
* primary channel.
*/
list_for_each_safe(cur, tmp, &channel->sc_list) {
cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
- vmbus_close_internal(cur_channel);
if (cur_channel->rescind) {
+ wait_for_completion(&cur_channel->rescind_event);
+ mutex_lock(&vmbus_connection.channel_mutex);
+ vmbus_close_internal(cur_channel);
hv_process_channel_removal(
cur_channel->offermsg.child_relid);
+ } else {
+ mutex_lock(&vmbus_connection.channel_mutex);
+ vmbus_close_internal(cur_channel);
}
+ mutex_unlock(&vmbus_connection.channel_mutex);
}
/*
* Now close the primary.
*/
+ mutex_lock(&vmbus_connection.channel_mutex);
vmbus_close_internal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
}
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index ec5454f..c21020b 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -333,6 +333,7 @@ static struct vmbus_channel *alloc_channel(void)
return NULL;
spin_lock_init(&channel->lock);
+ init_completion(&channel->rescind_event);
INIT_LIST_HEAD(&channel->sc_list);
INIT_LIST_HEAD(&channel->percpu_list);
@@ -898,6 +899,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
/*
* Now wait for offer handling to complete.
*/
+ vmbus_rescind_cleanup(channel);
while (READ_ONCE(channel->probe_done) == false) {
/*
* We wait here until any channel offer is currently
@@ -913,7 +915,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
- vmbus_rescind_cleanup(channel);
return;
}
/*
@@ -922,7 +923,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
*/
dev = get_device(&channel->device_obj->device);
if (dev) {
- vmbus_rescind_cleanup(channel);
vmbus_device_unregister(channel->device_obj);
put_device(dev);
}
@@ -936,13 +936,14 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
* 2. Then close the primary channel.
*/
mutex_lock(&vmbus_connection.channel_mutex);
- vmbus_rescind_cleanup(channel);
if (channel->state == CHANNEL_OPEN_STATE) {
/*
* The channel is currently not open;
* it is safe for us to cleanup the channel.
*/
hv_process_channel_removal(rescind->child_relid);
+ } else {
+ complete(&channel->rescind_event);
}
mutex_unlock(&vmbus_connection.channel_mutex);
}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 1552a5f..465d5db 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -724,6 +724,7 @@ struct vmbus_channel {
u8 monitor_bit;
bool rescind; /* got rescind msg */
+ struct completion rescind_event;
u32 ringbuffer_gpadlhandle;
--
1.8.3.1
This is the start of the stable review cycle for the 3.18.106 release.
There are 52 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Tue Apr 24 13:53:02 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v3.x/stable-review/patch-3.18.106-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-3.18.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 3.18.106-rc1
Amir Goldstein <amir73il(a)gmail.com>
fanotify: fix logic of events on child
Ian Kent <raven(a)themaw.net>
autofs: mount point create should honour passed in mode
Al Viro <viro(a)zeniv.linux.org.uk>
Don't leak MNT_INTERNAL away from internal mounts
Al Viro <viro(a)zeniv.linux.org.uk>
rpc_pipefs: fix double-dput()
Al Viro <viro(a)zeniv.linux.org.uk>
hypfs_kill_super(): deal with failed allocations
Al Viro <viro(a)zeniv.linux.org.uk>
jffs2_kill_sb(): deal with failed allocations
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc/lib: Fix off-by-one in alternate feature patching
Matt Redfearn <matt.redfearn(a)mips.com>
MIPS: memset.S: Fix clobber of v1 in last_fixup
Matt Redfearn <matt.redfearn(a)mips.com>
MIPS: memset.S: Fix return of __clear_user from Lpartial_fixup
Matt Redfearn <matt.redfearn(a)mips.com>
MIPS: memset.S: EVA & fault support for small_memset
Rodrigo Rivas Costa <rodrigorivascosta(a)gmail.com>
HID: hidraw: Fix crash on HIDIOCGFEATURE with a destroyed device
Takashi Iwai <tiwai(a)suse.de>
ALSA: rawmidi: Fix missing input substream checks in compat ioctls
Paul Parsons <lost.distance(a)yahoo.com>
drm/radeon: Fix PCIe lane width calculation
Theodore Ts'o <tytso(a)mit.edu>
ext4: don't allow r/w mounts if metadata blocks overlap the superblock
Theodore Ts'o <tytso(a)mit.edu>
ext4: fail ext4_iget for root directory if unallocated
Theodore Ts'o <tytso(a)mit.edu>
ext4: add validity checks for bitmap block numbers
Takashi Iwai <tiwai(a)suse.de>
ALSA: pcm: Fix endless loop for XRUN recovery in OSS emulation
Takashi Iwai <tiwai(a)suse.de>
ALSA: pcm: Fix mutex unbalance in OSS emulation ioctls
Takashi Iwai <tiwai(a)suse.de>
ALSA: pcm: Return -EBUSY for OSS ioctls changing busy streams
Takashi Iwai <tiwai(a)suse.de>
ALSA: pcm: Avoid potential races between OSS ioctls and read/write
Takashi Iwai <tiwai(a)suse.de>
ALSA: pcm: Use ERESTARTSYS instead of EINTR in OSS emulation
Nicholas Mc Guire <hofrat(a)osadl.org>
ALSA: oss: consolidate kmalloc/memset 0 call to kzalloc
Igor Pylypiv <igor.pylypiv(a)gmail.com>
watchdog: f71808e_wdt: Fix WD_EN register read
Mikhail Lappo <mikhail.lappo(a)esrlabs.com>
thermal: imx: Fix race condition in imx_thermal_probe()
Richard Genoud <richard.genoud(a)gmail.com>
clk: mvebu: armada-38x: add support for missing clocks
Ralph Sennhauser <ralph.sennhauser(a)gmail.com>
clk: mvebu: armada-38x: add support for 1866MHz variants
Alex Smith <alex.smith(a)imgtec.com>
mmc: jz4740: Fix race condition in IRQ mask update
Theodore Ts'o <tytso(a)mit.edu>
jbd2: if the journal is aborted then don't allow update of the log tail
Theodore Ts'o <tytso(a)mit.edu>
random: use a tighter cap in credit_entropy_bits_safe()
Mika Westerberg <mika.westerberg(a)linux.intel.com>
thunderbolt: Resume control channel after hibernation image is created
James Kelly <jamespeterkelly(a)gmail.com>
ASoC: ssm2602: Replace reg_default_raw with reg_default
Nicholas Piggin <npiggin(a)gmail.com>
powerpc/powernv: Fix OPAL NVRAM driver OPAL_BUSY loops
Nicholas Piggin <npiggin(a)gmail.com>
powerpc/64: Fix smp_wmb barrier definition use use lwsync consistently
Nicholas Piggin <npiggin(a)gmail.com>
powerpc/powernv: Handle unknown OPAL errors in opal_nvram_write()
Aaron Ma <aaron.ma(a)canonical.com>
HID: i2c-hid: fix size check and type usage
Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
usb: dwc3: pci: Properly cleanup resource
Zhengjun Xing <zhengjun.xing(a)linux.intel.com>
USB:fix USB3 devices behind USB3 hubs not resuming at hibernate thaw
Mika Westerberg <mika.westerberg(a)linux.intel.com>
ACPI / hotplug / PCI: Check presence of slot itself in get_slot_status()
Jason Andryuk <jandryuk(a)gmail.com>
xen-netfront: Fix hang on device removal
Nicolas Ferre <nicolas.ferre(a)microchip.com>
ARM: dts: at91: at91sam9g25: fix mux-mask pinctrl property
Heinrich Schuchardt <xypron.glpk(a)gmx.de>
usb: musb: gadget: misplaced out of bounds check
Takashi Iwai <tiwai(a)suse.de>
resource: fix integer overflow at reallocation
Andrew Morton <akpm(a)linux-foundation.org>
fs/reiserfs/journal.c: add missing resierfs_warning() arg
Richard Weinberger <richard(a)nod.at>
ubi: Reject MLC NAND
Romain Izard <romain.izard.pro(a)gmail.com>
ubi: Fix error for write access
Richard Weinberger <richard(a)nod.at>
ubifs: Check ubifs_wbuf_sync() return code
Tejaswi Tanikella <tejaswit(a)codeaurora.org>
slip: Check if rstate is initialized before uncompressing
Vasily Gorbik <gor(a)linux.ibm.com>
s390/ipl: ensure loadparm valid flag is set
Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
s390/qdio: don't merge ERROR output buffers
Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
s390/qdio: don't retry EQBS after CCQ 96
Helge Deller <deller(a)gmx.de>
parisc: Fix out of array access in match_pci_device()
Mauro Carvalho Chehab <mchehab(a)s-opensource.com>
media: v4l2-compat-ioctl32: don't oops on overlay
-------------
Diffstat:
Makefile | 4 +-
arch/arm/boot/dts/at91sam9g25.dtsi | 2 +-
arch/mips/lib/memset.S | 11 +-
arch/parisc/kernel/drivers.c | 4 +
arch/powerpc/include/asm/barrier.h | 3 +-
arch/powerpc/include/asm/synch.h | 4 -
arch/powerpc/lib/feature-fixups.c | 2 +-
arch/powerpc/platforms/powernv/opal-nvram.c | 11 +-
arch/s390/hypfs/inode.c | 2 +-
arch/s390/kernel/ipl.c | 1 +
drivers/char/random.c | 2 +-
drivers/clk/mvebu/armada-38x.c | 15 +-
drivers/gpu/drm/radeon/si_dpm.c | 4 +-
drivers/hid/hidraw.c | 5 +
drivers/hid/i2c-hid/i2c-hid.c | 13 +-
drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 4 +-
drivers/mmc/host/jz4740_mmc.c | 2 +-
drivers/mtd/ubi/block.c | 2 +-
drivers/mtd/ubi/build.c | 11 ++
drivers/net/slip/slhc.c | 5 +
drivers/net/xen-netfront.c | 7 +-
drivers/pci/hotplug/acpiphp_glue.c | 23 +++-
drivers/s390/cio/qdio_main.c | 42 +++---
drivers/thermal/imx_thermal.c | 6 +-
drivers/thunderbolt/nhi.c | 1 +
drivers/usb/core/generic.c | 9 +-
drivers/usb/dwc3/dwc3-pci.c | 2 +-
drivers/usb/musb/musb_gadget_ep0.c | 14 +-
drivers/watchdog/f71808e_wdt.c | 2 +-
fs/autofs4/root.c | 2 +-
fs/ext4/balloc.c | 16 ++-
fs/ext4/ialloc.c | 8 +-
fs/ext4/inode.c | 6 +
fs/ext4/super.c | 6 +
fs/jbd2/journal.c | 5 +-
fs/jffs2/super.c | 2 +-
fs/namespace.c | 3 +-
fs/notify/fanotify/fanotify.c | 34 ++---
fs/reiserfs/journal.c | 2 +-
fs/ubifs/super.c | 14 +-
include/net/slhc_vj.h | 1 +
include/sound/pcm_oss.h | 1 +
kernel/resource.c | 3 +-
net/sunrpc/rpc_pipe.c | 1 +
sound/core/oss/pcm_oss.c | 189 ++++++++++++++++++++------
sound/core/rawmidi_compat.c | 18 ++-
sound/soc/codecs/ssm2602.c | 19 ++-
47 files changed, 388 insertions(+), 155 deletions(-)
From: Long Li <longli(a)microsoft.com>
When sending the last iov that breaks into smaller buffers to fit the
transfer size, it's necessary to check if this is the last iov.
If this is the latest iov, stop and proceed to send pages.
Signed-off-by: Long Li <longli(a)microsoft.com>
Cc: stable(a)vger.kernel.org
---
fs/cifs/smbdirect.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 90e673c..b5c6c0d 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -2197,6 +2197,8 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
goto done;
}
i++;
+ if (i == rqst->rq_nvec)
+ break;
}
start = i;
buflen = 0;
--
2.7.4
From: Thor Thayer <thor.thayer(a)linux.intel.com>
The current Cadence QSPI driver caused a kernel panic when loading
a Root Filesystem from QSPI. The problem was caused by reading more
bytes than needed because the QSPI operated on 4 bytes at a time.
<snip>
[ 7.947754] spi_nor_read[1048]:from 0x037cad74, len 1 [bfe07fff]
[ 7.956247] cqspi_read[910]:offset 0x58502516, buffer=bfe07fff
[ 7.956247]
[ 7.966046] Unable to handle kernel paging request at virtual
address bfe08002
[ 7.973239] pgd = eebfc000
[ 7.975931] [bfe08002] *pgd=2fffb811, *pte=00000000, *ppte=00000000
</snip>
Notice above how only 1 byte needed to be read but by reading 4 bytes
into the end of a mapped page, an unrecoverable page fault occurred.
This patch uses a temporary buffer to hold the 4 bytes read and then
copies only the bytes required into the buffer. A min() function is
used to limit the length to prevent buffer overflows.
Request testing of this patch on other platforms. This was tested
on the Intel Arria10 SoCFPGA DevKit.
Fixes: 0cf1725676a97fc8 ("mtd: spi-nor: cqspi: Fix build on arches missing readsl/writesl")
Signed-off-by: Thor Thayer <thor.thayer(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Reviewed-by: Marek Vasut <marek.vasut(a)gmail.com>
---
v2 Changes to only write dangling bytes at end of transfer since
previous patch may have multiple dangling byte transfers.
Remove write patch since no errors reported and write timeout
needs more investigation.
v3 Add Fixes tag Cc-stable tag.
---
drivers/mtd/spi-nor/cadence-quadspi.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index 2f3a4d4232b3..c3f7aaa5d18f 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -507,7 +507,9 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
void __iomem *reg_base = cqspi->iobase;
void __iomem *ahb_base = cqspi->ahb_base;
unsigned int remaining = n_rx;
+ unsigned int mod_bytes = n_rx % 4;
unsigned int bytes_to_read = 0;
+ u8 *rxbuf_end = rxbuf + n_rx;
int ret = 0;
writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR);
@@ -536,11 +538,24 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf,
}
while (bytes_to_read != 0) {
+ unsigned int word_remain = round_down(remaining, 4);
+
bytes_to_read *= cqspi->fifo_width;
bytes_to_read = bytes_to_read > remaining ?
remaining : bytes_to_read;
- ioread32_rep(ahb_base, rxbuf,
- DIV_ROUND_UP(bytes_to_read, 4));
+ bytes_to_read = round_down(bytes_to_read, 4);
+ /* Read 4 byte word chunks then single bytes */
+ if (bytes_to_read) {
+ ioread32_rep(ahb_base, rxbuf,
+ (bytes_to_read / 4));
+ } else if (!word_remain && mod_bytes) {
+ unsigned int temp = ioread32(ahb_base);
+
+ bytes_to_read = mod_bytes;
+ memcpy(rxbuf, &temp, min((unsigned int)
+ (rxbuf_end - rxbuf),
+ bytes_to_read));
+ }
rxbuf += bytes_to_read;
remaining -= bytes_to_read;
bytes_to_read = cqspi_get_rd_sram_level(cqspi);
--
2.7.4
From: Greg Thelen <gthelen(a)google.com>
commit 2e898e4c0a3897ccd434adac5abb8330194f527b upstream.
lock_page_memcg()/unlock_page_memcg() use spin_lock_irqsave/restore() if
the page's memcg is undergoing move accounting, which occurs when a
process leaves its memcg for a new one that has
memory.move_charge_at_immigrate set.
unlocked_inode_to_wb_begin,end() use spin_lock_irq/spin_unlock_irq() if
the given inode is switching writeback domains. Switches occur when
enough writes are issued from a new domain.
This existing pattern is thus suspicious:
lock_page_memcg(page);
unlocked_inode_to_wb_begin(inode, &locked);
...
unlocked_inode_to_wb_end(inode, locked);
unlock_page_memcg(page);
If both inode switch and process memcg migration are both in-flight then
unlocked_inode_to_wb_end() will unconditionally enable interrupts while
still holding the lock_page_memcg() irq spinlock. This suggests the
possibility of deadlock if an interrupt occurs before unlock_page_memcg().
truncate
__cancel_dirty_page
lock_page_memcg
unlocked_inode_to_wb_begin
unlocked_inode_to_wb_end
<interrupts mistakenly enabled>
<interrupt>
end_page_writeback
test_clear_page_writeback
lock_page_memcg
<deadlock>
unlock_page_memcg
Due to configuration limitations this deadlock is not currently possible
because we don't mix cgroup writeback (a cgroupv2 feature) and
memory.move_charge_at_immigrate (a cgroupv1 feature).
If the kernel is hacked to always claim inode switching and memcg
moving_account, then this script triggers lockup in less than a minute:
cd /mnt/cgroup/memory
mkdir a b
echo 1 > a/memory.move_charge_at_immigrate
echo 1 > b/memory.move_charge_at_immigrate
(
echo $BASHPID > a/cgroup.procs
while true; do
dd if=/dev/zero of=/mnt/big bs=1M count=256
done
) &
while true; do
sync
done &
sleep 1h &
SLEEP=$!
while true; do
echo $SLEEP > a/cgroup.procs
echo $SLEEP > b/cgroup.procs
done
The deadlock does not seem possible, so it's debatable if there's any
reason to modify the kernel. I suggest we should to prevent future
surprises. And Wang Long said "this deadlock occurs three times in our
environment", so there's more reason to apply this, even to stable.
Stable 4.4 has minor conflicts applying this patch. For a clean 4.4 patch
see "[PATCH for-4.4] writeback: safer lock nesting"
https://lkml.org/lkml/2018/4/11/146
Wang Long said "this deadlock occurs three times in our environment"
[gthelen(a)google.com: v4]
Link: http://lkml.kernel.org/r/20180411084653.254724-1-gthelen@google.com
[akpm(a)linux-foundation.org: comment tweaks, struct initialization simplification]
Change-Id: Ibb773e8045852978f6207074491d262f1b3fb613
Link: http://lkml.kernel.org/r/20180410005908.167976-1-gthelen@google.com
Fixes: 682aa8e1a6a1 ("writeback: implement unlocked_inode_to_wb transaction and use it for stat updates")
Signed-off-by: Greg Thelen <gthelen(a)google.com>
Reported-by: Wang Long <wanglong19(a)meituan.com>
Acked-by: Wang Long <wanglong19(a)meituan.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [v4.2+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
[natechancellor: Adjust context due to lack of b93b016313b3b]
Signed-off-by: Nathan Chancellor <natechancellor(a)gmail.com>
---
fs/fs-writeback.c | 7 ++++---
include/linux/backing-dev-defs.h | 5 +++++
include/linux/backing-dev.h | 30 ++++++++++++++++--------------
mm/page-writeback.c | 18 +++++++++---------
4 files changed, 34 insertions(+), 26 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d4d04fee568a..40c34a0ef58a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -745,11 +745,12 @@ int inode_congested(struct inode *inode, int cong_bits)
*/
if (inode && inode_to_wb_is_valid(inode)) {
struct bdi_writeback *wb;
- bool locked, congested;
+ struct wb_lock_cookie lock_cookie = {};
+ bool congested;
- wb = unlocked_inode_to_wb_begin(inode, &locked);
+ wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
congested = wb_congested(wb, cong_bits);
- unlocked_inode_to_wb_end(inode, locked);
+ unlocked_inode_to_wb_end(inode, &lock_cookie);
return congested;
}
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index bfe86b54f6c1..0bd432a4d7bd 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -223,6 +223,11 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync)
set_wb_congested(bdi->wb.congested, sync);
}
+struct wb_lock_cookie {
+ bool locked;
+ unsigned long flags;
+};
+
#ifdef CONFIG_CGROUP_WRITEBACK
/**
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3e4ce54d84ab..82e8b73117d1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -346,7 +346,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
/**
* unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
* @inode: target inode
- * @lockedp: temp bool output param, to be passed to the end function
+ * @cookie: output param, to be passed to the end function
*
* The caller wants to access the wb associated with @inode but isn't
* holding inode->i_lock, mapping->tree_lock or wb->list_lock. This
@@ -354,12 +354,12 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
* association doesn't change until the transaction is finished with
* unlocked_inode_to_wb_end().
*
- * The caller must call unlocked_inode_to_wb_end() with *@lockdep
- * afterwards and can't sleep during transaction. IRQ may or may not be
- * disabled on return.
+ * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and
+ * can't sleep during the transaction. IRQs may or may not be disabled on
+ * return.
*/
static inline struct bdi_writeback *
-unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
+unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
rcu_read_lock();
@@ -367,10 +367,10 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
* Paired with store_release in inode_switch_wb_work_fn() and
* ensures that we see the new wb if we see cleared I_WB_SWITCH.
*/
- *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
+ cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
- if (unlikely(*lockedp))
- spin_lock_irq(&inode->i_mapping->tree_lock);
+ if (unlikely(cookie->locked))
+ spin_lock_irqsave(&inode->i_mapping->tree_lock, cookie->flags);
/*
* Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
@@ -382,12 +382,13 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
/**
* unlocked_inode_to_wb_end - end inode wb access transaction
* @inode: target inode
- * @locked: *@lockedp from unlocked_inode_to_wb_begin()
+ * @cookie: @cookie from unlocked_inode_to_wb_begin()
*/
-static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
+static inline void unlocked_inode_to_wb_end(struct inode *inode,
+ struct wb_lock_cookie *cookie)
{
- if (unlikely(locked))
- spin_unlock_irq(&inode->i_mapping->tree_lock);
+ if (unlikely(cookie->locked))
+ spin_unlock_irqrestore(&inode->i_mapping->tree_lock, cookie->flags);
rcu_read_unlock();
}
@@ -434,12 +435,13 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
}
static inline struct bdi_writeback *
-unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
+unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
return inode_to_wb(inode);
}
-static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
+static inline void unlocked_inode_to_wb_end(struct inode *inode,
+ struct wb_lock_cookie *cookie)
{
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 586f31261c83..8369572e1f7d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2501,13 +2501,13 @@ void account_page_redirty(struct page *page)
if (mapping && mapping_cap_account_dirty(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
- bool locked;
+ struct wb_lock_cookie cookie = {};
- wb = unlocked_inode_to_wb_begin(inode, &locked);
+ wb = unlocked_inode_to_wb_begin(inode, &cookie);
current->nr_dirtied--;
dec_node_page_state(page, NR_DIRTIED);
dec_wb_stat(wb, WB_DIRTIED);
- unlocked_inode_to_wb_end(inode, locked);
+ unlocked_inode_to_wb_end(inode, &cookie);
}
}
EXPORT_SYMBOL(account_page_redirty);
@@ -2613,15 +2613,15 @@ void __cancel_dirty_page(struct page *page)
if (mapping_cap_account_dirty(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
- bool locked;
+ struct wb_lock_cookie cookie = {};
lock_page_memcg(page);
- wb = unlocked_inode_to_wb_begin(inode, &locked);
+ wb = unlocked_inode_to_wb_begin(inode, &cookie);
if (TestClearPageDirty(page))
account_page_cleaned(page, mapping, wb);
- unlocked_inode_to_wb_end(inode, locked);
+ unlocked_inode_to_wb_end(inode, &cookie);
unlock_page_memcg(page);
} else {
ClearPageDirty(page);
@@ -2653,7 +2653,7 @@ int clear_page_dirty_for_io(struct page *page)
if (mapping && mapping_cap_account_dirty(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
- bool locked;
+ struct wb_lock_cookie cookie = {};
/*
* Yes, Virginia, this is indeed insane.
@@ -2690,14 +2690,14 @@ int clear_page_dirty_for_io(struct page *page)
* always locked coming in here, so we get the desired
* exclusion.
*/
- wb = unlocked_inode_to_wb_begin(inode, &locked);
+ wb = unlocked_inode_to_wb_begin(inode, &cookie);
if (TestClearPageDirty(page)) {
dec_lruvec_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE);
ret = 1;
}
- unlocked_inode_to_wb_end(inode, locked);
+ unlocked_inode_to_wb_end(inode, &cookie);
return ret;
}
return TestClearPageDirty(page);
--
2.17.0
Hi Greg,
Commit 7dac4a1726a9 ("ext4: add validity checks for bitmap block
numbers") seems to be the cause of the regression reported here:
https://marc.info/?l=linux-ext4&m=152416385122029&w=2
ext4 folks are probably busy at LSF, so no reply yet. Should this
commit be held until we get word from Ted?
Please excuse broken threading.
Thanks,
Ilya
commit 5e1df40f40ee45a97bb1066c3d71f0ae920a9672 upstream.
Currently we see sporadic timeouts during CDCLK changing both on BXT and
GLK as reported by the Bugzilla: ticket. It's easy to reproduce this by
changing the frequency in a tight loop after blanking the display. The
upper bound for the completion time is 800us based on my tests, so
increase it from the current 500us to 2ms; with that I couldn't trigger
the problem either on BXT or GLK.
Note that timeouts happened during both the change notification and the
voltage level setting PCODE request. (For the latter one BSpec doesn't
require us to wait for completion before further HW programming.)
This issue is similar to
commit 2c7d0602c815 ("drm/i915/gen9: Fix PCODE polling during CDCLK
change notification")
but there the PCODE request does complete (as shown by the mbox
busy flag), only the reply we get from PCODE indicates a failure.
So there we keep resending the request until a success reply, here we
just have to increase the timeout for the one PCODE request we send.
v2:
- s/snb_pcode_request/sandybridge_pcode_write_timeout/ (Ville)
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v4.15
Acked-by: Chris Wilson <chris(a)chris-wilson.co.uk> (v1)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103326
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180130142939.17983-1-imre.d…
(cherry picked from commit e76019a81921e87a4d9e7b3d86102bc708a6c227)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
(Rebased for v4.15 stable tree due to upstream s/DIV_ROUND_UP(cdclk, 25000)/cdclk_state->voltage_level/ change )
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 6 +++++-
drivers/gpu/drm/i915/intel_cdclk.c | 22 +++++++++++++++++-----
drivers/gpu/drm/i915/intel_pm.c | 6 +++---
3 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e143004e66d5..ffc75271bf7e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4140,7 +4140,11 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
struct intel_display_error_state *error);
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
-int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val);
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
+ u32 val, int timeout_us);
+#define sandybridge_pcode_write(dev_priv, mbox, val) \
+ sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500)
+
int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
u32 reply_mask, u32 reply, int timeout_base_ms);
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 60cf4e58389a..658f681eb927 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1284,10 +1284,15 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
break;
}
- /* Inform power controller of upcoming frequency change */
+ /*
+ * Inform power controller of upcoming frequency change. BSpec
+ * requires us to wait up to 150usec, but that leads to timeouts;
+ * the 2ms used here is based on experiment.
+ */
mutex_lock(&dev_priv->pcu_lock);
- ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
- 0x80000000);
+ ret = sandybridge_pcode_write_timeout(dev_priv,
+ HSW_PCODE_DE_WRITE_FREQ_REQ,
+ 0x80000000, 2000);
mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
@@ -1318,8 +1323,15 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
I915_WRITE(CDCLK_CTL, val);
mutex_lock(&dev_priv->pcu_lock);
- ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
- DIV_ROUND_UP(cdclk, 25000));
+ /*
+ * The timeout isn't specified, the 2ms used here is based on
+ * experiment.
+ * FIXME: Waiting for the request completion could be delayed until
+ * the next PCODE request based on BSpec.
+ */
+ ret = sandybridge_pcode_write_timeout(dev_priv,
+ HSW_PCODE_DE_WRITE_FREQ_REQ,
+ DIV_ROUND_UP(cdclk, 25000), 2000);
mutex_unlock(&dev_priv->pcu_lock);
if (ret) {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f0d0dbab4150..c4e5db551fc2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -9227,8 +9227,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
return 0;
}
-int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
- u32 mbox, u32 val)
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
+ u32 mbox, u32 val, int timeout_us)
{
int status;
@@ -9251,7 +9251,7 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
if (__intel_wait_for_register_fw(dev_priv,
GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
- 500, 0, NULL)) {
+ timeout_us, 0, NULL)) {
DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
val, mbox, __builtin_return_address(0));
return -ETIMEDOUT;
--
2.13.2