The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4231aba000f5a4583dd9f67057aadb68c3eca99d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin(a)gmail.com>
Date: Fri, 27 Jul 2018 21:48:17 +1000
Subject: [PATCH] powerpc/64s: Fix page table fragment refcount race vs
speculative references
The page table fragment allocator uses the main page refcount racily
with respect to speculative references. A customer observed a BUG due
to page table page refcount underflow in the fragment allocator. This
can be caused by the fragment allocator set_page_count stomping on a
speculative reference, and then the speculative failure handler
decrements the new reference, and the underflow eventually pops when
the page tables are freed.
Fix this by using a dedicated field in the struct page for the page
table fragment allocator.
Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage")
Cc: stable(a)vger.kernel.org # v3.10+
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 8b24168ea8c4..4a892d894a0f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -200,9 +200,9 @@ static void pte_frag_destroy(void *pte_frag)
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
+ if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
@@ -215,9 +215,9 @@ static void pmd_frag_destroy(void *pmd_frag)
/* drop all the pending references */
count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (page_ref_sub_and_test(page, PMD_FRAG_NR - count)) {
+ if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_pmd_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 4afbfbb64bfd..78d0b3d5ebad 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -270,6 +270,8 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
return NULL;
}
+ atomic_set(&page->pt_frag_refcount, 1);
+
ret = page_address(page);
/*
* if we support only one fragment just return the
@@ -285,7 +287,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
* count.
*/
if (likely(!mm->context.pmd_frag)) {
- set_page_count(page, PMD_FRAG_NR);
+ atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -308,9 +310,10 @@ void pmd_fragment_free(unsigned long *pmd)
{
struct page *page = virt_to_page(pmd);
- if (put_page_testzero(page)) {
+ BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&page->pt_frag_refcount)) {
pgtable_pmd_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
@@ -352,6 +355,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
return NULL;
}
+ atomic_set(&page->pt_frag_refcount, 1);
ret = page_address(page);
/*
@@ -367,7 +371,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
* count.
*/
if (likely(!mm->context.pte_frag)) {
- set_page_count(page, PTE_FRAG_NR);
+ atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -390,10 +394,11 @@ void pte_fragment_free(unsigned long *table, int kernel)
{
struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
+ BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
pgtable_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 99ce070e7dcb..22651e124071 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -139,7 +139,10 @@ struct page {
unsigned long _pt_pad_1; /* compound_head */
pgtable_t pmd_huge_pte; /* protected by page->ptl */
unsigned long _pt_pad_2; /* mapping */
- struct mm_struct *pt_mm; /* x86 pgds only */
+ union {
+ struct mm_struct *pt_mm; /* x86 pgds only */
+ atomic_t pt_frag_refcount; /* powerpc */
+ };
#if ALLOC_SPLIT_PTLOCKS
spinlock_t *ptl;
#else
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4231aba000f5a4583dd9f67057aadb68c3eca99d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin(a)gmail.com>
Date: Fri, 27 Jul 2018 21:48:17 +1000
Subject: [PATCH] powerpc/64s: Fix page table fragment refcount race vs
speculative references
The page table fragment allocator uses the main page refcount racily
with respect to speculative references. A customer observed a BUG due
to page table page refcount underflow in the fragment allocator. This
can be caused by the fragment allocator set_page_count stomping on a
speculative reference, and then the speculative failure handler
decrements the new reference, and the underflow eventually pops when
the page tables are freed.
Fix this by using a dedicated field in the struct page for the page
table fragment allocator.
Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage")
Cc: stable(a)vger.kernel.org # v3.10+
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin(a)gmail.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 8b24168ea8c4..4a892d894a0f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -200,9 +200,9 @@ static void pte_frag_destroy(void *pte_frag)
/* drop all the pending references */
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
+ if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
@@ -215,9 +215,9 @@ static void pmd_frag_destroy(void *pmd_frag)
/* drop all the pending references */
count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
- if (page_ref_sub_and_test(page, PMD_FRAG_NR - count)) {
+ if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
pgtable_pmd_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 4afbfbb64bfd..78d0b3d5ebad 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -270,6 +270,8 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
return NULL;
}
+ atomic_set(&page->pt_frag_refcount, 1);
+
ret = page_address(page);
/*
* if we support only one fragment just return the
@@ -285,7 +287,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
* count.
*/
if (likely(!mm->context.pmd_frag)) {
- set_page_count(page, PMD_FRAG_NR);
+ atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -308,9 +310,10 @@ void pmd_fragment_free(unsigned long *pmd)
{
struct page *page = virt_to_page(pmd);
- if (put_page_testzero(page)) {
+ BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&page->pt_frag_refcount)) {
pgtable_pmd_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
@@ -352,6 +355,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
return NULL;
}
+ atomic_set(&page->pt_frag_refcount, 1);
ret = page_address(page);
/*
@@ -367,7 +371,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
* count.
*/
if (likely(!mm->context.pte_frag)) {
- set_page_count(page, PTE_FRAG_NR);
+ atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
mm->context.pte_frag = ret + PTE_FRAG_SIZE;
}
spin_unlock(&mm->page_table_lock);
@@ -390,10 +394,11 @@ void pte_fragment_free(unsigned long *table, int kernel)
{
struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
+ BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
pgtable_page_dtor(page);
- free_unref_page(page);
+ __free_page(page);
}
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 99ce070e7dcb..22651e124071 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -139,7 +139,10 @@ struct page {
unsigned long _pt_pad_1; /* compound_head */
pgtable_t pmd_huge_pte; /* protected by page->ptl */
unsigned long _pt_pad_2; /* mapping */
- struct mm_struct *pt_mm; /* x86 pgds only */
+ union {
+ struct mm_struct *pt_mm; /* x86 pgds only */
+ atomic_t pt_frag_refcount; /* powerpc */
+ };
#if ALLOC_SPLIT_PTLOCKS
spinlock_t *ptl;
#else
From: Andi Kleen <ak(a)linux.intel.com>
Mostly recycling the commit log from adaba23ccd7d which fixed
populate_pmd, but did not fix populate_pud. The same problem exists
there.
Stable trees reverted the following patch:
Revert "x86/mm/pat: Ensure cpa->pfn only contains page frame numbers"
This reverts commit 87e2bd898d3a79a8c609f183180adac47879a2a4 which is
commit edc3b9129cecd0f0857112136f5b8b1bc1d45918 upstream.
but the L1TF patch 02ff2769edbc backported here
x86/mm/pat: Make set_memory_np() L1TF safe
commit 958f79b9ee55dfaf00c8106ed1c22a2919e0028b upstream
set_memory_np() is used to mark kernel mappings not present, but it has
it's own open coded mechanism which does not have the L1TF protection of
inverting the address bits.
assumed that cpa->pfn contains a PFN. With the above patch reverted
it does not, which causes the PUD to be set to an incorrect address
shifted by 12 bits, which can cause various failures.
Convert the address to a PFN before passing it to pud_pfn().
This is a 4.4 stable only patch to fix the L1TF patches backport there.
Cc: stable(a)vger.kernel.org # 4.4-only
Cc: Andi Kleen <ak(a)linux.intel.com>
Signed-off-by: Jiri Slaby <jslaby(a)suse.cz>
---
arch/x86/mm/pageattr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1007fa80f5a6..0e1dd7d47f05 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1079,7 +1079,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
* Map everything starting from the Gb boundary, possibly with 1G pages
*/
while (end - start >= PUD_SIZE) {
- set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
+ set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn >> PAGE_SHIFT,
canon_pgprot(pud_pgprot))));
start += PUD_SIZE;
--
2.18.0
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7444a8092906ed44c09459780c56ba57043e39b1 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel(a)zonque.org>
Date: Wed, 27 Jun 2018 20:58:45 +0200
Subject: [PATCH] libertas: fix suspend and resume for SDIO connected cards
Prior to commit 573185cc7e64 ("mmc: core: Invoke sdio func driver's PM
callbacks from the sdio bus"), the MMC core used to call into the power
management functions of SDIO clients itself and removed the card if the
return code was non-zero. IOW, the mmc handled errors gracefully and didn't
upchain them to the pm core.
Since this change, the mmc core relies on generic power management
functions which treat all errors as a reason to cancel the suspend
immediately. This causes suspend attempts to fail when the libertas
driver is loaded.
To fix this, power down the card explicitly in if_sdio_suspend() when we
know we're about to lose power and return success. Also set a flag in these
cases, and power up the card again in if_sdio_resume().
Fixes: 573185cc7e64 ("mmc: core: Invoke sdio func driver's PM callbacks from the sdio bus")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Daniel Mack <daniel(a)zonque.org>
Reviewed-by: Chris Ball <chris(a)printf.net>
Reviewed-by: Ulf Hansson <ulf.hansson(a)linaro.org>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
diff --git a/drivers/net/wireless/marvell/libertas/dev.h b/drivers/net/wireless/marvell/libertas/dev.h
index dd1ee1f0af48..469134930026 100644
--- a/drivers/net/wireless/marvell/libertas/dev.h
+++ b/drivers/net/wireless/marvell/libertas/dev.h
@@ -104,6 +104,7 @@ struct lbs_private {
u8 fw_ready;
u8 surpriseremoved;
u8 setup_fw_on_resume;
+ u8 power_up_on_resume;
int (*hw_host_to_card) (struct lbs_private *priv, u8 type, u8 *payload, u16 nb);
void (*reset_card) (struct lbs_private *priv);
int (*power_save) (struct lbs_private *priv);
diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c
index 2300e796c6ab..43743c26c071 100644
--- a/drivers/net/wireless/marvell/libertas/if_sdio.c
+++ b/drivers/net/wireless/marvell/libertas/if_sdio.c
@@ -1290,15 +1290,23 @@ static void if_sdio_remove(struct sdio_func *func)
static int if_sdio_suspend(struct device *dev)
{
struct sdio_func *func = dev_to_sdio_func(dev);
- int ret;
struct if_sdio_card *card = sdio_get_drvdata(func);
+ struct lbs_private *priv = card->priv;
+ int ret;
mmc_pm_flag_t flags = sdio_get_host_pm_caps(func);
+ priv->power_up_on_resume = false;
/* If we're powered off anyway, just let the mmc layer remove the
* card. */
- if (!lbs_iface_active(card->priv))
- return -ENOSYS;
+ if (!lbs_iface_active(priv)) {
+ if (priv->fw_ready) {
+ priv->power_up_on_resume = true;
+ if_sdio_power_off(card);
+ }
+
+ return 0;
+ }
dev_info(dev, "%s: suspend: PM flags = 0x%x\n",
sdio_func_id(func), flags);
@@ -1306,9 +1314,14 @@ static int if_sdio_suspend(struct device *dev)
/* If we aren't being asked to wake on anything, we should bail out
* and let the SD stack power down the card.
*/
- if (card->priv->wol_criteria == EHS_REMOVE_WAKEUP) {
+ if (priv->wol_criteria == EHS_REMOVE_WAKEUP) {
dev_info(dev, "Suspend without wake params -- powering down card\n");
- return -ENOSYS;
+ if (priv->fw_ready) {
+ priv->power_up_on_resume = true;
+ if_sdio_power_off(card);
+ }
+
+ return 0;
}
if (!(flags & MMC_PM_KEEP_POWER)) {
@@ -1321,7 +1334,7 @@ static int if_sdio_suspend(struct device *dev)
if (ret)
return ret;
- ret = lbs_suspend(card->priv);
+ ret = lbs_suspend(priv);
if (ret)
return ret;
@@ -1336,6 +1349,11 @@ static int if_sdio_resume(struct device *dev)
dev_info(dev, "%s: resume: we're back\n", sdio_func_id(func));
+ if (card->priv->power_up_on_resume) {
+ if_sdio_power_on(card);
+ wait_event(card->pwron_waitq, card->priv->fw_ready);
+ }
+
ret = lbs_resume(card->priv);
return ret;
Hello,
Tested without any problem so please picked up this.
From: Matthew Auld
[ Upstream commit c11c7bfd213495784b22ef82a69b6489f8d0092f ]
Operating on a zero sized GEM userptr object will lead to explosions.
Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into
video memory (userptr) ioctl")
Testcase: igt/gem_userptr_blits/input-checking
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Link:
https://patchwork.freedesktop.org/patch/msgid/20180502195021.30900-1-matthe…
---
drivers/gpu/drm/i915/i915_gem_userptr.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index d596a8302ca3c..854bd51b9478a 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
I915_USERPTR_UNSYNCHRONIZED))
return -EINVAL;
+ if (!args->user_size)
+ return -EINVAL;
+
if (offset_in_page(args->user_ptr | args->user_size))
return -EINVAL;
--
2.17.1
The MTK xHCI controller use some reserved bytes in endpoint context for
bandwidth scheduling, so need keep them in xhci_endpoint_copy();
The issue is introduced by:
commit f5249461b504 ("xhci: Clear the host side toggle manually when
endpoint is soft reset")
It resets endpoints and will drop bandwidth scheduling parameters used
by interrupt or isochronous endpoints on MTK xHCI controller.
Fixes: f5249461b504 ("xhci: Clear the host side toggle manually when
endpoint is soft reset")
Cc: stable(a)vger.kernel.org
Signed-off-by: Chunfeng Yun <chunfeng.yun(a)mediatek.com>
Tested-by: Sean Wang <sean.wang(a)mediatek.com>
---
v2: add fix tag, Cc and Tested-by
---
drivers/usb/host/xhci-mem.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index ef350c3..b1f27aa 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1613,6 +1613,10 @@ void xhci_endpoint_copy(struct xhci_hcd *xhci,
in_ep_ctx->ep_info2 = out_ep_ctx->ep_info2;
in_ep_ctx->deq = out_ep_ctx->deq;
in_ep_ctx->tx_info = out_ep_ctx->tx_info;
+ if (xhci->quirks & XHCI_MTK_HOST) {
+ in_ep_ctx->reserved[0] = out_ep_ctx->reserved[0];
+ in_ep_ctx->reserved[1] = out_ep_ctx->reserved[1];
+ }
}
/* Copy output xhci_slot_ctx to the input xhci_slot_ctx.
--
1.9.1
The patch below does not apply to the 4.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From ea8c5356d39048bc94bae068228f51ddbecc6b89 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli(a)suse.de>
Date: Thu, 9 Aug 2018 15:48:49 +0800
Subject: [PATCH] bcache: set max writeback rate when I/O request is idle
Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle")
allows the writeback rate to be faster if there is no I/O request on a
bcache device. It works well if there is only one bcache device attached
to the cache set. If there are many bcache devices attached to a cache
set, it may introduce performance regression because multiple faster
writeback threads of the idle bcache devices will compete the btree level
locks with the bcache device who have I/O requests coming.
This patch fixes the above issue by only permitting fast writebac when
all bcache devices attached on the cache set are idle. And if one of the
bcache devices has new I/O request coming, minimized all writeback
throughput immediately and let PI controller __update_writeback_rate()
to decide the upcoming writeback rate for each bcache device.
Also when all bcache devices are idle, limited wrieback rate to a small
number is wast of thoughput, especially when backing devices are slower
non-rotation devices (e.g. SATA SSD). This patch sets a max writeback
rate for each backing device if the whole cache set is idle. A faster
writeback rate in idle time means new I/Os may have more available space
for dirty data, and people may observe a better write performance then.
Please note bcache may change its cache mode in run time, and this patch
still works if the cache mode is switched from writeback mode and there
is still dirty data on cache.
Fixes: Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle")
Cc: stable(a)vger.kernel.org #4.16+
Signed-off-by: Coly Li <colyli(a)suse.de>
Tested-by: Kai Krakow <kai(a)kaishome.de>
Tested-by: Stefan Priebe <s.priebe(a)profihost.ag>
Cc: Michael Lyle <mlyle(a)lyle.org>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b393b3fd06b6..05f82ff6f016 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -328,13 +328,6 @@ struct cached_dev {
*/
atomic_t has_dirty;
- /*
- * Set to zero by things that touch the backing volume-- except
- * writeback. Incremented by writeback. Used to determine when to
- * accelerate idle writeback.
- */
- atomic_t backing_idle;
-
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
@@ -515,6 +508,8 @@ struct cache_set {
struct cache_accounting accounting;
unsigned long flags;
+ atomic_t idle_counter;
+ atomic_t at_max_writeback_rate;
struct cache_sb sb;
@@ -524,6 +519,7 @@ struct cache_set {
struct bcache_device **devices;
unsigned devices_max_used;
+ atomic_t attached_dev_nr;
struct list_head cached_devs;
uint64_t cached_dev_sectors;
atomic_long_t flash_dev_dirty_sectors;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 914d501ad1e0..7dbe8b6316a0 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1103,6 +1103,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
generic_make_request(bio);
}
+static void quit_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *this_dc)
+{
+ int i;
+ struct bcache_device *d;
+ struct cached_dev *dc;
+
+ /*
+ * mutex bch_register_lock may compete with other parallel requesters,
+ * or attach/detach operations on other backing device. Waiting to
+ * the mutex lock may increase I/O request latency for seconds or more.
+ * To avoid such situation, if mutext_trylock() failed, only writeback
+ * rate of current cached device is set to 1, and __update_write_back()
+ * will decide writeback rate of other cached devices (remember now
+ * c->idle_counter is 0 already).
+ */
+ if (mutex_trylock(&bch_register_lock)) {
+ for (i = 0; i < c->devices_max_used; i++) {
+ if (!c->devices[i])
+ continue;
+
+ if (UUID_FLASH_ONLY(&c->uuids[i]))
+ continue;
+
+ d = c->devices[i];
+ dc = container_of(d, struct cached_dev, disk);
+ /*
+ * set writeback rate to default minimum value,
+ * then let update_writeback_rate() to decide the
+ * upcoming rate.
+ */
+ atomic_long_set(&dc->writeback_rate.rate, 1);
+ }
+ mutex_unlock(&bch_register_lock);
+ } else
+ atomic_long_set(&this_dc->writeback_rate.rate, 1);
+}
+
/* Cached devices - read & write stuff */
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -1120,8 +1158,25 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
return BLK_QC_T_NONE;
}
- atomic_set(&dc->backing_idle, 0);
- generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
+ if (likely(d->c)) {
+ if (atomic_read(&d->c->idle_counter))
+ atomic_set(&d->c->idle_counter, 0);
+ /*
+ * If at_max_writeback_rate of cache set is true and new I/O
+ * comes, quit max writeback rate of all cached devices
+ * attached to this cache set, and set at_max_writeback_rate
+ * to false.
+ */
+ if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
+ atomic_set(&d->c->at_max_writeback_rate, 0);
+ quit_max_writeback_rate(d->c, dc);
+ }
+ }
+
+ generic_start_io_acct(q,
+ bio_op(bio),
+ bio_sectors(bio),
+ &d->disk->part0);
bio_set_dev(bio, dc->bdev);
bio->bi_iter.bi_sector += dc->sb.data_offset;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 1e85cbb4c159..55a37641aa95 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -696,6 +696,8 @@ static void bcache_device_detach(struct bcache_device *d)
{
lockdep_assert_held(&bch_register_lock);
+ atomic_dec(&d->c->attached_dev_nr);
+
if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
struct uuid_entry *u = d->c->uuids + d->id;
@@ -1144,6 +1146,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
bch_cached_dev_run(dc);
bcache_device_link(&dc->disk, c, "bdev");
+ atomic_inc(&c->attached_dev_nr);
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
@@ -1696,6 +1699,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->block_bits = ilog2(sb->block_size);
c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
c->devices_max_used = 0;
+ atomic_set(&c->attached_dev_nr, 0);
c->btree_pages = bucket_pages(c);
if (c->btree_pages > BTREE_MAX_PAGES)
c->btree_pages = max_t(int, c->btree_pages / 4,
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 3e9d3459a224..6e88142514fb 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -171,7 +171,8 @@ SHOW(__bch_cached_dev)
var_printf(writeback_running, "%i");
var_print(writeback_delay);
var_print(writeback_percent);
- sysfs_hprint(writeback_rate, wb ? dc->writeback_rate.rate << 9 : 0);
+ sysfs_hprint(writeback_rate,
+ wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
sysfs_printf(io_error_limit, "%i", dc->error_limit);
sysfs_printf(io_disable, "%i", dc->io_disable);
@@ -193,7 +194,9 @@ SHOW(__bch_cached_dev)
* Except for dirty and target, other values should
* be 0 if writeback is not running.
*/
- bch_hprint(rate, wb ? dc->writeback_rate.rate << 9 : 0);
+ bch_hprint(rate,
+ wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
+ : 0);
bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
bch_hprint(target, dc->writeback_rate_target << 9);
bch_hprint(proportional,
@@ -261,8 +264,12 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
- sysfs_strtoul_clamp(writeback_rate,
- dc->writeback_rate.rate, 1, INT_MAX);
+ if (attr == &sysfs_writeback_rate) {
+ int v;
+
+ sysfs_strtoul_clamp(writeback_rate, v, 1, INT_MAX);
+ atomic_long_set(&dc->writeback_rate.rate, v);
+ }
sysfs_strtoul_clamp(writeback_rate_update_seconds,
dc->writeback_rate_update_seconds,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index fc479b026d6d..b15256bcf0e7 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
uint64_t now = local_clock();
- d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+ d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate));
/* Bound the time. Don't let us fall further than 2 seconds behind
* (this prevents unnecessary backlog that would make it impossible
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index cced87f8eb27..f7b0133c9d2f 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -442,7 +442,7 @@ struct bch_ratelimit {
* Rate at which we want to do work, in units per second
* The units here correspond to the units passed to bch_next_delay()
*/
- uint32_t rate;
+ atomic_long_t rate;
};
static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 912e969fedba..481d4cf38ac0 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc)
dc->writeback_rate_proportional = proportional_scaled;
dc->writeback_rate_integral_scaled = integral_scaled;
- dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
- dc->writeback_rate.rate = new_rate;
+ dc->writeback_rate_change = new_rate -
+ atomic_long_read(&dc->writeback_rate.rate);
+ atomic_long_set(&dc->writeback_rate.rate, new_rate);
dc->writeback_rate_target = target;
}
+static bool set_at_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *dc)
+{
+ /*
+ * Idle_counter is increased everytime when update_writeback_rate() is
+ * called. If all backing devices attached to the same cache set have
+ * identical dc->writeback_rate_update_seconds values, it is about 6
+ * rounds of update_writeback_rate() on each backing device before
+ * c->at_max_writeback_rate is set to 1, and then max wrteback rate set
+ * to each dc->writeback_rate.rate.
+ * In order to avoid extra locking cost for counting exact dirty cached
+ * devices number, c->attached_dev_nr is used to calculate the idle
+ * throushold. It might be bigger if not all cached device are in write-
+ * back mode, but it still works well with limited extra rounds of
+ * update_writeback_rate().
+ */
+ if (atomic_inc_return(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6)
+ return false;
+
+ if (atomic_read(&c->at_max_writeback_rate) != 1)
+ atomic_set(&c->at_max_writeback_rate, 1);
+
+ atomic_long_set(&dc->writeback_rate.rate, INT_MAX);
+
+ /* keep writeback_rate_target as existing value */
+ dc->writeback_rate_proportional = 0;
+ dc->writeback_rate_integral_scaled = 0;
+ dc->writeback_rate_change = 0;
+
+ /*
+ * Check c->idle_counter and c->at_max_writeback_rate agagain in case
+ * new I/O arrives during before set_at_max_writeback_rate() returns.
+ * Then the writeback rate is set to 1, and its new value should be
+ * decided via __update_writeback_rate().
+ */
+ if ((atomic_read(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6) ||
+ !atomic_read(&c->at_max_writeback_rate))
+ return false;
+
+ return true;
+}
+
static void update_writeback_rate(struct work_struct *work)
{
struct cached_dev *dc = container_of(to_delayed_work(work),
@@ -136,13 +181,20 @@ static void update_writeback_rate(struct work_struct *work)
return;
}
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
- dc->writeback_percent)
- __update_writeback_rate(dc);
+ if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
+ /*
+ * If the whole cache set is idle, set_at_max_writeback_rate()
+ * will set writeback rate to a max number. Then it is
+ * unncessary to update writeback rate for an idle cache set
+ * in maximum writeback rate number(s).
+ */
+ if (!set_at_max_writeback_rate(c, dc)) {
+ down_read(&dc->writeback_lock);
+ __update_writeback_rate(dc);
+ up_read(&dc->writeback_lock);
+ }
+ }
- up_read(&dc->writeback_lock);
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -422,27 +474,6 @@ static void read_dirty(struct cached_dev *dc)
delay = writeback_delay(dc, size);
- /* If the control system would wait for at least half a
- * second, and there's been no reqs hitting the backing disk
- * for awhile: use an alternate mode where we have at most
- * one contiguous set of writebacks in flight at a time. If
- * someone wants to do IO it will be quick, as it will only
- * have to contend with one operation in flight, and we'll
- * be round-tripping data to the backing disk as quickly as
- * it can accept it.
- */
- if (delay >= HZ / 2) {
- /* 3 means at least 1.5 seconds, up to 7.5 if we
- * have slowed way down.
- */
- if (atomic_inc_return(&dc->backing_idle) >= 3) {
- /* Wait for current I/Os to finish */
- closure_sync(&cl);
- /* And immediately launch a new set. */
- delay = 0;
- }
- }
-
while (!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
delay) {
@@ -741,7 +772,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_running = true;
dc->writeback_percent = 10;
dc->writeback_delay = 30;
- dc->writeback_rate.rate = 1024;
+ atomic_long_set(&dc->writeback_rate.rate, 1024);
dc->writeback_rate_minimum = 8;
dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
The patch below was submitted to be applied to the 4.18-stable tree.
I fail to see how this patch meets the stable kernel rules as found at
Documentation/process/stable-kernel-rules.rst.
I could be totally wrong, and if so, please respond to
<stable(a)vger.kernel.org> and let me know why this patch should be
applied. Otherwise, it is now dropped from my patch queues, never to be
seen again.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 78ac2107176baa0daf65b0fb8e561d2ed14c83ca Mon Sep 17 00:00:00 2001
From: Coly Li <colyli(a)suse.de>
Date: Thu, 9 Aug 2018 15:48:42 +0800
Subject: [PATCH] bcache: do not check return value of debugfs_create_dir()
Greg KH suggests that normal code should not care about debugfs. Therefore
no matter successful or failed of debugfs_create_dir() execution, it is
unncessary to check its return value.
There are two functions called debugfs_create_dir() and check the return
value, which are bch_debug_init() and closure_debug_init(). This patch
changes these two functions from int to void type, and ignore return values
of debugfs_create_dir().
This patch does not fix exact bug, just makes things work as they should.
Signed-off-by: Coly Li <colyli(a)suse.de>
Suggested-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: stable(a)vger.kernel.org
Cc: Kai Krakow <kai(a)kaishome.de>
Cc: Kent Overstreet <kent.overstreet(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 872ef4d67711..0a3e82b0876d 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -1001,7 +1001,7 @@ void bch_open_buckets_free(struct cache_set *);
int bch_cache_allocator_start(struct cache *ca);
void bch_debug_exit(void);
-int bch_debug_init(struct kobject *);
+void bch_debug_init(struct kobject *kobj);
void bch_request_exit(void);
int bch_request_init(void);
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 0e14969182c6..618253683d40 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -199,11 +199,16 @@ static const struct file_operations debug_ops = {
.release = single_release
};
-int __init closure_debug_init(void)
+void __init closure_debug_init(void)
{
- closure_debug = debugfs_create_file("closures",
- 0400, bcache_debug, NULL, &debug_ops);
- return IS_ERR_OR_NULL(closure_debug);
+ if (!IS_ERR_OR_NULL(bcache_debug))
+ /*
+ * it is unnecessary to check return value of
+ * debugfs_create_file(), we should not care
+ * about this.
+ */
+ closure_debug = debugfs_create_file(
+ "closures", 0400, bcache_debug, NULL, &debug_ops);
}
#endif
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 71427eb5fdae..7c2c5bc7c88b 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -186,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-int closure_debug_init(void);
+void closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);
#else
-static inline int closure_debug_init(void) { return 0; }
+static inline void closure_debug_init(void) {}
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 04d146711950..12034c07257b 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -252,11 +252,12 @@ void bch_debug_exit(void)
debugfs_remove_recursive(bcache_debug);
}
-int __init bch_debug_init(struct kobject *kobj)
+void __init bch_debug_init(struct kobject *kobj)
{
- if (!IS_ENABLED(CONFIG_DEBUG_FS))
- return 0;
-
+ /*
+ * it is unnecessary to check return value of
+ * debugfs_create_file(), we should not care
+ * about this.
+ */
bcache_debug = debugfs_create_dir("bcache", NULL);
- return IS_ERR_OR_NULL(bcache_debug);
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e0a92104ca23..c7ffa6ef3f82 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2345,10 +2345,12 @@ static int __init bcache_init(void)
goto err;
if (bch_request_init() ||
- bch_debug_init(bcache_kobj) || closure_debug_init() ||
sysfs_create_files(bcache_kobj, files))
goto err;
+ bch_debug_init(bcache_kobj);
+ closure_debug_init();
+
return 0;
err:
bcache_exit();
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 54648cf1ec2d7f4b6a71767799c45676a138ca24 Mon Sep 17 00:00:00 2001
From: xiao jin <jin.xiao(a)intel.com>
Date: Mon, 30 Jul 2018 14:11:12 +0800
Subject: [PATCH] block: blk_init_allocated_queue() set q->fq as NULL in the
fail case
We find the memory use-after-free issue in __blk_drain_queue()
on the kernel 4.14. After read the latest kernel 4.18-rc6 we
think it has the same problem.
Memory is allocated for q->fq in the blk_init_allocated_queue().
If the elevator init function called with error return, it will
run into the fail case to free the q->fq.
Then the __blk_drain_queue() uses the same memory after the free
of the q->fq, it will lead to the unpredictable event.
The patch is to set q->fq as NULL in the fail case of
blk_init_allocated_queue().
Fixes: commit 7c94e1c157a2 ("block: introduce blk_flush_queue to drive flush machinery")
Cc: <stable(a)vger.kernel.org>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche(a)wdc.com>
Signed-off-by: xiao jin <jin.xiao(a)intel.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-core.c b/block/blk-core.c
index 03a4ea93a5f3..23cd1b7770e7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1184,6 +1184,7 @@ int blk_init_allocated_queue(struct request_queue *q)
q->exit_rq_fn(q, q->fq->flush_rq);
out_free_flush_queue:
blk_free_flush_queue(q->fq);
+ q->fq = NULL;
return -ENOMEM;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 54648cf1ec2d7f4b6a71767799c45676a138ca24 Mon Sep 17 00:00:00 2001
From: xiao jin <jin.xiao(a)intel.com>
Date: Mon, 30 Jul 2018 14:11:12 +0800
Subject: [PATCH] block: blk_init_allocated_queue() set q->fq as NULL in the
fail case
We find the memory use-after-free issue in __blk_drain_queue()
on the kernel 4.14. After read the latest kernel 4.18-rc6 we
think it has the same problem.
Memory is allocated for q->fq in the blk_init_allocated_queue().
If the elevator init function called with error return, it will
run into the fail case to free the q->fq.
Then the __blk_drain_queue() uses the same memory after the free
of the q->fq, it will lead to the unpredictable event.
The patch is to set q->fq as NULL in the fail case of
blk_init_allocated_queue().
Fixes: commit 7c94e1c157a2 ("block: introduce blk_flush_queue to drive flush machinery")
Cc: <stable(a)vger.kernel.org>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche(a)wdc.com>
Signed-off-by: xiao jin <jin.xiao(a)intel.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-core.c b/block/blk-core.c
index 03a4ea93a5f3..23cd1b7770e7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1184,6 +1184,7 @@ int blk_init_allocated_queue(struct request_queue *q)
q->exit_rq_fn(q, q->fq->flush_rq);
out_free_flush_queue:
blk_free_flush_queue(q->fq);
+ q->fq = NULL;
return -ENOMEM;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 54648cf1ec2d7f4b6a71767799c45676a138ca24 Mon Sep 17 00:00:00 2001
From: xiao jin <jin.xiao(a)intel.com>
Date: Mon, 30 Jul 2018 14:11:12 +0800
Subject: [PATCH] block: blk_init_allocated_queue() set q->fq as NULL in the
fail case
We find the memory use-after-free issue in __blk_drain_queue()
on the kernel 4.14. After read the latest kernel 4.18-rc6 we
think it has the same problem.
Memory is allocated for q->fq in the blk_init_allocated_queue().
If the elevator init function called with error return, it will
run into the fail case to free the q->fq.
Then the __blk_drain_queue() uses the same memory after the free
of the q->fq, it will lead to the unpredictable event.
The patch is to set q->fq as NULL in the fail case of
blk_init_allocated_queue().
Fixes: commit 7c94e1c157a2 ("block: introduce blk_flush_queue to drive flush machinery")
Cc: <stable(a)vger.kernel.org>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche(a)wdc.com>
Signed-off-by: xiao jin <jin.xiao(a)intel.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-core.c b/block/blk-core.c
index 03a4ea93a5f3..23cd1b7770e7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1184,6 +1184,7 @@ int blk_init_allocated_queue(struct request_queue *q)
q->exit_rq_fn(q, q->fq->flush_rq);
out_free_flush_queue:
blk_free_flush_queue(q->fq);
+ q->fq = NULL;
return -ENOMEM;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
The default mid-level PLL bias current setting interferes with sigma
delta modulation. This manifests as decreased audio quality at lower
sampling rates, which sounds like radio broadcast quality, and
distortion noises at sampling rates at 48 kHz or above.
Changing the bias current settings to the lowest gets rid of the
noise.
Fixes: de3448519194 ("clk: sunxi-ng: sun4i: Use sigma-delta modulation
for audio PLL")
Cc: <stable(a)vger.kernel.org> # 4.15.x
Signed-off-by: Chen-Yu Tsai <wens(a)csie.org>
---
drivers/clk/sunxi-ng/ccu-sun4i-a10.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
index ffa5dac221e4..129ebd2588fd 100644
--- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
+++ b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
@@ -1434,8 +1434,16 @@ static void __init sun4i_ccu_init(struct device_node *node,
return;
}
- /* Force the PLL-Audio-1x divider to 1 */
val = readl(reg + SUN4I_PLL_AUDIO_REG);
+
+ /*
+ * Force VCO and PLL bias current to lowest setting. Higher
+ * settings interfere with sigma-delta modulation and result
+ * in audible noise and distortions when using SPDIF or I2S.
+ */
+ val &= ~GENMASK(25, 16);
+
+ /* Force the PLL-Audio-1x divider to 1 */
val &= ~GENMASK(29, 26);
writel(val | (1 << 26), reg + SUN4I_PLL_AUDIO_REG);
--
2.19.0.rc1
Hello!
Please apply the following commit to the v4.18 -stable tree:
rcu: Make expedited GPs handle CPU 0 being offline
fcc63543650150629c8a873cbef3578770acecd9
This patch fixes a v4.18 regression that is causing the RISC-V people
(CCed) some trouble. It is a small patch and has been tested in the
failing situation running v4.18 by Atish Patra and Andreas Schwab
(both CCed).
Please let me know if more information is required.
Thanx, Paul
Commit 136f55f66019 ("net: lan78xx: fix rx handling before first
packet is send") was not correctly backported to 4.4. The call to
tasklet_schedule() belongs in lan78xx_link_reset().
Fixes: d1fc12d8475c ("net: lan78xx: fix rx handling before first packet is send")
Signed-off-by: Ben Hutchings <ben.hutchings(a)codethink.co.uk>
---
This is for 4.4 only; the backports to other stable branches look OK.
I didn't test the driver on any branch though.
Ben.
drivers/net/usb/lan78xx.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index acec4b565511..1aede726052c 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -902,6 +902,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
ret = lan78xx_update_flowcontrol(dev, ecmd.duplex, ladv, radv);
netif_carrier_on(dev->net);
+
+ tasklet_schedule(&dev->bh);
}
return ret;
@@ -1361,8 +1363,6 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
netif_dbg(dev, ifup, dev->net,
"MAC address set to random addr");
}
-
- tasklet_schedule(&dev->bh);
}
ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
--
Ben Hutchings, Software Developer Codethink Ltd
https://www.codethink.co.uk/ Dale House, 35 Dale Street
Manchester, M1 2HF, United Kingdom
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
I hit the following splat in my tests:
------------[ cut here ]------------
IRQs not enabled as expected
WARNING: CPU: 3 PID: 0 at kernel/time/tick-sched.c:982 tick_nohz_idle_enter+0x44/0x8c
Modules linked in: ip6t_REJECT nf_reject_ipv6 ip6table_filter ip6_tables ipv6
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.19.0-rc2-test+ #2
Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
EIP: tick_nohz_idle_enter+0x44/0x8c
Code: ec 05 00 00 00 75 26 83 b8 c0 05 00 00 00 75 1d 80 3d d0 36 3e c1 00
75 14 68 94 63 12 c1 c6 05 d0 36 3e c1 01 e8 04 ee f8 ff <0f> 0b 58 fa bb a0
e5 66 c1 e8 25 0f 04 00 64 03 1d 28 31 52 c1 8b
EAX: 0000001c EBX: f26e7f8c ECX: 00000006 EDX: 00000007
ESI: f26dd1c0 EDI: 00000000 EBP: f26e7f40 ESP: f26e7f38
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010296
CR0: 80050033 CR2: 0813c6b0 CR3: 2f342000 CR4: 001406f0
Call Trace:
do_idle+0x33/0x202
cpu_startup_entry+0x61/0x63
start_secondary+0x18e/0x1ed
startup_32_smp+0x164/0x168
irq event stamp: 18773830
hardirqs last enabled at (18773829): [<c040150c>] trace_hardirqs_on_thunk+0xc/0x10
hardirqs last disabled at (18773830): [<c040151c>] trace_hardirqs_off_thunk+0xc/0x10
softirqs last enabled at (18773824): [<c0ddaa6f>] __do_softirq+0x25f/0x2bf
softirqs last disabled at (18773767): [<c0416bbe>] call_on_stack+0x45/0x4b
---[ end trace b7c64aa79e17954a ]---
After a bit of debugging, I found what was happening. This would trigger
when performing "perf" with a high NMI interrupt rate, while enabling and
disabling function tracer. Ftrace uses breakpoints to convert the nops at
the start of functions to calls to the function trampolines. The breakpoint
traps disable interrupts and this makes calls into lockdep via the
trace_hardirqs_off_thunk in the entry.S code. What happens is the following:
do_idle {
[interrupts enabled]
<interrupt> [interrupts disabled]
TRACE_IRQS_OFF [lockdep says irqs off]
[...]
TRACE_IRQS_IRET
test if pt_regs say return to interrupts enabled [yes]
TRACE_IRQS_ON [lockdep says irqs are on]
<nmi>
nmi_enter() {
printk_nmi_enter() [traced by ftrace]
[ hit ftrace breakpoint ]
<breakpoint exception>
TRACE_IRQS_OFF [lockdep says irqs off]
[...]
TRACE_IRQS_IRET [return from breakpoint]
test if pt_regs say interrupts enabled [no]
[iret back to interrupt]
[iret back to code]
tick_nohz_idle_enter() {
lockdep_assert_irqs_enabled() [lockdep say no!]
Although interrupts are indeed enabled, lockdep thinks it is not, and since
we now do asserts via lockdep, it gives a false warning. The issue here is
that printk_nmi_enter() is called before lockdep_off(), which disables
lockdep (for this reason) in NMIs. By simply not allowing ftrace to see
printk_nmi_enter() (via notrace annotation) we keep lockdep from getting
confused.
Cc: stable(a)vger.kernel.org
Fixes: 42a0bb3f71383 ("printk/nmi: generic solution for safe printk in NMI")
Acked-by: Sergey Senozhatsky <sergey.senozhatsky(a)gmail.com>
Acked-by: Petr Mladek <pmladek(a)suse.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/printk/printk_safe.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index a0a74c533e4b..0913b4d385de 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -306,12 +306,12 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
return printk_safe_log_store(s, fmt, args);
}
-void printk_nmi_enter(void)
+void notrace printk_nmi_enter(void)
{
this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
}
-void printk_nmi_exit(void)
+void notrace printk_nmi_exit(void)
{
this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
}
--
2.18.0
From: Chas Williams <chas3(a)att.com>
Commit 3c226c637b69 ("mm: numa: avoid waiting on freed migrated pages")
was an incomplete backport of the upstream commit. It is necessary to
always reset page_nid before attempting any early exit.
---
mm/huge_memory.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9efe88ef9702..e4c6c3edaf6a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1259,12 +1259,12 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
+ page_nid = -1;
if (!get_page_unless_zero(page))
goto out_unlock;
spin_unlock(fe->ptl);
wait_on_page_locked(page);
put_page(page);
- page_nid = -1;
goto out;
}
--
2.14.4
This is a note to let you know that I've just added the patch titled
usb: gadget: udc: renesas_usb3: fix maxpacket size of ep0
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From dfe1a51d2a36647f74cbad478801efa7cf394376 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh(a)renesas.com>
Date: Fri, 3 Aug 2018 12:12:46 +0900
Subject: usb: gadget: udc: renesas_usb3: fix maxpacket size of ep0
This patch fixes an issue that maxpacket size of ep0 is incorrect
for SuperSpeed. Otherwise, CDC NCM class with SuperSpeed doesn't
work correctly on this driver because its control read data size
is more than 64 bytes.
Reported-by: Junki Kato <junki.kato.xk(a)renesas.com>
Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller")
Cc: <stable(a)vger.kernel.org> # v4.5+
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh(a)renesas.com>
Tested-by: Junki Kato <junki.kato.xk(a)renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi(a)linux.intel.com>
---
drivers/usb/gadget/udc/renesas_usb3.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 1f879b3f2c96..e1656f361e08 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -812,12 +812,15 @@ static void usb3_irq_epc_int_1_speed(struct renesas_usb3 *usb3)
switch (speed) {
case USB_STA_SPEED_SS:
usb3->gadget.speed = USB_SPEED_SUPER;
+ usb3->gadget.ep0->maxpacket = USB3_EP0_SS_MAX_PACKET_SIZE;
break;
case USB_STA_SPEED_HS:
usb3->gadget.speed = USB_SPEED_HIGH;
+ usb3->gadget.ep0->maxpacket = USB3_EP0_HSFS_MAX_PACKET_SIZE;
break;
case USB_STA_SPEED_FS:
usb3->gadget.speed = USB_SPEED_FULL;
+ usb3->gadget.ep0->maxpacket = USB3_EP0_HSFS_MAX_PACKET_SIZE;
break;
default:
usb3->gadget.speed = USB_SPEED_UNKNOWN;
@@ -2513,7 +2516,7 @@ static int renesas_usb3_init_ep(struct renesas_usb3 *usb3, struct device *dev,
/* for control pipe */
usb3->gadget.ep0 = &usb3_ep->ep;
usb_ep_set_maxpacket_limit(&usb3_ep->ep,
- USB3_EP0_HSFS_MAX_PACKET_SIZE);
+ USB3_EP0_SS_MAX_PACKET_SIZE);
usb3_ep->ep.caps.type_control = true;
usb3_ep->ep.caps.dir_in = true;
usb3_ep->ep.caps.dir_out = true;
--
2.18.0
This is a note to let you know that I've just added the patch titled
USB: net2280: Fix erroneous synchronization change
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
>From dec3c23c9aa1815f07d98ae0375b4cbc10971e13 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Wed, 8 Aug 2018 11:20:39 -0400
Subject: USB: net2280: Fix erroneous synchronization change
Commit f16443a034c7 ("USB: gadgetfs, dummy-hcd, net2280: fix locking
for callbacks") was based on a serious misunderstanding. It
introduced regressions into both the dummy-hcd and net2280 drivers.
The problem in dummy-hcd was fixed by commit 7dbd8f4cabd9 ("USB:
dummy-hcd: Fix erroneous synchronization change"), but the problem in
net2280 remains. Namely: the ->disconnect(), ->suspend(), ->resume(),
and ->reset() callbacks must be invoked without the private lock held;
otherwise a deadlock will occur when the callback routine tries to
interact with the UDC driver.
This patch largely is a reversion of the relevant parts of
f16443a034c7. It also drops the private lock around the calls to
->suspend() and ->resume() (something the earlier patch forgot to do).
This is safe from races with device interrupts because it occurs
within the interrupt handler.
Finally, the patch changes where the ->disconnect() callback is
invoked when net2280_pullup() turns the pullup off. Rather than
making the callback from within stop_activity() at a time when dropping
the private lock could be unsafe, the callback is moved to a point
after the lock has already been dropped.
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Fixes: f16443a034c7 ("USB: gadgetfs, dummy-hcd, net2280: fix locking for callbacks")
Reported-by: D. Ziesche <dziesche(a)zes.com>
Tested-by: D. Ziesche <dziesche(a)zes.com>
CC: <stable(a)vger.kernel.org>
Signed-off-by: Felipe Balbi <felipe.balbi(a)linux.intel.com>
---
drivers/usb/gadget/udc/net2280.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index 318246d8b2e2..b02ab2a8d927 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -1545,11 +1545,14 @@ static int net2280_pullup(struct usb_gadget *_gadget, int is_on)
writel(tmp | BIT(USB_DETECT_ENABLE), &dev->usb->usbctl);
} else {
writel(tmp & ~BIT(USB_DETECT_ENABLE), &dev->usb->usbctl);
- stop_activity(dev, dev->driver);
+ stop_activity(dev, NULL);
}
spin_unlock_irqrestore(&dev->lock, flags);
+ if (!is_on && dev->driver)
+ dev->driver->disconnect(&dev->gadget);
+
return 0;
}
@@ -2466,8 +2469,11 @@ static void stop_activity(struct net2280 *dev, struct usb_gadget_driver *driver)
nuke(&dev->ep[i]);
/* report disconnect; the driver is already quiesced */
- if (driver)
+ if (driver) {
+ spin_unlock(&dev->lock);
driver->disconnect(&dev->gadget);
+ spin_lock(&dev->lock);
+ }
usb_reinit(dev);
}
@@ -3341,6 +3347,8 @@ static void handle_stat0_irqs(struct net2280 *dev, u32 stat)
BIT(PCI_RETRY_ABORT_INTERRUPT))
static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
+__releases(dev->lock)
+__acquires(dev->lock)
{
struct net2280_ep *ep;
u32 tmp, num, mask, scratch;
@@ -3381,12 +3389,14 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
if (disconnect || reset) {
stop_activity(dev, dev->driver);
ep0_start(dev);
+ spin_unlock(&dev->lock);
if (reset)
usb_gadget_udc_reset
(&dev->gadget, dev->driver);
else
(dev->driver->disconnect)
(&dev->gadget);
+ spin_lock(&dev->lock);
return;
}
}
@@ -3405,6 +3415,7 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
tmp = BIT(SUSPEND_REQUEST_CHANGE_INTERRUPT);
if (stat & tmp) {
writel(tmp, &dev->regs->irqstat1);
+ spin_unlock(&dev->lock);
if (stat & BIT(SUSPEND_REQUEST_INTERRUPT)) {
if (dev->driver->suspend)
dev->driver->suspend(&dev->gadget);
@@ -3415,6 +3426,7 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
dev->driver->resume(&dev->gadget);
/* at high speed, note erratum 0133 */
}
+ spin_lock(&dev->lock);
stat &= ~tmp;
}
--
2.18.0