buffer_migrate_page_norefs() can race with bh users in the following way:
CPU1                                    CPU2
buffer_migrate_page_norefs()
  buffer_migrate_lock_buffers()
  checks bh refs
  spin_unlock(&mapping->private_lock)
                                        __find_get_block()
                                          spin_lock(&mapping->private_lock)
                                          grab bh ref
                                          spin_unlock(&mapping->private_lock)
  move page                             do bh work
This can result in various issues such as lost updates to buffers (i.e.,
metadata corruption) or use-after-free issues for the old page.
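To make the window concrete, here is the same interleaving as an annotated
C sketch (condensed from the check this patch removes and from
__find_get_block_slow() in fs/buffer.c; schematic, not a compilable unit):

	/* CPU1: ref check done under private_lock, then the lock is dropped */
	spin_lock(&mapping->private_lock);
	bh = head;
	do {
		if (atomic_read(&bh->b_count))	/* sees no references */
			busy = true;
		bh = bh->b_this_page;
	} while (bh != head);
	spin_unlock(&mapping->private_lock);	/* <-- race window opens */

	/* CPU2: __find_get_block() -> __find_get_block_slow() */
	spin_lock(&mapping->private_lock);
	get_bh(bh);				/* b_count++, unseen by CPU1 */
	spin_unlock(&mapping->private_lock);

	/* CPU1 now moves the page while CPU2 still works on its bh */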
Closing this race window is relatively difficult. We could hold
mapping->private_lock in buffer_migrate_page_norefs() until we are
finished with migrating the page, but the lock hold times would be rather
long. So let's revert to a more careful variant of page migration that
requires eviction of buffers on the migrated page. This is effectively
fallback_migrate_page() that additionally invalidates bh LRUs in case
try_to_free_buffers() fails.
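In condensed form, the reworked buffer_migrate_page_norefs() below does the
following (pseudocode restating the diff; "is_sync_mode" stands in for the
MIGRATE_SYNC/MIGRATE_SYNC_NO_COPY check and "first_failure" for the
!invalidated test in the actual code):

	if (PageDirty(page))
		return is_sync_mode ? writeout(mapping, page) : -EBUSY;
	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) {
		if (first_failure) {		/* retry once after flushing */
			invalidate_bh_lrus();	/* per-CPU bh LRU references */
			goto retry;
		}
		return (mode == MIGRATE_SYNC) ? -EAGAIN : -EBUSY;
	}
	return migrate_page(mapping, newpage, page, mode);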
CC: stable@vger.kernel.org
Fixes: 89cb0888ca14 ("mm: migrate: provide buffer_migrate_page_norefs()")
Signed-off-by: Jan Kara <jack@suse.cz>
---
mm/migrate.c | 161 +++++++++++++++++++++++++++++------------------------------
1 file changed, 78 insertions(+), 83 deletions(-)
I've lightly tested this with config-workload-thpfioscale, which didn't
show any obvious issues, but the patch probably needs more testing
(especially to verify that memory unplug is still able to succeed in a
reasonable time). That's why this is an RFC.
diff --git a/mm/migrate.c b/mm/migrate.c
index e9594bc0d406..893698d37d50 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -697,6 +697,47 @@ int migrate_page(struct address_space *mapping,
 }
 EXPORT_SYMBOL(migrate_page);
 
+/*
+ * Writeback a page to clean the dirty state
+ */
+static int writeout(struct address_space *mapping, struct page *page)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_NONE,
+		.nr_to_write = 1,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+		.for_reclaim = 1
+	};
+	int rc;
+
+	if (!mapping->a_ops->writepage)
+		/* No write method for the address space */
+		return -EINVAL;
+
+	if (!clear_page_dirty_for_io(page))
+		/* Someone else already triggered a write */
+		return -EAGAIN;
+
+	/*
+	 * A dirty page may imply that the underlying filesystem has
+	 * the page on some queue. So the page must be clean for
+	 * migration. Writeout may mean we lose the lock and the
+	 * page state is no longer what we checked for earlier.
+	 * At this point we know that the migration attempt cannot
+	 * be successful.
+	 */
+	remove_migration_ptes(page, page, false);
+
+	rc = mapping->a_ops->writepage(page, &wbc);
+
+	if (rc != AOP_WRITEPAGE_ACTIVATE)
+		/* unlocked. Relock */
+		lock_page(page);
+
+	return (rc < 0) ? -EIO : -EAGAIN;
+}
+
 #ifdef CONFIG_BLOCK
 /* Returns true if all buffers are successfully locked */
 static bool buffer_migrate_lock_buffers(struct buffer_head *head,
@@ -736,9 +777,14 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
 	return true;
 }
 
-static int __buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page, enum migrate_mode mode,
-		bool check_refs)
+/*
+ * Migration function for pages with buffers. This function can only be used
+ * if the underlying filesystem guarantees that no other references to "page"
+ * exist. For example attached buffer heads are accessed only under page lock.
+ */
+int buffer_migrate_page(struct address_space *mapping,
+		struct page *newpage, struct page *page,
+		enum migrate_mode mode)
 {
 	struct buffer_head *bh, *head;
 	int rc;
@@ -756,33 +802,6 @@ static int __buffer_migrate_page(struct address_space *mapping,
 	if (!buffer_migrate_lock_buffers(head, mode))
 		return -EAGAIN;
 
-	if (check_refs) {
-		bool busy;
-		bool invalidated = false;
-
-recheck_buffers:
-		busy = false;
-		spin_lock(&mapping->private_lock);
-		bh = head;
-		do {
-			if (atomic_read(&bh->b_count)) {
-				busy = true;
-				break;
-			}
-			bh = bh->b_this_page;
-		} while (bh != head);
-		spin_unlock(&mapping->private_lock);
-		if (busy) {
-			if (invalidated) {
-				rc = -EAGAIN;
-				goto unlock_buffers;
-			}
-			invalidate_bh_lrus();
-			invalidated = true;
-			goto recheck_buffers;
-		}
-	}
-
 	rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
 	if (rc != MIGRATEPAGE_SUCCESS)
 		goto unlock_buffers;
@@ -818,72 +837,48 @@ static int __buffer_migrate_page(struct address_space *mapping,
 
 	return rc;
 }
-
-/*
- * Migration function for pages with buffers. This function can only be used
- * if the underlying filesystem guarantees that no other references to "page"
- * exist. For example attached buffer heads are accessed only under page lock.
- */
-int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page, enum migrate_mode mode)
-{
-	return __buffer_migrate_page(mapping, newpage, page, mode, false);
-}
 EXPORT_SYMBOL(buffer_migrate_page);
 
 /*
- * Same as above except that this variant is more careful and checks that there
- * are also no buffer head references. This function is the right one for
- * mappings where buffer heads are directly looked up and referenced (such as
- * block device mappings).
+ * Same as above except that this variant is more careful. This function is
+ * the right one for mappings where buffer heads are directly looked up and
+ * referenced (such as block device mappings).
  */
 int buffer_migrate_page_norefs(struct address_space *mapping,
 		struct page *newpage, struct page *page, enum migrate_mode mode)
 {
-	return __buffer_migrate_page(mapping, newpage, page, mode, true);
-}
-#endif
-
-/*
- * Writeback a page to clean the dirty state
- */
-static int writeout(struct address_space *mapping, struct page *page)
-{
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_NONE,
-		.nr_to_write = 1,
-		.range_start = 0,
-		.range_end = LLONG_MAX,
-		.for_reclaim = 1
-	};
-	int rc;
-
-	if (!mapping->a_ops->writepage)
-		/* No write method for the address space */
-		return -EINVAL;
+	bool invalidated = false;
 
-	if (!clear_page_dirty_for_io(page))
-		/* Someone else already triggered a write */
-		return -EAGAIN;
+	if (PageDirty(page)) {
+		/* Only writeback pages in full synchronous migration */
+		switch (mode) {
+		case MIGRATE_SYNC:
+		case MIGRATE_SYNC_NO_COPY:
+			break;
+		default:
+			return -EBUSY;
+		}
+		return writeout(mapping, page);
+	}
 
+retry:
 	/*
-	 * A dirty page may imply that the underlying filesystem has
-	 * the page on some queue. So the page must be clean for
-	 * migration. Writeout may mean we loose the lock and the
-	 * page state is no longer what we checked for earlier.
-	 * At this point we know that the migration attempt cannot
-	 * be successful.
+	 * Buffers may be managed in a filesystem specific way.
+	 * We must have no buffers or drop them.
 	 */
-	remove_migration_ptes(page, page, false);
-
-	rc = mapping->a_ops->writepage(page, &wbc);
-
-	if (rc != AOP_WRITEPAGE_ACTIVATE)
-		/* unlocked. Relock */
-		lock_page(page);
+	if (page_has_private(page) &&
+	    !try_to_release_page(page, GFP_KERNEL)) {
+		if (!invalidated) {
+			invalidate_bh_lrus();
+			invalidated = true;
+			goto retry;
+		}
+		return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;
+	}
 
-	return (rc < 0) ? -EIO : -EAGAIN;
+	return migrate_page(mapping, newpage, page, mode);
 }
+#endif
 
 /*
  * Default handling if a filesystem does not provide a migration function.
--
2.16.4
From: Maarten ter Huurne <maarten@treewalker.org>
The SADC component can run at up to 8 MHz on the JZ4725B, but it is fed
a 12 MHz input clock (EXT). We divide that by two to get 6 MHz, then
set up another divider to match, to produce the 10us clock.
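For reference, a worked instance of the divider math implemented in
jz4725b_adc_init_clk_div() below, assuming the typical 12 MHz EXT parent
clock:

	div_main = DIV_ROUND_UP(12000000, 8000000);	/* = 2 */
	rate	 = 12000000 / div_main;			/* = 6000000 (6 MHz) */
	div_10us = DIV_ROUND_UP(rate, 100000);		/* = 60 */
	/* 60 ticks of the 6 MHz ADC clock = 10us; ADCLK is then written
	 * as ((60 - 1) << 16) | ((2 - 1) << 0).
	 */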
If the clock dividers are left on their power-on defaults (a divider
of 1), the SADC mostly works, but will occasionally produce erroneous
readings. This led to button presses being detected out of nowhere on
the RS90 every few minutes. With this change, no ghost button presses
were logged in almost a day's worth of testing.
The ADCLK register for configuring clock dividers doesn't exist on the
JZ4740, so avoid writing it there.
A function has been introduced rather than a flag because there is a lot
of variation between the ADCLK registers on JZ47xx SoCs, both in the
internal layout of the register and in the frequency range supported by
the SADC. This approach should make it easier to add support for other
JZ47xx SoCs later.
Fixes: 1a78daea107d ("iio: adc: probe should set clock divider")
Signed-off-by: Maarten ter Huurne <maarten@treewalker.org>
Signed-off-by: Artur Rojek <contact@artur-rojek.eu>
---
Changes:
v2: Add the fixes tag.
drivers/iio/adc/ingenic-adc.c | 54 +++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/drivers/iio/adc/ingenic-adc.c b/drivers/iio/adc/ingenic-adc.c
index 92b1d5037ac9..e234970b7150 100644
--- a/drivers/iio/adc/ingenic-adc.c
+++ b/drivers/iio/adc/ingenic-adc.c
@@ -11,6 +11,7 @@
 #include <linux/iio/iio.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
@@ -22,8 +23,11 @@
 #define JZ_ADC_REG_ADTCH	0x18
 #define JZ_ADC_REG_ADBDAT	0x1c
 #define JZ_ADC_REG_ADSDAT	0x20
+#define JZ_ADC_REG_ADCLK	0x28
 
 #define JZ_ADC_REG_CFG_BAT_MD	BIT(4)
+#define JZ_ADC_REG_ADCLK_CLKDIV_LSB	0
+#define JZ_ADC_REG_ADCLK_CLKDIV10US_LSB	16
 
 #define JZ_ADC_AUX_VREF				3300
 #define JZ_ADC_AUX_VREF_BITS			12
@@ -34,6 +38,8 @@
 #define JZ4740_ADC_BATTERY_HIGH_VREF	(7500 * 0.986)
 #define JZ4740_ADC_BATTERY_HIGH_VREF_BITS	12
 
+struct ingenic_adc;
+
 struct ingenic_adc_soc_data {
 	unsigned int battery_high_vref;
 	unsigned int battery_high_vref_bits;
@@ -41,6 +47,7 @@ struct ingenic_adc_soc_data {
 	size_t battery_raw_avail_size;
 	const int *battery_scale_avail;
 	size_t battery_scale_avail_size;
+	int (*init_clk_div)(struct device *dev, struct ingenic_adc *adc);
 };
 
 struct ingenic_adc {
@@ -151,6 +158,42 @@ static const int jz4740_adc_battery_scale_avail[] = {
 	JZ_ADC_BATTERY_LOW_VREF, JZ_ADC_BATTERY_LOW_VREF_BITS,
 };
 
+static int jz4725b_adc_init_clk_div(struct device *dev, struct ingenic_adc *adc)
+{
+	struct clk *parent_clk;
+	unsigned long parent_rate, rate;
+	unsigned int div_main, div_10us;
+
+	parent_clk = clk_get_parent(adc->clk);
+	if (!parent_clk) {
+		dev_err(dev, "ADC clock has no parent\n");
+		return -ENODEV;
+	}
+	parent_rate = clk_get_rate(parent_clk);
+
+	/*
+	 * The JZ4725B ADC works at 500 kHz to 8 MHz.
+	 * We pick the highest rate possible.
+	 * In practice we typically get 6 MHz, half of the 12 MHz EXT clock.
+	 */
+	div_main = DIV_ROUND_UP(parent_rate, 8000000);
+	div_main = clamp(div_main, 1u, 64u);
+	rate = parent_rate / div_main;
+	if (rate < 500000 || rate > 8000000) {
+		dev_err(dev, "No valid divider for ADC main clock\n");
+		return -EINVAL;
+	}
+
+	/* We also need a divider that produces a 10us clock. */
+	div_10us = DIV_ROUND_UP(rate, 100000);
+
+	writel(((div_10us - 1) << JZ_ADC_REG_ADCLK_CLKDIV10US_LSB) |
+	       (div_main - 1) << JZ_ADC_REG_ADCLK_CLKDIV_LSB,
+	       adc->base + JZ_ADC_REG_ADCLK);
+
+	return 0;
+}
+
 static const struct ingenic_adc_soc_data jz4725b_adc_soc_data = {
 	.battery_high_vref = JZ4725B_ADC_BATTERY_HIGH_VREF,
 	.battery_high_vref_bits = JZ4725B_ADC_BATTERY_HIGH_VREF_BITS,
@@ -158,6 +201,7 @@ static const struct ingenic_adc_soc_data jz4725b_adc_soc_data = {
 	.battery_raw_avail_size = ARRAY_SIZE(jz4725b_adc_battery_raw_avail),
 	.battery_scale_avail = jz4725b_adc_battery_scale_avail,
 	.battery_scale_avail_size = ARRAY_SIZE(jz4725b_adc_battery_scale_avail),
+	.init_clk_div = jz4725b_adc_init_clk_div,
 };
 
 static const struct ingenic_adc_soc_data jz4740_adc_soc_data = {
@@ -167,6 +211,7 @@ static const struct ingenic_adc_soc_data jz4740_adc_soc_data = {
 	.battery_raw_avail_size = ARRAY_SIZE(jz4740_adc_battery_raw_avail),
 	.battery_scale_avail = jz4740_adc_battery_scale_avail,
 	.battery_scale_avail_size = ARRAY_SIZE(jz4740_adc_battery_scale_avail),
+	.init_clk_div = NULL, /* no ADCLK register on JZ4740 */
 };
 
 static int ingenic_adc_read_avail(struct iio_dev *iio_dev,
@@ -317,6 +362,15 @@ static int ingenic_adc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	/* Set clock dividers. */
+	if (soc_data->init_clk_div) {
+		ret = soc_data->init_clk_div(dev, adc);
+		if (ret) {
+			clk_disable_unprepare(adc->clk);
+			return ret;
+		}
+	}
+
 	/* Put hardware in a known passive state. */
 	writeb(0x00, adc->base + JZ_ADC_REG_ENABLE);
 	writeb(0xff, adc->base + JZ_ADC_REG_CTRL);
--
2.22.0