After _gadget_stop_activity is executed, we can consider the hardware
operation for gadget has finished, and the udc can be stopped and enter
low power mode. So, any later hardware operations (from usb_ep_ops APIs
or usb_gadget_ops APIs) should be considered invalid; any deinitializations
have already been covered by _gadget_stop_activity.
I hit this problem when I unplug the USB cable from the PC while using the
mass_storage gadget. My call stack looks like: vbus interrupt->.vbus_session->
composite_disconnect ->pm_runtime_put_sync(&_gadget->dev).
composite_disconnect calls fsg_disable, but fsg_disable calls
usb_ep_disable asynchronously, and usb_ep_disable accesses registers.
So sometimes the system hangs because a register is accessed without
a clock, and sometimes it does not.
The Linux Kernel USB maintainer Alan Stern suggested this kind of solution.
See: http://marc.info/?l=linux-usb&m=138541769810983&w=2.
Cc: <stable(a)vger.kernel.org> #v4.9+
Signed-off-by: Peter Chen <peter.chen(a)nxp.com>
---
This patch has been in the NXP internal tree for a long time, and no issues
have been found. Submitting it to the mainline kernel.
drivers/usb/chipidea/udc.c | 32 ++++++++++++++++++++++++--------
1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 053432d79bf7..8f18e7b6cadf 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -709,12 +709,6 @@ static int _gadget_stop_activity(struct usb_gadget *gadget)
struct ci_hdrc *ci = container_of(gadget, struct ci_hdrc, gadget);
unsigned long flags;
- spin_lock_irqsave(&ci->lock, flags);
- ci->gadget.speed = USB_SPEED_UNKNOWN;
- ci->remote_wakeup = 0;
- ci->suspended = 0;
- spin_unlock_irqrestore(&ci->lock, flags);
-
/* flush all endpoints */
gadget_for_each_ep(ep, gadget) {
usb_ep_fifo_flush(ep);
@@ -732,6 +726,12 @@ static int _gadget_stop_activity(struct usb_gadget *gadget)
ci->status = NULL;
}
+ spin_lock_irqsave(&ci->lock, flags);
+ ci->gadget.speed = USB_SPEED_UNKNOWN;
+ ci->remote_wakeup = 0;
+ ci->suspended = 0;
+ spin_unlock_irqrestore(&ci->lock, flags);
+
return 0;
}
@@ -1303,6 +1303,10 @@ static int ep_disable(struct usb_ep *ep)
return -EBUSY;
spin_lock_irqsave(hwep->lock, flags);
+ if (hwep->ci->gadget.speed == USB_SPEED_UNKNOWN) {
+ spin_unlock_irqrestore(hwep->lock, flags);
+ return 0;
+ }
/* only internal SW should disable ctrl endpts */
@@ -1392,6 +1396,10 @@ static int ep_queue(struct usb_ep *ep, struct usb_request *req,
return -EINVAL;
spin_lock_irqsave(hwep->lock, flags);
+ if (hwep->ci->gadget.speed == USB_SPEED_UNKNOWN) {
+ spin_unlock_irqrestore(hwep->lock, flags);
+ return 0;
+ }
retval = _ep_queue(ep, req, gfp_flags);
spin_unlock_irqrestore(hwep->lock, flags);
return retval;
@@ -1415,8 +1423,8 @@ static int ep_dequeue(struct usb_ep *ep, struct usb_request *req)
return -EINVAL;
spin_lock_irqsave(hwep->lock, flags);
-
- hw_ep_flush(hwep->ci, hwep->num, hwep->dir);
+ if (hwep->ci->gadget.speed != USB_SPEED_UNKNOWN)
+ hw_ep_flush(hwep->ci, hwep->num, hwep->dir);
list_for_each_entry_safe(node, tmpnode, &hwreq->tds, td) {
dma_pool_free(hwep->td_pool, node->ptr, node->dma);
@@ -1487,6 +1495,10 @@ static void ep_fifo_flush(struct usb_ep *ep)
}
spin_lock_irqsave(hwep->lock, flags);
+ if (hwep->ci->gadget.speed == USB_SPEED_UNKNOWN) {
+ spin_unlock_irqrestore(hwep->lock, flags);
+ return;
+ }
hw_ep_flush(hwep->ci, hwep->num, hwep->dir);
@@ -1559,6 +1571,10 @@ static int ci_udc_wakeup(struct usb_gadget *_gadget)
int ret = 0;
spin_lock_irqsave(&ci->lock, flags);
+ if (ci->gadget.speed == USB_SPEED_UNKNOWN) {
+ spin_unlock_irqrestore(&ci->lock, flags);
+ return 0;
+ }
if (!ci->remote_wakeup) {
ret = -EOPNOTSUPP;
goto out;
--
2.17.1
Fix a regression introduced by upstream commit fee109901f39
('signal/drbd: Use send_sig not force_sig').
Currently, when a thread is initialized, all signals are set to be
ignored by default. DRBD uses SIGHUP to end its threads, which means it
is now no longer possible to bring down a DRBD resource because the
signals do not make it through to the thread in question.
This circumstance was previously hidden by the fact that DRBD used
force_sig() to kill its threads. The aforementioned upstream commit
changed this to send_sig(), which means the effects of the signals being
ignored by default are now becoming visible.
Thus, issue an allow_signal() at the start of the thread to explicitly
allow the desired signals.
Signed-off-by: Christoph Böhmwalder <christoph.boehmwalder(a)linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner(a)linbit.com>
Fixes: fee109901f39 ("signal/drbd: Use send_sig not force_sig")
Cc: stable(a)vger.kernel.org
---
drivers/block/drbd/drbd_main.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9bd4ddd12b25..b8b986df6814 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -318,6 +318,9 @@ static int drbd_thread_setup(void *arg)
unsigned long flags;
int retval;
+ allow_signal(DRBD_SIGKILL);
+ allow_signal(SIGXCPU);
+
snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
thi->name[0],
resource->name);
--
2.22.0
Similar to vmstats, percpu caching of local vmevents leads to an
accumulation of errors on non-leaf levels. This happens because some
leftovers may remain in percpu caches, so that they are never propagated
up by the cgroup tree and just disappear into nonexistence on
releasing of the memory cgroup.
To fix this issue let's accumulate and propagate percpu vmevents values
before releasing the memory cgroup similar to what we're doing with
vmstats.
Since on cpu hotplug we do flush percpu vmstats anyway, we can iterate
only over online cpus.
Fixes: 42a300353577 ("mm: memcontrol: fix recursive statistics correctness & scalabilty")
Signed-off-by: Roman Gushchin <guro(a)fb.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
---
mm/memcontrol.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 818165d8de3f..f98c5293adae 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3418,6 +3418,25 @@ static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
}
}
+static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg)
+{
+ unsigned long events[NR_VM_EVENT_ITEMS];
+ struct mem_cgroup *mi;
+ int cpu, i;
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ events[i] = 0;
+
+ for_each_online_cpu(cpu)
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ events[i] += raw_cpu_read(
+ memcg->vmstats_percpu->events[i]);
+
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ atomic_long_add(events[i], &mi->vmevents[i]);
+}
+
#ifdef CONFIG_MEMCG_KMEM
static int memcg_online_kmem(struct mem_cgroup *memcg)
{
@@ -4841,10 +4860,11 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
int node;
/*
- * Flush percpu vmstats to guarantee the value correctness
+ * Flush percpu vmstats and vmevents to guarantee the value correctness
* on parent's and all ancestor levels.
*/
memcg_flush_percpu_vmstats(memcg);
+ memcg_flush_percpu_vmevents(memcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
--
2.21.0
Percpu caching of local vmstats with the conditional propagation by the
cgroup tree leads to an accumulation of errors on non-leaf levels.
Let's imagine two nested memory cgroups A and A/B. Say, a process
belonging to A/B allocates 100 pagecache pages on the CPU 0. The percpu
cache will spill 3 times, so that 32*3=96 pages will be accounted to A/B
and A atomic vmstat counters, 4 pages will remain in the percpu cache.
Imagine A/B is close to memory.max, so that every following allocation
triggers a direct reclaim on the local CPU. Say, each such attempt will
free 16 pages on a new cpu. That means every percpu cache will have -16
pages, except the first one, which will have 4 - 16 = -12. A/B and A
atomic counters will not be touched at all.
Now a user removes A/B. All percpu caches are freed and corresponding
vmstat numbers are forgotten. A has 96 pages more than expected.
As memory cgroups are created and destroyed, errors do accumulate. Even
1-2 pages differences can accumulate into large numbers.
To fix this issue let's accumulate and propagate percpu vmstat values
before releasing the memory cgroup. At this point these numbers are
stable and cannot be changed.
Since on cpu hotplug we do flush percpu vmstats anyway, we can iterate
only over online cpus.
Fixes: 42a300353577 ("mm: memcontrol: fix recursive statistics correctness & scalabilty")
Signed-off-by: Roman Gushchin <guro(a)fb.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
---
mm/memcontrol.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3e821f34399f..818165d8de3f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3383,6 +3383,41 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
}
}
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
+{
+ unsigned long stat[MEMCG_NR_STAT];
+ struct mem_cgroup *mi;
+ int node, cpu, i;
+
+ for (i = 0; i < MEMCG_NR_STAT; i++)
+ stat[i] = 0;
+
+ for_each_online_cpu(cpu)
+ for (i = 0; i < MEMCG_NR_STAT; i++)
+ stat[i] += raw_cpu_read(memcg->vmstats_percpu->stat[i]);
+
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ for (i = 0; i < MEMCG_NR_STAT; i++)
+ atomic_long_add(stat[i], &mi->vmstats[i]);
+
+ for_each_node(node) {
+ struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+ struct mem_cgroup_per_node *pi;
+
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ stat[i] = 0;
+
+ for_each_online_cpu(cpu)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ stat[i] += raw_cpu_read(
+ pn->lruvec_stat_cpu->count[i]);
+
+ for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ atomic_long_add(stat[i], &pi->lruvec_stat[i]);
+ }
+}
+
#ifdef CONFIG_MEMCG_KMEM
static int memcg_online_kmem(struct mem_cgroup *memcg)
{
@@ -4805,6 +4840,11 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
{
int node;
+ /*
+ * Flush percpu vmstats to guarantee the value correctness
+ * on parent's and all ancestor levels.
+ */
+ memcg_flush_percpu_vmstats(memcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
--
2.21.0
The patch titled
Subject: mm: memcontrol: flush percpu vmevents before releasing memcg
has been added to the -mm tree. Its filename is
mm-memcontrol-flush-percpu-vmevents-before-releasing-memcg.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-memcontrol-flush-percpu-vmevent…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-memcontrol-flush-percpu-vmevent…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Roman Gushchin <guro(a)fb.com>
Subject: mm: memcontrol: flush percpu vmevents before releasing memcg
Similar to vmstats, percpu caching of local vmevents leads to an
accumulation of errors on non-leaf levels. This happens because some
leftovers may remain in percpu caches, so that they are never propagated
up by the cgroup tree and just disappear into nonexistence on
releasing of the memory cgroup.
To fix this issue let's accumulate and propagate percpu vmevents values
before releasing the memory cgroup similar to what we're doing with
vmstats.
Since on cpu hotplug we do flush percpu vmstats anyway, we can iterate
only over online cpus.
Link: http://lkml.kernel.org/r/20190819202338.363363-4-guro@fb.com
Fixes: 42a300353577 ("mm: memcontrol: fix recursive statistics correctness & scalabilty")
Signed-off-by: Roman Gushchin <guro(a)fb.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
--- a/mm/memcontrol.c~mm-memcontrol-flush-percpu-vmevents-before-releasing-memcg
+++ a/mm/memcontrol.c
@@ -3307,6 +3307,25 @@ static void memcg_flush_percpu_vmstats(s
}
}
+static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg)
+{
+ unsigned long events[NR_VM_EVENT_ITEMS];
+ struct mem_cgroup *mi;
+ int cpu, i;
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ events[i] = 0;
+
+ for_each_online_cpu(cpu)
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ events[i] += raw_cpu_read(
+ memcg->vmstats_percpu->events[i]);
+
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ atomic_long_add(events[i], &mi->vmevents[i]);
+}
+
#ifdef CONFIG_MEMCG_KMEM
static int memcg_online_kmem(struct mem_cgroup *memcg)
{
@@ -4737,10 +4756,11 @@ static void __mem_cgroup_free(struct mem
int node;
/*
- * Flush percpu vmstats to guarantee the value correctness
+ * Flush percpu vmstats and vmevents to guarantee the value correctness
* on parent's and all ancestor levels.
*/
memcg_flush_percpu_vmstats(memcg, false);
+ memcg_flush_percpu_vmevents(memcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
_
Patches currently in -mm which might be from guro(a)fb.com are
mm-memcontrol-flush-percpu-vmstats-before-releasing-memcg.patch
mm-memcontrol-flush-percpu-slab-vmstats-on-kmem-offlining.patch
mm-memcontrol-flush-percpu-vmevents-before-releasing-memcg.patch
mm-memcontrol-switch-to-rcu-protection-in-drain_all_stock.patch
The patch titled
Subject: mm: memcontrol: flush percpu vmstats before releasing memcg
has been added to the -mm tree. Its filename is
mm-memcontrol-flush-percpu-vmstats-before-releasing-memcg.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-memcontrol-flush-percpu-vmstats…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-memcontrol-flush-percpu-vmstats…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Roman Gushchin <guro(a)fb.com>
Subject: mm: memcontrol: flush percpu vmstats before releasing memcg
Percpu caching of local vmstats with the conditional propagation by the
cgroup tree leads to an accumulation of errors on non-leaf levels.
Let's imagine two nested memory cgroups A and A/B. Say, a process
belonging to A/B allocates 100 pagecache pages on the CPU 0. The percpu
cache will spill 3 times, so that 32*3=96 pages will be accounted to A/B
and A atomic vmstat counters, 4 pages will remain in the percpu cache.
Imagine A/B is close to memory.max, so that every following allocation
triggers a direct reclaim on the local CPU. Say, each such attempt will
free 16 pages on a new cpu. That means every percpu cache will have -16
pages, except the first one, which will have 4 - 16 = -12. A/B and A
atomic counters will not be touched at all.
Now a user removes A/B. All percpu caches are freed and corresponding
vmstat numbers are forgotten. A has 96 pages more than expected.
As memory cgroups are created and destroyed, errors do accumulate. Even
1-2 pages differences can accumulate into large numbers.
To fix this issue let's accumulate and propagate percpu vmstat values
before releasing the memory cgroup. At this point these numbers are
stable and cannot be changed.
Since on cpu hotplug we do flush percpu vmstats anyway, we can iterate
only over online cpus.
Link: http://lkml.kernel.org/r/20190819202338.363363-2-guro@fb.com
Fixes: 42a300353577 ("mm: memcontrol: fix recursive statistics correctness & scalabilty")
Signed-off-by: Roman Gushchin <guro(a)fb.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
--- a/mm/memcontrol.c~mm-memcontrol-flush-percpu-vmstats-before-releasing-memcg
+++ a/mm/memcontrol.c
@@ -3260,6 +3260,41 @@ static u64 mem_cgroup_read_u64(struct cg
}
}
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
+{
+ unsigned long stat[MEMCG_NR_STAT];
+ struct mem_cgroup *mi;
+ int node, cpu, i;
+
+ for (i = 0; i < MEMCG_NR_STAT; i++)
+ stat[i] = 0;
+
+ for_each_online_cpu(cpu)
+ for (i = 0; i < MEMCG_NR_STAT; i++)
+ stat[i] += raw_cpu_read(memcg->vmstats_percpu->stat[i]);
+
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ for (i = 0; i < MEMCG_NR_STAT; i++)
+ atomic_long_add(stat[i], &mi->vmstats[i]);
+
+ for_each_node(node) {
+ struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+ struct mem_cgroup_per_node *pi;
+
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ stat[i] = 0;
+
+ for_each_online_cpu(cpu)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ stat[i] += raw_cpu_read(
+ pn->lruvec_stat_cpu->count[i]);
+
+ for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ atomic_long_add(stat[i], &pi->lruvec_stat[i]);
+ }
+}
+
#ifdef CONFIG_MEMCG_KMEM
static int memcg_online_kmem(struct mem_cgroup *memcg)
{
@@ -4682,6 +4717,11 @@ static void __mem_cgroup_free(struct mem
{
int node;
+ /*
+ * Flush percpu vmstats to guarantee the value correctness
+ * on parent's and all ancestor levels.
+ */
+ memcg_flush_percpu_vmstats(memcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
_
Patches currently in -mm which might be from guro(a)fb.com are
mm-memcontrol-flush-percpu-vmstats-before-releasing-memcg.patch
mm-memcontrol-flush-percpu-slab-vmstats-on-kmem-offlining.patch
mm-memcontrol-flush-percpu-vmevents-before-releasing-memcg.patch
mm-memcontrol-switch-to-rcu-protection-in-drain_all_stock.patch
The patch titled
Subject: mm: memcontrol: flush percpu vmevents before releasing memcg
has been removed from the -mm tree. Its filename was
mm-memcontrol-flush-percpu-vmevents-before-releasing-memcg.patch
This patch was dropped because an updated version will be merged
------------------------------------------------------
From: Roman Gushchin <guro(a)fb.com>
Subject: mm: memcontrol: flush percpu vmevents before releasing memcg
Similar to vmstats, percpu caching of local vmevents leads to an
accumulation of errors on non-leaf levels. This happens because some
leftovers may remain in percpu caches, so that they are never propagated
up by the cgroup tree and just disappear into nonexistence on
releasing of the memory cgroup.
To fix this issue let's accumulate and propagate percpu vmevents values
before releasing the memory cgroup similar to what we're doing with
vmstats.
Since on cpu hotplug we do flush percpu vmstats anyway, we can iterate
only over online cpus.
Link: http://lkml.kernel.org/r/20190812233754.2570543-1-guro@fb.com
Fixes: 42a300353577 ("mm: memcontrol: fix recursive statistics correctness & scalabilty")
Signed-off-by: Roman Gushchin <guro(a)fb.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
--- a/mm/memcontrol.c~mm-memcontrol-flush-percpu-vmevents-before-releasing-memcg
+++ a/mm/memcontrol.c
@@ -3336,6 +3336,25 @@ static void memcg_flush_percpu_vmstats(s
}
}
+static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg)
+{
+ unsigned long events[NR_VM_EVENT_ITEMS];
+ struct mem_cgroup *mi;
+ int cpu, i;
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ events[i] = 0;
+
+ for_each_online_cpu(cpu)
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ events[i] += raw_cpu_read(
+ memcg->vmstats_percpu->events[i]);
+
+ for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ atomic_long_add(events[i], &mi->vmevents[i]);
+}
+
static void memcg_offline_kmem(struct mem_cgroup *memcg)
{
struct cgroup_subsys_state *css;
@@ -4737,10 +4756,11 @@ static void __mem_cgroup_free(struct mem
int node;
/*
- * Flush percpu vmstats to guarantee the value correctness
+ * Flush percpu vmstats and vmevents to guarantee the value correctness
* on parent's and all ancestor levels.
*/
memcg_flush_percpu_vmstats(memcg, false);
+ memcg_flush_percpu_vmevents(memcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
_
Patches currently in -mm which might be from guro(a)fb.com are
mm-memcontrol-switch-to-rcu-protection-in-drain_all_stock.patch