Commit 07115449919383548d094ff83cc27bd08639a8a1 is in linux-next and
fixes an issue on 4.19.y where changing the image size fails.
Can you please apply this patch to 4.19.y?
Thank you,
Adam
Currently, xattr values must match the xmatch DFA for a profile to match.
This lets users construct profiles that match a file with a specific key
and value, using a basic regex.
profile test xattrs(security.apparmor=/usr/bin/*) {}
The xmatch DFA doesn't handle null characters in xattr values, since the
null character is the special character used to indicate that the DFA is
transitioning from matching the profile path to matching the xattr value.
However, both IMA and EVM xattr values hold signatures which potentially
contain a null character. It's currently impossible to write a profile that
requires the presence of an EVM signature without also checking its value.
profile test xattrs(security.evm security.apparmor=/usr/bin/*) {}
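For illustration, an EVM signature is a binary blob that may well contain
embedded NUL bytes, which is exactly what the xmatch DFA cannot express.
A hedged userspace sketch (the path and signature bytes are made up) of
attaching such a value:

	#include <stdio.h>
	#include <sys/xattr.h>

	int main(void)
	{
		/* hypothetical stand-in for an EVM signature blob;
		 * note the embedded NUL bytes */
		static const char sig[] = { 0x03, 0x02, 0x00, 0x00, 0x1b, 0x2a };

		/* writing security.evm normally requires CAP_SYS_ADMIN */
		if (setxattr("/usr/bin/whoami", "security.evm",
			     sig, sizeof(sig), 0) < 0)
			perror("setxattr");
		return 0;
	}

No value regex can be written against sig above, but the proposed
xattr_keys array can still require that security.evm be present.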
Add an additional "xattr_keys" array to the profile that only checks for
the presence of extended attributes, not their values.
A modified apparmor_parser that was used to test these changes can be found at:
https://gitlab.com/ericchiang/apparmor/commits/parser-xattrs-keys
To test, build the parser and run the following commands:
$ echo '/usr/bin/* xattrs=(user.foo user.bar=bar) {}' | \
./apparmor_parser -r
$ setfattr -n "user.foo" -v "foo" /usr/bin/whoami
$ setfattr -n "user.bar" -v "bar" /usr/bin/whoami
$ whoami # command fails
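For comparison (same hypothetical setup), removing one of the required
keys should make the profile stop attaching, so the command succeeds
again:
$ setfattr -x "user.foo" /usr/bin/whoami
$ whoami # command succeeds, profile no longer attaches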
Signed-off-by: Eric Chiang <ericchiang(a)google.com>
CC: stable(a)vger.kernel.org
---
security/apparmor/domain.c | 22 +++++++++++++++++++---
security/apparmor/include/policy.h | 6 ++++++
security/apparmor/policy.c | 3 +++
security/apparmor/policy_unpack.c | 18 ++++++++++++++++++
4 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 08c88de0ffda..53c46e3e01f0 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -319,14 +319,30 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
char *value = NULL;
int value_size = 0, ret = profile->xattr_count;
- if (!bprm || !profile->xattr_count)
+ if (!bprm)
return 0;
+ d = bprm->file->f_path.dentry;
+
+ if (profile->xattr_keys_count) {
+ /* validate that these attributes are present, ignore values */
+ for (i = 0; i < profile->xattr_keys_count; i++) {
+ size = vfs_getxattr_alloc(d, profile->xattr_keys[i],
+ &value, value_size,
+ GFP_KERNEL);
+ if (size < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
+ if (!profile->xattr_count)
+ goto out;
+
/* transition from exec match to xattr set */
state = aa_dfa_null_transition(profile->xmatch, state);
- d = bprm->file->f_path.dentry;
-
for (i = 0; i < profile->xattr_count; i++) {
size = vfs_getxattr_alloc(d, profile->xattrs[i], &value,
value_size, GFP_KERNEL);
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 8e6707c837be..8ed1d30de7ce 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -112,6 +112,10 @@ struct aa_data {
* @policy: general match rules governing policy
* @file: The set of rules governing basic file access and domain transitions
* @caps: capabilities for the profile
+ * @xattr_count: number of entries in @xattrs
+ * @xattrs: extended attributes whose values must match the xmatch
+ * @xattr_keys_count: number of entries in @xattr_keys
+ * @xattr_keys: extended attributes that must be present to match the profile
* @rlimits: rlimits for the profile
*
* @dents: dentries for the profiles file entries in apparmorfs
@@ -152,6 +156,8 @@ struct aa_profile {
int xattr_count;
char **xattrs;
+ int xattr_keys_count;
+ char **xattr_keys;
struct aa_rlimit rlimits;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index df9c5890a878..e0f9cf8b8318 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -231,6 +231,9 @@ void aa_free_profile(struct aa_profile *profile)
for (i = 0; i < profile->xattr_count; i++)
kzfree(profile->xattrs[i]);
kzfree(profile->xattrs);
+ for (i = 0; i < profile->xattr_keys_count; i++)
+ kzfree(profile->xattr_keys[i]);
+ kzfree(profile->xattr_keys);
for (i = 0; i < profile->secmark_count; i++)
kzfree(profile->secmark[i].label);
kzfree(profile->secmark);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 379682e2a8d5..d1fd75093260 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -535,6 +535,24 @@ static bool unpack_xattrs(struct aa_ext *e, struct aa_profile *profile)
goto fail;
}
+ if (unpack_nameX(e, AA_STRUCT, "xattr_keys")) {
+ int i, size;
+
+ size = unpack_array(e, NULL);
+ profile->xattr_keys_count = size;
+ profile->xattr_keys = kcalloc(size, sizeof(char *), GFP_KERNEL);
+ if (!profile->xattr_keys)
+ goto fail;
+ for (i = 0; i < size; i++) {
+ if (!unpack_strdup(e, &profile->xattr_keys[i], NULL))
+ goto fail;
+ }
+ if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+ goto fail;
+ if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+ goto fail;
+ }
+
return 1;
fail:
--
2.20.0.rc2.403.gdbc3b29805-goog
The patch titled
Subject: hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined
has been added to the -mm tree. Its filename is
hwpoison-memory_hotplug-allow-hwpoisoned-pages-to-be-offlined.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/hwpoison-memory_hotplug-allow-hwpo…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/hwpoison-memory_hotplug-allow-hwpo…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Michal Hocko <mhocko(a)suse.com>
Subject: hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined
We have received a bug report that an injected MCE about faulty memory
prevents memory offlining from succeeding on a 4.4-based kernel. The
underlying reason was that the HWPoison page has an elevated reference
count and the migration keeps failing. There are two problems with that.
First of all, it is dubious to migrate the poisoned page because we know
that accessing that memory can fail. Secondly, it doesn't make any sense
to migrate potentially broken content and preserve the memory corruption
over to a new location.
Oscar has found out that 4.4 and the current upstream kernels behave
slightly differently with his simple testcase:
===
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

/* userspace stand-ins for the kernel macros, assuming 4K pages */
#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#ifndef MADV_HWPOISON
#define MADV_HWPOISON	100	/* from <asm-generic/mman-common.h> */
#endif

int main(void)
{
	int ret;
	int i;
	int fd;
	char *array = malloc(4096);
	char *array_locked = malloc(4096);

	fd = open("/tmp/data", O_RDONLY);
	read(fd, array, 4095);
	for (i = 0; i < 4096; i++)
		array_locked[i] = 'd';
	/* note: sizeof(array_locked) is the pointer size, as in the
	 * original testcase */
	ret = mlock((void *)PAGE_ALIGN((unsigned long)array_locked),
		    sizeof(array_locked));
	if (ret)
		perror("mlock");
	sleep(20);
	/* poison injection requires CAP_SYS_ADMIN */
	ret = madvise((void *)PAGE_ALIGN((unsigned long)array_locked),
		      4096, MADV_HWPOISON);
	if (ret)
		perror("madvise");
	for (i = 0; i < 4096; i++)
		array_locked[i] = 'd';
	return 0;
}
===
followed by offlining the affected memory.
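(For reference, a memory block is offlined through the memory-hotplug
sysfs interface; sketch below, where the block number N depends on where
the poisoned page landed:)
# echo offline > /sys/devices/system/memory/memoryN/state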
In 4.4 kernels he saw the hwpoisoned page being returned back to the LRU
list:
kernel: [<ffffffff81019ac9>] dump_trace+0x59/0x340
kernel: [<ffffffff81019e9a>] show_stack_log_lvl+0xea/0x170
kernel: [<ffffffff8101ac71>] show_stack+0x21/0x40
kernel: [<ffffffff8132bb90>] dump_stack+0x5c/0x7c
kernel: [<ffffffff810815a1>] warn_slowpath_common+0x81/0xb0
kernel: [<ffffffff811a275c>] __pagevec_lru_add_fn+0x14c/0x160
kernel: [<ffffffff811a2eed>] pagevec_lru_move_fn+0xad/0x100
kernel: [<ffffffff811a334c>] __lru_cache_add+0x6c/0xb0
kernel: [<ffffffff81195236>] add_to_page_cache_lru+0x46/0x70
kernel: [<ffffffffa02b4373>] extent_readpages+0xc3/0x1a0 [btrfs]
kernel: [<ffffffff811a16d7>] __do_page_cache_readahead+0x177/0x200
kernel: [<ffffffff811a18c8>] ondemand_readahead+0x168/0x2a0
kernel: [<ffffffff8119673f>] generic_file_read_iter+0x41f/0x660
kernel: [<ffffffff8120e50d>] __vfs_read+0xcd/0x140
kernel: [<ffffffff8120e9ea>] vfs_read+0x7a/0x120
kernel: [<ffffffff8121404b>] kernel_read+0x3b/0x50
kernel: [<ffffffff81215c80>] do_execveat_common.isra.29+0x490/0x6f0
kernel: [<ffffffff81215f08>] do_execve+0x28/0x30
kernel: [<ffffffff81095ddb>] call_usermodehelper_exec_async+0xfb/0x130
kernel: [<ffffffff8161c045>] ret_from_fork+0x55/0x80
And the latter confuses the hotremove path, because an LRU page is
attempted to be migrated and that fails due to an elevated reference
count. It is quite possible that the reuse of the HWPoisoned page is some
kind of fixed race condition, but I am not really sure about that.
With the upstream kernel the failure is slightly different. The page
doesn't seem to have the LRU bit set, but isolate_movable_page simply fails
and do_migrate_range simply puts all the isolated pages back on the LRU;
therefore no progress is made and scan_movable_pages finds the same set of
pages over and over again.
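In other words, the offline retry loop spins without forward progress
(simplified sketch of the __offline_pages() flow, not the literal kernel
code):
===
do {
	/* keeps finding the same hwpoisoned page ... */
	pfn = scan_movable_pages(start_pfn, end_pfn);
	if (pfn)
		/* ... whose isolation/migration fails, so the page is
		 * put back and the next scan finds it again */
		do_migrate_range(pfn, end_pfn);
} while (pfn);
===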
Fix both cases by explicitly checking for HWPoisoned pages before we even
try to get a reference on the page, and by trying to unmap such a page if
it is still mapped. As explained by Naoya:
: Hwpoison code never unmapped those for no big reason because
: Ksm pages never dominate memory, so we simply didn't have strong
: motivation to save the pages.
Also put a WARN_ON(PageLRU) in there, in case there is a race and we can
hit LRU HWPoison pages; that shouldn't happen, but I couldn't convince
myself about it. Naoya has noted the following:
: Theoretically there is no such guarantee, because try_to_unmap() doesn't
: have a guarantee of success and then memory_failure() returns immediately
: when hwpoison_user_mappings fails.
: Or the following code (comes after the hwpoison_user_mappings block) also
: implies that the target page can still have the PageLRU flag.
:
: 	/*
: 	 * Torn down by someone else?
: 	 */
: 	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
: 		action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
: 		res = -EBUSY;
: 		goto out;
: 	}
:
: So I think it's OK to keep the "if (WARN_ON(PageLRU(page)))" block in
: the current version of your patch.
Link: http://lkml.kernel.org/r/20181206120135.14079-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.com>
Debugged-by: Oscar Salvador <osalvador(a)suse.com>
Tested-by: Oscar Salvador <osalvador(a)suse.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
--- a/mm/memory_hotplug.c~hwpoison-memory_hotplug-allow-hwpoisoned-pages-to-be-offlined
+++ a/mm/memory_hotplug.c
@@ -34,6 +34,7 @@
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/compaction.h>
+#include <linux/rmap.h>
#include <asm/tlbflush.h>
@@ -1343,6 +1344,21 @@ do_migrate_range(unsigned long start_pfn
pfn = page_to_pfn(compound_head(page))
+ hpage_nr_pages(page) - 1;
+ /*
+ * HWPoison pages have elevated reference counts so the migration would
+ * fail on them. It also doesn't make any sense to migrate them in the
+ * first place. Still try to unmap such a page in case it is still mapped
+ * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep
+ * the unmap as the catch all safety net).
+ */
+ if (PageHWPoison(page)) {
+ if (WARN_ON(PageLRU(page)))
+ isolate_lru_page(page);
+ if (page_mapped(page))
+ try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS);
+ continue;
+ }
+
if (!get_page_unless_zero(page))
continue;
/*
_
Patches currently in -mm which might be from mhocko(a)suse.com are
mm-print-more-information-about-mapping-in-__dump_page.patch
mm-lower-the-printk-loglevel-for-__dump_page-messages.patch
mm-memory_hotplug-drop-pointless-block-alignment-checks-from-__offline_pages.patch
mm-memory_hotplug-print-reason-for-the-offlining-failure.patch
mm-memory_hotplug-be-more-verbose-for-memory-offline-failures.patch
mm-memory_hotplug-be-more-verbose-for-memory-offline-failures-update.patch
mm-memory_hotplug-do-not-clear-numa_node-association-after-hot_remove.patch
hwpoison-memory_hotplug-allow-hwpoisoned-pages-to-be-offlined.patch
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
commit 400e22499dd92613821374c8c6c88c7225359980 upstream.
Commit 63f53dea0c98 ("mm: warn about allocations which stall for too
long") was a great step for reducing the possibility of silent hang-up
problems caused by memory allocation stalls. But this commit reverts it,
because it is possible to trigger OOM lockups and/or soft lockups when
many threads concurrently call warn_alloc() (in order to warn about memory
allocation stalls) due to the current implementation of printk(), and it
is difficult to obtain useful information due to the limitations of the
synchronous warning approach.
The current printk() implementation flushes all pending logs using the
context of the thread which called console_unlock(). printk() should be
able to flush all pending logs eventually, unless somebody keeps
appending to the printk() buffer.
Since warn_alloc() started appending to the printk() buffer while waiting
for oom_kill_process() to make forward progress when oom_kill_process()
was busy processing pending logs, it became possible for warn_alloc() to
force oom_kill_process() to loop inside printk(). As a result,
warn_alloc() significantly increased the possibility of preventing
oom_kill_process() from making forward progress.
---------- Pseudo code start ----------
Before warn_alloc() was introduced:
retry:
if (mutex_trylock(&oom_lock)) {
while (atomic_read(&printk_pending_logs) > 0) {
atomic_dec(&printk_pending_logs);
print_one_log();
}
// Send SIGKILL here.
mutex_unlock(&oom_lock)
}
goto retry;
After warn_alloc() was introduced:
retry:
if (mutex_trylock(&oom_lock)) {
while (atomic_read(&printk_pending_logs) > 0) {
atomic_dec(&printk_pending_logs);
print_one_log();
}
// Send SIGKILL here.
mutex_unlock(&oom_lock)
} else if (waited_for_10seconds()) {
atomic_inc(&printk_pending_logs);
}
goto retry;
---------- Pseudo code end ----------
Although waited_for_10seconds() becomes true once per 10 seconds, an
unbounded number of threads can call waited_for_10seconds() at the same
time. Also, since threads doing waited_for_10seconds() keep running an
almost-busy loop, the thread doing print_one_log() can use only a little
CPU resource. Therefore, this situation can be simplified like
---------- Pseudo code start ----------
retry:
if (mutex_trylock(&oom_lock)) {
while (atomic_read(&printk_pending_logs) > 0) {
atomic_dec(&printk_pending_logs);
print_one_log();
}
// Send SIGKILL here.
mutex_unlock(&oom_lock)
} else {
atomic_inc(&printk_pending_logs);
}
goto retry;
---------- Pseudo code end ----------
when printk() is called faster than print_one_log() can process a log.
One possible mitigation would be to introduce a new lock in order to
make sure that no other series of printk() calls (from either
oom_kill_process() or warn_alloc()) can append to the printk() buffer when
one series of printk() calls (from either oom_kill_process() or
warn_alloc()) is already in progress. Such serialization would also help
with obtaining kernel messages in readable form.
---------- Pseudo code start ----------
retry:
if (mutex_trylock(&oom_lock)) {
mutex_lock(&oom_printk_lock);
while (atomic_read(&printk_pending_logs) > 0) {
atomic_dec(&printk_pending_logs);
print_one_log();
}
// Send SIGKILL here.
mutex_unlock(&oom_printk_lock);
mutex_unlock(&oom_lock)
} else {
if (mutex_trylock(&oom_printk_lock)) {
atomic_inc(&printk_pending_logs);
mutex_unlock(&oom_printk_lock);
}
}
goto retry;
---------- Pseudo code end ----------
But this commit does not go in that direction, for we don't want to
introduce a new lock dependency, and we are unlikely to be able to obtain
useful information even if we serialized oom_kill_process() and
warn_alloc().
The synchronous approach is prone to unexpected results (e.g. too late
[1], too frequent [2], overlooked [3]). As far as I know, warn_alloc()
never helped with providing information other than "something is going
wrong". I want to consider an asynchronous approach which can obtain
information during stalls from possibly relevant threads (e.g. the owner
of oom_lock and kswapd-like threads) and serve as a trigger for actions
(e.g. turning tracepoints on/off, asking the libvirt daemon to take a
memory dump of a stalling KVM guest for diagnostic purposes).
This commit temporarily loses the ability to report e.g. an OOM lockup
caused by being unable to invoke the OOM killer due to a !__GFP_FS
allocation request. But the asynchronous approach will be able to detect
such a situation and emit a warning. Thus, let's remove warn_alloc().
[1] https://bugzilla.kernel.org/show_bug.cgi?id=192981
[2] http://lkml.kernel.org/r/CAM_iQpWuPVGc2ky8M-9yukECtS+zKjiDasNymX7rMcBjBFyM_…
[3] commit db73ee0d46379922 ("mm, vmscan: do not loop on too_many_isolated for ever")
Link: http://lkml.kernel.org/r/1509017339-4802-1-git-send-email-penguin-kernel@I-…
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Reported-by: Cong Wang <xiyou.wangcong(a)gmail.com>
Reported-by: yuwang.yuwang <yuwang.yuwang(a)alibaba-inc.com>
Reported-by: Johannes Weiner <hannes(a)cmpxchg.org>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky(a)gmail.com>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
[Resolved backport conflict due to missing 8225196, a8e9925, 9e80c71 and
9a67f64 in 4.9 -- all of which modified this hunk being removed.]
Signed-off-by: Amit Shah <amit(a)kernel.org>
---
mm/page_alloc.c | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 28240ce475d6..3af727d95c17 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3530,8 +3530,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
enum compact_result compact_result;
int compaction_retries;
int no_progress_loops;
- unsigned long alloc_start = jiffies;
- unsigned int stall_timeout = 10 * HZ;
unsigned int cpuset_mems_cookie;
/*
@@ -3704,14 +3702,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
goto nopage;
- /* Make sure we know about allocations which stall for too long */
- if (time_after(jiffies, alloc_start + stall_timeout)) {
- warn_alloc(gfp_mask,
- "page allocation stalls for %ums, order:%u",
- jiffies_to_msecs(jiffies-alloc_start), order);
- stall_timeout += 10 * HZ;
- }
-
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
did_some_progress > 0, &no_progress_loops))
goto retry;
--
2.15.3.AMZN
On Wed, 2018-08-22 at 09:19 +0200, gregkh(a)linuxfoundation.org wrote:
> This is a note to let you know that I've just added the patch titled
>
> x86/entry/64: Remove %ebx handling from error_entry/exit
>
> to the 4.9-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> x86-entry-64-remove-ebx-handling-from-error_entry-exit.patch
> and it can be found in the queue-4.9 subdirectory.
Can we have it for 4.4 too, please?
> [ Note to stable maintainers: this should probably get applied to all
> kernels. If you're nervous about that, a more conservative fix to
> add xorl %ebx,%ebx; incl %ebx before the jump to error_exit should
> also fix the problem. ]
Can we assume it's always from the kernel? The Xen code definitely seems
to handle invoking this from both kernel and userspace contexts.
Shouldn't %ebx get set to !(regs->rsp & 3)?
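(For reference, the conservative variant quoted in the note above would
amount to something like the following sketch, with the exact placement
in error_entry assumed:
	xorl	%ebx, %ebx		/* %ebx = 0 */
	incl	%ebx			/* %ebx = 1: "return to kernel mode" */
	jmp	error_exit
)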
Either way, let's just do it in the stable tree exactly the same way
it's done upstream.
> - * On entry, EBX is a "return to kernel mode" flag:
Re-introduce the typo 'EBS' here, to make the patch apply cleanly to
4.4. It's only removing that line anyway.
Or just cherry-pick upstream commit 75ca5b22260ef7 first.
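For the latter route, the stable workflow would be roughly (a sketch; the
second id stands for this backport):
$ git cherry-pick -x 75ca5b22260ef7   # pick the 'EBS' typo fix first
$ git cherry-pick -x <this backport>  # then this patch applies cleanly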