From: Daniel Colascione dancol@google.com
commit 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 upstream.
The map-in-map frequently serves as a mechanism for atomic snapshotting of state that a BPF program might record. The current implementation is dangerous to use in this way, however, since userspace has no way of knowing when all programs that might have retrieved the "old" value of the map may have completed.
This change ensures that map update operations on map-in-map map types always wait for all references to the old map to drop before returning to userspace.
Signed-off-by: Daniel Colascione dancol@google.com Reviewed-by: Joel Fernandes (Google) joel@joelfernandes.org Signed-off-by: Alexei Starovoitov ast@kernel.org
[fengc@google.com: 4.19 clean cherry-picks, no modification] Signed-off-by: Chenbo Feng fengc@google.com --- kernel/bpf/syscall.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8339d81cba1d..675eb6d36e47 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -741,6 +741,17 @@ static int map_lookup_elem(union bpf_attr *attr) return err; }
+static void maybe_wait_bpf_programs(struct bpf_map *map) +{ + /* Wait for any running BPF programs to complete so that + * userspace, when we return to it, knows that all programs + * that could be running use the new map value. + */ + if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || + map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) + synchronize_rcu(); +} + #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
static int map_update_elem(union bpf_attr *attr) @@ -831,6 +842,7 @@ static int map_update_elem(union bpf_attr *attr) } __this_cpu_dec(bpf_prog_active); preempt_enable(); + maybe_wait_bpf_programs(map); out: free_value: kfree(value); @@ -883,6 +895,7 @@ static int map_delete_elem(union bpf_attr *attr) rcu_read_unlock(); __this_cpu_dec(bpf_prog_active); preempt_enable(); + maybe_wait_bpf_programs(map); out: kfree(key); err_put:
From: Theodore Ts'o tytso@mit.edu
commit 8bc1379b82b8e809eef77a9fedbb75c6c297be19 upstream.
Use a separate journal transaction if it turns out that we need to convert an inline file to use an data block. Otherwise we could end up failing due to not having journal credits.
This addresses CVE-2018-10883.
https://bugzilla.kernel.org/show_bug.cgi?id=200071
Change-Id: Ifbe92e379f7a25fb252a2584356ccb91f902ea8f Signed-off-by: Theodore Ts'o tytso@mit.edu Cc: stable@kernel.org [fengc@google.com: 4.4 backport: adjust context] Signed-off-by: Chenbo Feng fengc@google.com --- fs/ext4/ext4.h | 3 --- fs/ext4/inline.c | 38 +------------------------------------- fs/ext4/xattr.c | 18 ++---------------- 3 files changed, 3 insertions(+), 56 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f5d9f82b173a..b6e25d771eea 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3039,9 +3039,6 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, extern int ext4_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int *has_inline, __u64 start, __u64 len); -extern int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed); extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
extern int ext4_convert_inline_data(struct inode *inode); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1e7a9774119c..5ead3b0f3d34 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -888,11 +888,11 @@ retry_journal: flags |= AOP_FLAG_NOFS;
if (ret == -ENOSPC) { + ext4_journal_stop(handle); ret = ext4_da_convert_inline_data_to_extent(mapping, inode, flags, fsdata); - ext4_journal_stop(handle); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; @@ -1867,42 +1867,6 @@ out: return (error < 0 ? error : 0); }
-/* - * Called during xattr set, and if we can sparse space 'needed', - * just create the extent tree evict the data to the outer block. - * - * We use jbd2 instead of page cache to move data to the 1st block - * so that the whole transaction can be committed as a whole and - * the data isn't lost because of the delayed page cache write. - */ -int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed) -{ - int error; - struct ext4_xattr_entry *entry; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - - raw_inode = ext4_raw_inode(&iloc); - entry = (struct ext4_xattr_entry *)((void *)raw_inode + - EXT4_I(inode)->i_inline_off); - if (EXT4_XATTR_LEN(entry->e_name_len) + - EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) { - error = -ENOSPC; - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); -out: - brelse(iloc.bh); - return error; -} - void ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index d6bae37489af..16cf24dc9c34 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1044,22 +1044,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; error = ext4_xattr_set_entry(i, s, inode); - if (error) { - if (error == -ENOSPC && - ext4_has_inline_data(inode)) { - error = ext4_try_to_evict_inline_data(handle, inode, - EXT4_XATTR_LEN(strlen(i->name) + - EXT4_XATTR_SIZE(i->value_len))); - if (error) - return error; - error = ext4_xattr_ibody_find(inode, i, is); - if (error) - return error; - error = ext4_xattr_set_entry(i, s, inode); - } - if (error) - return error; - } + if (error) + return error; header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
From: Daniel Colascione dancol@google.com
commit 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 upstream.
The map-in-map frequently serves as a mechanism for atomic snapshotting of state that a BPF program might record. The current implementation is dangerous to use in this way, however, since userspace has no way of knowing when all programs that might have retrieved the "old" value of the map may have completed.
This change ensures that map update operations on map-in-map map types always wait for all references to the old map to drop before returning to userspace.
Signed-off-by: Daniel Colascione dancol@google.com Reviewed-by: Joel Fernandes (Google) joel@joelfernandes.org Signed-off-by: Alexei Starovoitov ast@kernel.org
[fengc@google.com: 4.14 backport: adjust context] Signed-off-by: Chenbo Feng fengc@google.com --- kernel/bpf/syscall.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ea22d0b6a9f0..5c9deed4524e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -519,6 +519,17 @@ static int map_lookup_elem(union bpf_attr *attr) return err; }
+static void maybe_wait_bpf_programs(struct bpf_map *map) +{ + /* Wait for any running BPF programs to complete so that + * userspace, when we return to it, knows that all programs + * that could be running use the new map value. + */ + if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || + map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) + synchronize_rcu(); +} + #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
static int map_update_elem(union bpf_attr *attr) @@ -592,6 +603,7 @@ static int map_update_elem(union bpf_attr *attr) } __this_cpu_dec(bpf_prog_active); preempt_enable(); + maybe_wait_bpf_programs(map);
if (!err) trace_bpf_map_update_elem(map, ufd, key, value); @@ -636,6 +648,7 @@ static int map_delete_elem(union bpf_attr *attr) rcu_read_unlock(); __this_cpu_dec(bpf_prog_active); preempt_enable(); + maybe_wait_bpf_programs(map);
if (!err) trace_bpf_map_delete_elem(map, ufd, key);
On Sat, Nov 10, 2018 at 03:52:13PM -0800, Chenbo Feng wrote:
From: Daniel Colascione dancol@google.com
commit 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 upstream.
The map-in-map frequently serves as a mechanism for atomic snapshotting of state that a BPF program might record. The current implementation is dangerous to use in this way, however, since userspace has no way of knowing when all programs that might have retrieved the "old" value of the map may have completed.
This change ensures that map update operations on map-in-map map types always wait for all references to the old map to drop before returning to userspace.
Signed-off-by: Daniel Colascione dancol@google.com Reviewed-by: Joel Fernandes (Google) joel@joelfernandes.org Signed-off-by: Alexei Starovoitov ast@kernel.org
[fengc@google.com: 4.19 clean cherry-picks, no modification] Signed-off-by: Chenbo Feng fengc@google.com
Greg has queued all 3 patches in this series to their respective trees, but I didn't see a mail from him. Thank you for the backports.
-- Thanks, Sasha
linux-stable-mirror@lists.linaro.org