A patchset from linux-5.15 that can significantly improve ext4 fs read
and write performance should be backported to 4.19. Unixbench test
results for linux-4.19.318 on a Phytium D2000 CPU are shown below.
Test cmd: (Phytium D2000 only has 8 cores)
./Run fs -c 8
Before this patch set:
File Copy 1024 bufsize 2000 maxblocks 1124181
File Copy 256 bufsize 500 maxblocks 281885
File Copy 4096 bufsize 8000 maxblocks 3383785
File Read 1024 bufsize 2000 maxblocks 8702173
File Read 256 bufsize 500 maxblocks 3869384
File Read 4096 bufsize 8000 maxblocks 13043151
File Write 1024 bufsize 2000 maxblocks 1107185
File Write 256 bufsize 500 maxblocks 270493
File Write 4096 bufsize 8000 maxblocks 4018084
After this patch set:
File Copy 1024 bufsize 2000 maxblocks 2026206
File Copy 256 bufsize 500 maxblocks 829534
File Copy 4096 bufsize 8000 maxblocks 4066659
File Read 1024 bufsize 2000 maxblocks 8877219
File Read 256 bufsize 500 maxblocks 3997445
File Read 4096 bufsize 8000 maxblocks 13179885
File Write 1024 bufsize 2000 maxblocks 4256929
File Write 256 bufsize 500 maxblocks 1305320
File Write 4096 bufsize 8000 maxblocks 10721052
We can observe a quantum leap in the test results as a consequence of
applying this patchset.
Link: https://lore.kernel.org/all/20210716122024.1105856-1-yi.zhang@huawei.com/
Original description:
This patchset aims to improve buffered write performance with delalloc.
The first patch reduces unnecessary updates of i_disksize, the next two
patches refactor the inline data write procedure and also make some small
fixes, and the last patch improves performance by removing all unnecessary
journal handles in the delalloc write procedure.
After this patch set, we could get a lot of performance improvement.
Below is the Unixbench comparison data, tested on my machine with an
'Intel Xeon Gold 5120' CPU and an nvme SSD backend.
Test cmd:
./Run -c 56 -i 3 fstime fsbuffer fsdisk
Before this patch set:
System Benchmarks Partial Index BASELINE RESULT INDEX
File Copy 1024 bufsize 2000 maxblocks 3960.0 422965.0 1068.1
File Copy 256 bufsize 500 maxblocks 1655.0 105077.0 634.9
File Copy 4096 bufsize 8000 maxblocks 5800.0 1429092.0 2464.0
========
System Benchmarks Index Score (Partial Only) 1186.6
After this patch set:
System Benchmarks Partial Index BASELINE RESULT INDEX
File Copy 1024 bufsize 2000 maxblocks 3960.0 732716.0 1850.3
File Copy 256 bufsize 500 maxblocks 1655.0 184940.0 1117.5
File Copy 4096 bufsize 8000 maxblocks 5800.0 2427152.0 4184.7
========
System Benchmarks Index Score (Partial Only) 2053.0
Zhang Yi (4):
ext4: check and update i_disksize properly
ext4: correct the error path of ext4_write_inline_data_end()
ext4: factor out write end code of inline file
ext4: drop unnecessary journal handle in delalloc write
fs/ext4/ext4.h | 3 -
fs/ext4/inline.c | 120 ++++++++++++++++++-------------------
fs/ext4/inode.c | 150 ++++++++++++-----------------------------------
3 files changed, 99 insertions(+), 174 deletions(-)
--
2.31.1
Hello,
I sent you a message a few hours ago but no reply yet, or you didn't receive it? Kindly read my letter and reply back. I want to make an inquiry
Thanks.
Dr.Allen Cheng
Human Resource Manager | Product Research Assistant
FC Industrial Laboratories Ltd
Originally, the check_unaligned_access_emulated_all_cpus function
only checked the boot hart. This fixes the function to check all
harts.
Fixes: 71c54b3d169d ("riscv: report misaligned accesses emulation to hwprobe")
Signed-off-by: Jesse Taube <jesse(a)rivosinc.com>
Reviewed-by: Charlie Jenkins <charlie(a)rivosinc.com>
Cc: stable(a)vger.kernel.org
---
V1 -> V2:
- New patch
V2 -> V3:
- Split patch
V3 -> V4:
- Re-add check for a system where a heterogeneous
CPU is hotplugged into a previously homogeneous
system.
V4 -> V5:
- Change work_struct *unused to work_struct *work __always_unused
---
arch/riscv/kernel/traps_misaligned.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index b62d5a2f4541..9a1e94383d6d 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -526,11 +526,11 @@ int handle_misaligned_store(struct pt_regs *regs)
return 0;
}
-static bool check_unaligned_access_emulated(int cpu)
+static void check_unaligned_access_emulated(struct work_struct *work __always_unused)
{
+ int cpu = smp_processor_id();
long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
unsigned long tmp_var, tmp_val;
- bool misaligned_emu_detected;
*mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
@@ -538,19 +538,16 @@ static bool check_unaligned_access_emulated(int cpu)
" "REG_L" %[tmp], 1(%[ptr])\n"
: [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory");
- misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED);
/*
* If unaligned_ctl is already set, this means that we detected that all
* CPUS uses emulated misaligned access at boot time. If that changed
* when hotplugging the new cpu, this is something we don't handle.
*/
- if (unlikely(unaligned_ctl && !misaligned_emu_detected)) {
+ if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_EMULATED))) {
pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
while (true)
cpu_relax();
}
-
- return misaligned_emu_detected;
}
bool check_unaligned_access_emulated_all_cpus(void)
@@ -562,8 +559,11 @@ bool check_unaligned_access_emulated_all_cpus(void)
* accesses emulated since tasks requesting such control can run on any
* CPU.
*/
+ schedule_on_each_cpu(check_unaligned_access_emulated);
+
for_each_online_cpu(cpu)
- if (!check_unaligned_access_emulated(cpu))
+ if (per_cpu(misaligned_access_speed, cpu)
+ != RISCV_HWPROBE_MISALIGNED_EMULATED)
return false;
unaligned_ctl = true;
--
2.45.2
Before this change, network restrictions were enforced according to the
calling thread's Landlock domain, leading to potential inconsistent
results when the same socket was used by different threads or processes
(with different domains). This change fixes such access control
inconsistency by enforcing the socket's Landlock domain instead of the
caller's Landlock domain.
A socket's Landlock domain is inherited from the thread that created this
socket. This means that a socket created without sandboxing will be
free to connect and bind without limitation. This also means that a
socket created by a sandboxed thread will inherit the thread's policy,
which will be enforced on this socket even when used by another thread
or passed to another process.
The initial rationale [1] was that a socket does not directly grant
access to data, but it is an object used to define an access (e.g.
connection to a peer). Contrary to my initial assumption, we can
identify to which protocol/port a newly created socket can give access
with the socket's file->f_cred inherited from its creator. Moreover,
from a kernel point of view, especially for shared objects, we need a
more consistent access model. This means that the same action on the
same socket performed by different threads will have the same effect.
This follows the same approach as for file descriptors tied to the file
system (e.g. LANDLOCK_ACCESS_FS_TRUNCATE).
One potential risk of this change is for unsandboxed processes to send
socket file descriptors to sandboxed processes, which could give
unrestricted network access to the sandboxed process (by reconfiguring
the socket). While it makes sense for processes to transfer (AF_UNIX)
socketpairs, which is OK because they can only exchange data between
themselves, it should be rare for processes to legitimately pass other
kinds of sockets (e.g. AF_INET).
Another potential risk of this approach is socket file descriptor leaks.
This is the same risk as with regular file descriptor leaks giving
access to the content of a file, which is well known and documented.
This could be mitigated with a future complementary restriction on
received or inherited file descriptors.
One interesting side effect of this new approach is that a process can
create a socket that will only allow connecting to a set of ports. This
can be done by creating a thread, sandboxing it, creating a socket, and
using the related file descriptor (in the same process). Passing this
restricted socket to a more sandboxed process makes it possible to have
a more dynamic security policy.
This new approach aligns with SELinux and Smack instead of AppArmor and
Tomoyo. It is also in line with capability-based security mechanisms
such as Capsicum.
This slight semantic change is important for current and future
Landlock's consistency, and it must be backported.
Current tests are still OK because this behavior wasn't covered. A
following commit adds new tests.
Cc: Günther Noack <gnoack(a)google.com>
Cc: Ivanov Mikhail <ivanov.mikhail1(a)huawei-partners.com>
Cc: Konstantin Meskhidze <konstantin.meskhidze(a)huawei.com>
Cc: Paul Moore <paul(a)paul-moore.com>
Cc: Tahera Fahimi <fahimitahera(a)gmail.com>
Cc: <stable(a)vger.kernel.org> # 6.7.x: 088e2efaf3d2: landlock: Simplify current_check_access_socket()
Fixes: fff69fb03dde ("landlock: Support network rules with TCP bind and connect")
Link: https://lore.kernel.org/r/263c1eb3-602f-57fe-8450-3f138581bee7@digikod.net [1]
Signed-off-by: Mickaël Salaün <mic(a)digikod.net>
Link: https://lore.kernel.org/r/20240719150618.197991-2-mic@digikod.net
---
security/landlock/net.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/security/landlock/net.c b/security/landlock/net.c
index c8bcd29bde09..78e027a74819 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -50,10 +50,11 @@ get_raw_handled_net_accesses(const struct landlock_ruleset *const domain)
return access_dom;
}
-static const struct landlock_ruleset *get_current_net_domain(void)
+static const struct landlock_ruleset *
+get_socket_net_domain(const struct socket *const sock)
{
const struct landlock_ruleset *const dom =
- landlock_get_current_domain();
+ landlock_cred(sock->file->f_cred)->domain;
if (!dom || !get_raw_handled_net_accesses(dom))
return NULL;
@@ -61,10 +62,9 @@ static const struct landlock_ruleset *get_current_net_domain(void)
return dom;
}
-static int current_check_access_socket(struct socket *const sock,
- struct sockaddr *const address,
- const int addrlen,
- access_mask_t access_request)
+static int check_access_socket(struct socket *const sock,
+ struct sockaddr *const address,
+ const int addrlen, access_mask_t access_request)
{
__be16 port;
layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_NET] = {};
@@ -72,7 +72,7 @@ static int current_check_access_socket(struct socket *const sock,
struct landlock_id id = {
.type = LANDLOCK_KEY_NET_PORT,
};
- const struct landlock_ruleset *const dom = get_current_net_domain();
+ const struct landlock_ruleset *const dom = get_socket_net_domain(sock);
if (!dom)
return 0;
@@ -175,16 +175,16 @@ static int current_check_access_socket(struct socket *const sock,
static int hook_socket_bind(struct socket *const sock,
struct sockaddr *const address, const int addrlen)
{
- return current_check_access_socket(sock, address, addrlen,
- LANDLOCK_ACCESS_NET_BIND_TCP);
+ return check_access_socket(sock, address, addrlen,
+ LANDLOCK_ACCESS_NET_BIND_TCP);
}
static int hook_socket_connect(struct socket *const sock,
struct sockaddr *const address,
const int addrlen)
{
- return current_check_access_socket(sock, address, addrlen,
- LANDLOCK_ACCESS_NET_CONNECT_TCP);
+ return check_access_socket(sock, address, addrlen,
+ LANDLOCK_ACCESS_NET_CONNECT_TCP);
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
--
2.45.2