Before this change, network restrictions were enforced according to the
calling thread's Landlock domain, leading to potentially inconsistent
results when the same socket was used by different threads or processes
(with different domains). This change fixes such access control
inconsistency by enforcing the socket's Landlock domain instead of the
caller's Landlock domain.
A socket's Landlock domain is inherited from the thread that created
it. This means that a socket created without sandboxing is free to
connect and bind without limitation. It also means that a socket
created by a sandboxed thread inherits the thread's policy, which is
enforced on this socket even when it is used by another thread or
passed to another process.
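As an illustration, here is a minimal userspace sketch (assuming a
kernel with Landlock network support, ABI v4+; error handling omitted
and the helper name is made up): a thread sandboxes itself with a
TCP-connect ruleset and then creates a socket, which keeps the thread's
domain wherever the file descriptor ends up:

  #include <linux/landlock.h>
  #include <sys/prctl.h>
  #include <sys/socket.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  static int create_restricted_socket(void)
  {
          const struct landlock_ruleset_attr ruleset_attr = {
                  .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
          };
          const struct landlock_net_port_attr port_attr = {
                  .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
                  .port = 443,
          };
          const int ruleset_fd = syscall(SYS_landlock_create_ruleset,
                                         &ruleset_attr, sizeof(ruleset_attr), 0);

          syscall(SYS_landlock_add_rule, ruleset_fd, LANDLOCK_RULE_NET_PORT,
                  &port_attr, 0);
          prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
          /* Restricts the calling thread only. */
          syscall(SYS_landlock_restrict_self, ruleset_fd, 0);
          close(ruleset_fd);

          /*
           * The new socket's file->f_cred records this thread's domain:
           * connect() to port 443 is allowed, other TCP connections are
           * denied, whichever thread or process later uses the FD.
           */
          return socket(AF_INET, SOCK_STREAM, 0);
  }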
The initial rationale [1] was that a socket does not directly grant
access to data, but is an object used to define an access (e.g. a
connection to a peer). Contrary to my initial assumption, we can
identify which protocol/port a newly created socket can give access to
thanks to the socket's file->f_cred inherited from its creator. Moreover,
from a kernel point of view, especially for shared objects, we need a
more consistent access model. This means that the same action on the
same socket performed by different threads will have the same effect.
This follows the same approach as for file descriptors tied to the file
system (e.g. LANDLOCK_ACCESS_FS_TRUNCATE).
One potential risk of this change is for unsandboxed processes to send
socket file descriptors to sandboxed processes, which could give
unrestricted network access to the sandboxed process (by reconfiguring
the socket). While it makes sense for processes to transfer AF_UNIX
socketpairs, which is fine because they can only exchange data between
themselves, it should be rare for processes to legitimately pass other
kinds of sockets (e.g. AF_INET).
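For reference, such a transfer uses the standard SCM_RIGHTS ancillary
data mechanism; a minimal sending-side sketch (error handling omitted,
helper name made up):

  #include <string.h>
  #include <sys/socket.h>
  #include <sys/uio.h>

  /*
   * Pass any file descriptor (e.g. an AF_INET socket) over an AF_UNIX
   * socket. The received socket keeps the domain of the thread that
   * created it: none, if the creator was unsandboxed.
   */
  static void send_fd(const int unix_sock, const int fd)
  {
          char data = 0;
          struct iovec iov = { .iov_base = &data, .iov_len = 1 };
          union {
                  char buf[CMSG_SPACE(sizeof(int))];
                  struct cmsghdr align;
          } u = {};
          struct msghdr msg = {
                  .msg_iov = &iov,
                  .msg_iovlen = 1,
                  .msg_control = u.buf,
                  .msg_controllen = sizeof(u.buf),
          };
          struct cmsghdr *const cmsg = CMSG_FIRSTHDR(&msg);

          cmsg->cmsg_level = SOL_SOCKET;
          cmsg->cmsg_type = SCM_RIGHTS;
          cmsg->cmsg_len = CMSG_LEN(sizeof(int));
          memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
          sendmsg(unix_sock, &msg, 0);
  }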
Another potential risk of this approach is socket file descriptor leaks.
This is the same risk as with regular file descriptor leaks giving
access to the content of a file, which is well known and documented.
This could be mitigated with a future complementary restriction on
received or inherited file descriptors.
One interesting side effect of this new approach is that a process can
create a socket that only allows connecting to a set of ports. This
can be done by creating a thread, sandboxing it, creating a socket, and
using the related file descriptor (in the same process). Passing this
restricted socket to a more sandboxed process makes it possible to have
a more dynamic security policy.
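Building on the create_restricted_socket() sketch above (same caveats),
the pattern boils down to:

  #include <pthread.h>

  static void *sandboxed_creator(void *const arg)
  {
          /*
           * landlock_restrict_self() only sandboxes this thread, so the
           * rest of the process stays unrestricted.
           */
          *(int *)arg = create_restricted_socket();
          return NULL;
  }

  int get_connect_only_socket(void)
  {
          pthread_t thread;
          int fd = -1;

          pthread_create(&thread, NULL, sandboxed_creator, &fd);
          pthread_join(thread, NULL);
          /* Usable here, but only for connect() to the allowed ports. */
          return fd;
  }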
This new approach aligns with SELinux and Smack instead of AppArmor and
Tomoyo. It is also in line with capability-based security mechanisms
such as Capsicum.
This slight semantic change is important for Landlock's current and
future consistency, and it must be backported.
Current tests still pass because this behavior wasn't covered. A
following commit adds new tests.
Cc: Günther Noack <gnoack@google.com>
Cc: Ivanov Mikhail <ivanov.mikhail1@huawei-partners.com>
Cc: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Tahera Fahimi <fahimitahera@gmail.com>
Cc: <stable@vger.kernel.org> # 6.7.x: 088e2efaf3d2: landlock: Simplify current_check_access_socket()
Fixes: fff69fb03dde ("landlock: Support network rules with TCP bind and connect")
Link: https://lore.kernel.org/r/263c1eb3-602f-57fe-8450-3f138581bee7@digikod.net [1]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://lore.kernel.org/r/20240719150618.197991-2-mic@digikod.net
---
security/landlock/net.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/security/landlock/net.c b/security/landlock/net.c
index c8bcd29bde09..78e027a74819 100644
--- a/security/landlock/net.c
+++ b/security/landlock/net.c
@@ -50,10 +50,11 @@ get_raw_handled_net_accesses(const struct landlock_ruleset *const domain)
return access_dom;
}
-static const struct landlock_ruleset *get_current_net_domain(void)
+static const struct landlock_ruleset *
+get_socket_net_domain(const struct socket *const sock)
{
const struct landlock_ruleset *const dom =
- landlock_get_current_domain();
+ landlock_cred(sock->file->f_cred)->domain;
if (!dom || !get_raw_handled_net_accesses(dom))
return NULL;
@@ -61,10 +62,9 @@ static const struct landlock_ruleset *get_current_net_domain(void)
return dom;
}
-static int current_check_access_socket(struct socket *const sock,
- struct sockaddr *const address,
- const int addrlen,
- access_mask_t access_request)
+static int check_access_socket(struct socket *const sock,
+ struct sockaddr *const address,
+ const int addrlen, access_mask_t access_request)
{
__be16 port;
layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_NET] = {};
@@ -72,7 +72,7 @@ static int current_check_access_socket(struct socket *const sock,
struct landlock_id id = {
.type = LANDLOCK_KEY_NET_PORT,
};
- const struct landlock_ruleset *const dom = get_current_net_domain();
+ const struct landlock_ruleset *const dom = get_socket_net_domain(sock);
if (!dom)
return 0;
@@ -175,16 +175,16 @@ static int current_check_access_socket(struct socket *const sock,
static int hook_socket_bind(struct socket *const sock,
struct sockaddr *const address, const int addrlen)
{
- return current_check_access_socket(sock, address, addrlen,
- LANDLOCK_ACCESS_NET_BIND_TCP);
+ return check_access_socket(sock, address, addrlen,
+ LANDLOCK_ACCESS_NET_BIND_TCP);
}
static int hook_socket_connect(struct socket *const sock,
struct sockaddr *const address,
const int addrlen)
{
- return current_check_access_socket(sock, address, addrlen,
- LANDLOCK_ACCESS_NET_CONNECT_TCP);
+ return check_access_socket(sock, address, addrlen,
+ LANDLOCK_ACCESS_NET_CONNECT_TCP);
}
static struct security_hook_list landlock_hooks[] __ro_after_init = {
--
2.45.2
In read_handle(), of_get_address() may return NULL if getting the
address and size of the node fails. of_read_number() then dereferences
prop to handle conversions between different byte orders, which could
lead to a NULL pointer dereference. Add a NULL check to fix the
potential issue.
Found by static analysis.
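For context, the check in read_handle() after the fix looks roughly
like this (reconstructed around the one-line diff below, so the
surrounding lines are an approximation):

  static int read_handle(struct device_node *np, u64 *handle)
  {
          const __be32 *prop;
          u64 size;

          /* Get address and size of the node */
          prop = of_get_address(np, 0, &size, NULL);
          if (!prop || size)
                  return -EINVAL;

          /* Helper to read a big number; size is in cells (not bytes) */
          *handle = of_read_number(prop, of_n_addr_cells(np));
          return 0;
  }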
Cc: stable@vger.kernel.org
Fixes: 14baf4d9c739 ("cxl: Add guest-specific code")
Signed-off-by: Ma Ke <make24@iscas.ac.cn>
---
Changes in v4:
- modified the vulnerability description according to suggestions, making
  the process of static analysis of the vulnerability clearer. No active
  research on developer behavior.
Changes in v3:
- fixed up the changelog text as suggested.
Changes in v2:
- added an explanation of how the potential vulnerability was discovered,
  but it did not meet the description specification requirements.
---
drivers/misc/cxl/of.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c
index bcc005dff1c0..d8dbb3723951 100644
--- a/drivers/misc/cxl/of.c
+++ b/drivers/misc/cxl/of.c
@@ -58,7 +58,7 @@ static int read_handle(struct device_node *np, u64 *handle)
/* Get address and size of the node */
prop = of_get_address(np, 0, &size, NULL);
- if (size)
+ if (!prop || size)
return -EINVAL;
/* Helper to read a big number; size is in cells (not bytes) */
--
2.25.1
It has turned out that having _set_required_opps() recursively call
dev_pm_opp_set_opp() to set the required OPPs doesn't really work as
well as we expected.
More precisely, at each recursive call to dev_pm_opp_set_opp() we are
changing the OPP in a genpd's OPP table for a device that has been
attached to it. The problem with this is that we may have several
devices attached to the same genpd, thus sharing the same OPP table
that is being used for their required OPPs. So, typically, we may have
several active requests simultaneously for different OPPs in a genpd's
OPP table. This may lead to the per-device vote for a performance state
(opp-level) for a genpd not being requested accordingly.
Moreover, dev_pm_opp_set_opp() doesn't get called for a required OPP
when a device has been attached to a single PM domain. Even if a
consumer driver would attempt to assign the required_devs via
_opp_attach_genpd() or _opp_set_required_devs(), it would not be
possible, as there is no separate virtual device at hand to use in this
case.
That said, let's fix the problem by replacing the call to
dev_pm_opp_set_opp() in _set_required_opps() with a call to
_set_opp_level(). At the moment there's no drawback to doing this, as
there is no need to manage anything but the performance state of the
genpd. If it later turns out that another resource needs to be managed
for a required OPP, this can still be extended without having to call
dev_pm_opp_set_opp().
Fixes: e37440e7e2c2 ("OPP: Call dev_pm_opp_set_opp() for required OPPs")
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
Changes in v2:
- Clarified the commit message.
- Addressed some comments from Viresh.
- Dropped calls to _add_opp_dev() for required_devs.
---
drivers/opp/core.c | 56 ++++++++++++++++++----------------------------
1 file changed, 22 insertions(+), 34 deletions(-)
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 5f4598246a87..494f8860220d 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1061,6 +1061,27 @@ static int _set_opp_bw(const struct opp_table *opp_table,
return 0;
}
+static int _set_opp_level(struct device *dev, struct dev_pm_opp *opp)
+{
+ unsigned int level = 0;
+ int ret = 0;
+
+ if (opp) {
+ if (opp->level == OPP_LEVEL_UNSET)
+ return 0;
+
+ level = opp->level;
+ }
+
+ /* Request a new performance state through the device's PM domain. */
+ ret = dev_pm_domain_set_performance_state(dev, level);
+ if (ret)
+ dev_err(dev, "Failed to set performance state %u (%d)\n", level,
+ ret);
+
+ return ret;
+}
+
/* This is only called for PM domain for now */
static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
struct dev_pm_opp *opp, bool up)
@@ -1091,7 +1112,7 @@ static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
if (devs[index]) {
required_opp = opp ? opp->required_opps[index] : NULL;
- ret = dev_pm_opp_set_opp(devs[index], required_opp);
+ ret = _set_opp_level(devs[index], required_opp);
if (ret)
return ret;
}
@@ -1102,27 +1123,6 @@ static int _set_required_opps(struct device *dev, struct opp_table *opp_table,
return 0;
}
-static int _set_opp_level(struct device *dev, struct dev_pm_opp *opp)
-{
- unsigned int level = 0;
- int ret = 0;
-
- if (opp) {
- if (opp->level == OPP_LEVEL_UNSET)
- return 0;
-
- level = opp->level;
- }
-
- /* Request a new performance state through the device's PM domain. */
- ret = dev_pm_domain_set_performance_state(dev, level);
- if (ret)
- dev_err(dev, "Failed to set performance state %u (%d)\n", level,
- ret);
-
- return ret;
-}
-
static void _find_current_opp(struct device *dev, struct opp_table *opp_table)
{
struct dev_pm_opp *opp = ERR_PTR(-ENODEV);
@@ -2457,18 +2457,6 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev,
}
}
- /*
- * Add the virtual genpd device as a user of the OPP table, so
- * we can call dev_pm_opp_set_opp() on it directly.
- *
- * This will be automatically removed when the OPP table is
- * removed, don't need to handle that here.
- */
- if (!_add_opp_dev(virt_dev, opp_table->required_opp_tables[index])) {
- ret = -ENOMEM;
- goto err;
- }
-
opp_table->required_devs[index] = virt_dev;
index++;
name++;
--
2.34.1