From: Xiubo Li <xiubli(a)redhat.com>
Blindly expanding the readahead windows will cause unneccessary
pagecache thrashing and also will introdue the network workload.
We should disable expanding the windows if the readahead is disabled
and also shouldn't expand the windows too much.
Expanding forward firstly instead of expanding backward for possible
sequential reads.
Bound `rreq->len` to the actual file size to restore the previous page
cache usage.
The posix_fadvise may change the maximum size of a file readahead.
Cc: stable(a)vger.kernel.org
Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead")
URL: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.ed…
URL: https://www.spinics.net/lists/ceph-users/msg76183.html
Cc: Hu Weiwen <sehuww(a)mail.scut.edu.cn>
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
fs/ceph/addr.c | 40 +++++++++++++++++++++++++++++++++-------
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 93fff1a7373f..4b29777c01d7 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -188,16 +188,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
struct inode *inode = rreq->inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_layout *lo = &ci->i_layout;
+ unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
+ loff_t end = rreq->start + rreq->len, new_end;
+ struct ceph_netfs_request_data *priv = rreq->netfs_priv;
+ unsigned long max_len;
u32 blockoff;
- u64 blockno;
- /* Expand the start downward */
- blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
- rreq->start = blockno * lo->stripe_unit;
- rreq->len += blockoff;
+ if (priv) {
+ /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
+ if (priv->file_ra_disabled)
+ max_pages = 0;
+ else
+ max_pages = priv->file_ra_pages;
+
+ }
+
+ /* Readahead is disabled */
+ if (!max_pages)
+ return;
- /* Now, round up the length to the next block */
- rreq->len = roundup(rreq->len, lo->stripe_unit);
+ max_len = max_pages << PAGE_SHIFT;
+
+ /*
+ * Try to expand the length forward by rounding up it to the next
+ * block, but do not exceed the file size, unless the original
+ * request already exceeds it.
+ */
+ new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
+ if (new_end > end && new_end <= rreq->start + max_len)
+ rreq->len = new_end - rreq->start;
+
+ /* Try to expand the start downward */
+ div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+ if (rreq->len + blockoff <= max_len) {
+ rreq->start -= blockoff;
+ rreq->len += blockoff;
+ }
}
static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
--
2.40.1
This patch fixes a possible plock op collisions when using F_SETLKW lock
requests and fsid, number and owner are not enough to identify a result
for a pending request. The ltp testcases [0] and [1] are examples when
this is not enough in case of using classic posix locks with threads and
open filedescriptor posix locks.
The idea to fix the issue here is to place all lock request in order. In
case of non F_SETLKW lock request (indicated if wait is set or not) the
lock requests are ordered inside the recv_list. If a result comes back
the right plock op can be found by the first plock_op in recv_list which
has not info.wait set. This can be done only by non F_SETLKW plock ops as
dlm_controld always reads a specific plock op (list_move_tail() from
send_list to recv_mlist) and write the result immediately back.
This behaviour is for F_SETLKW not possible as multiple waiters can be
get a result back in an random order. To avoid a collisions in cases
like [0] or [1] this patch adds more fields to compare the plock
operations as the lock request is the same. This is also being made in
NFS to find an result for an asynchronous F_SETLKW lock request [2][3]. We
still can't find the exact lock request for a specific result if the
lock request is the same, but if this is the case we don't care the
order how the identical lock requests get their result back to grant the
lock.
[0] https://gitlab.com/netcoder/ltp/-/blob/dlm_fcntl_owner_testcase/testcases/k…
[1] https://gitlab.com/netcoder/ltp/-/blob/dlm_fcntl_owner_testcase/testcases/k…
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/inc…
[3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/…
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexander Aring <aahringo(a)redhat.com>
---
change since v2:
- don't split recv_list into recv_setlkw_list
fs/dlm/plock.c | 43 ++++++++++++++++++++++++++++++-------------
1 file changed, 30 insertions(+), 13 deletions(-)
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 31bc601ee3d8..53d17dbbb716 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -391,7 +391,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
list_del(&op->list);
else
- list_move(&op->list, &recv_list);
+ list_move_tail(&op->list, &recv_list);
memcpy(&info, &op->info, sizeof(info));
}
spin_unlock(&ops_lock);
@@ -430,19 +430,36 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
return -EINVAL;
spin_lock(&ops_lock);
- list_for_each_entry(iter, &recv_list, list) {
- if (iter->info.fsid == info.fsid &&
- iter->info.number == info.number &&
- iter->info.owner == info.owner) {
- list_del_init(&iter->list);
- memcpy(&iter->info, &info, sizeof(info));
- if (iter->data)
- do_callback = 1;
- else
- iter->done = 1;
- op = iter;
- break;
+ if (info.wait) {
+ list_for_each_entry(iter, &recv_list, list) {
+ if (iter->info.fsid == info.fsid &&
+ iter->info.number == info.number &&
+ iter->info.owner == info.owner &&
+ iter->info.pid == info.pid &&
+ iter->info.start == info.start &&
+ iter->info.end == info.end &&
+ iter->info.ex == info.ex &&
+ iter->info.wait) {
+ op = iter;
+ break;
+ }
}
+ } else {
+ list_for_each_entry(iter, &recv_list, list) {
+ if (!iter->info.wait) {
+ op = iter;
+ break;
+ }
+ }
+ }
+
+ if (op) {
+ list_del_init(&op->list);
+ memcpy(&op->info, &info, sizeof(info));
+ if (op->data)
+ do_callback = 1;
+ else
+ op->done = 1;
}
spin_unlock(&ops_lock);
--
2.31.1
From: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
With commit 858e8b792d06 ("tpm, tpm_tis: Avoid cache incoherency in test
for interrupts") bit accessor functions are used to access flags in
tpm_tis_data->flags.
However these functions expect bit numbers, while the flags are defined as
bit masks in enum tpm_tis_flag.
Fix this inconsistency by using numbers instead of masks also for the flags
in the enum.
Reported-by: Pavel Machek <pavel(a)denx.de>
Fixes: 858e8b792d06 ("tpm, tpm_tis: Avoid cache incoherency in test for interrupts")
Signed-off-by: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
Cc: stable(a)vger.kernel.org
---
drivers/char/tpm/tpm_tis_core.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index e978f457fd4d..610bfadb6acf 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -84,10 +84,10 @@ enum tis_defaults {
#define ILB_REMAP_SIZE 0x100
enum tpm_tis_flags {
- TPM_TIS_ITPM_WORKAROUND = BIT(0),
- TPM_TIS_INVALID_STATUS = BIT(1),
- TPM_TIS_DEFAULT_CANCELLATION = BIT(2),
- TPM_TIS_IRQ_TESTED = BIT(3),
+ TPM_TIS_ITPM_WORKAROUND = 0,
+ TPM_TIS_INVALID_STATUS = 1,
+ TPM_TIS_DEFAULT_CANCELLATION = 2,
+ TPM_TIS_IRQ_TESTED = 3,
};
struct tpm_tis_data {
base-commit: 7877cb91f1081754a1487c144d85dc0d2e2e7fc4
--
2.40.1
usb_udc_vbus_handler() can be invoked from interrupt context by irq
handlers of the gadget drivers, however, usb_udc_connect_control() has
to run in non-atomic context due to the following:
a. Some of the gadget driver implementations expect the ->pullup
callback to be invoked in non-atomic context.
b. usb_gadget_disconnect() acquires udc_lock which is a mutex.
Hence offload invocation of usb_udc_connect_control()
to workqueue.
Cc: stable(a)vger.kernel.org
Fixes: 1016fc0c096c ("USB: gadget: Fix obscure lockdep violation for udc_mutex")
Signed-off-by: Badhri Jagan Sridharan <badhri(a)google.com>
---
Changes since v1:
- Address Alan Stern's comment on usb_udc_vbus_handler invocation from
atomic context:
* vbus_events_lock is now a spinlock and allocations in
* usb_udc_vbus_handler are atomic now.
Changes since v2:
- Addressing Alan Stern's comments:
** connect_lock is now held by callers of
* usb_gadget_pullup_update_locked() and gadget_(un)bind_driver() does
* notdirectly hold the lock.
** Both usb_gadget_(dis)connect() and usb_udc_vbus_handler() would
* set/clear udc->vbus and invoke usb_gadget_pullup_update_locked.
** Add "unbinding" to prevent new connections after the gadget is being
* unbound.
Changes since v3:
** Made a minor cleanup which I missed to do in v3 in
* usb_udc_vbus_handler().
Changes since v4:
- Addressing Alan Stern's comments:
** usb_udc_vbus_handler() now offloads invocation of usb_udc_connect_control()
* from workqueue.
** Dropped vbus_events list as this was redundant. Updating to the
* latest value is suffice
---
drivers/usb/gadget/udc/core.c | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 52e6d2e84e35..44a9f32679b5 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -48,6 +48,7 @@ struct usb_udc {
struct list_head list;
bool vbus;
bool started;
+ struct work_struct vbus_work;
};
static struct class *udc_class;
@@ -1086,6 +1087,13 @@ static void usb_udc_connect_control(struct usb_udc *udc)
usb_gadget_disconnect(udc->gadget);
}
+static void vbus_event_work(struct work_struct *work)
+{
+ struct usb_udc *udc = container_of(work, struct usb_udc, vbus_work);
+
+ usb_udc_connect_control(udc);
+}
+
/**
* usb_udc_vbus_handler - updates the udc core vbus status, and try to
* connect or disconnect gadget
@@ -1094,6 +1102,13 @@ static void usb_udc_connect_control(struct usb_udc *udc)
*
* The udc driver calls it when it wants to connect or disconnect gadget
* according to vbus status.
+ *
+ * This function can be invoked from interrupt context by irq handlers of the gadget drivers,
+ * however, usb_udc_connect_control() has to run in non-atomic context due to the following:
+ * a. Some of the gadget driver implementations expect the ->pullup callback to be invoked in
+ * non-atomic context.
+ * b. usb_gadget_disconnect() acquires udc_lock which is a mutex.
+ * Hence offload invocation of usb_udc_connect_control() to workqueue.
*/
void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status)
{
@@ -1101,7 +1116,7 @@ void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status)
if (udc) {
udc->vbus = status;
- usb_udc_connect_control(udc);
+ schedule_work(&udc->vbus_work);
}
}
EXPORT_SYMBOL_GPL(usb_udc_vbus_handler);
@@ -1328,6 +1343,7 @@ int usb_add_gadget(struct usb_gadget *gadget)
mutex_lock(&udc_lock);
list_add_tail(&udc->list, &udc_list);
mutex_unlock(&udc_lock);
+ INIT_WORK(&udc->vbus_work, vbus_event_work);
ret = device_add(&udc->dev);
if (ret)
@@ -1558,6 +1574,7 @@ static void gadget_unbind_driver(struct device *dev)
kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
+ cancel_work_sync(&udc->vbus_work);
usb_gadget_disconnect(gadget);
usb_gadget_disable_async_callbacks(udc);
if (gadget->irq)
base-commit: 046895105d9666ab56e86ce8dd9786f8003125c6
--
2.41.0.rc0.172.g3f132b7071-goog
From: Roberto Sassu <roberto.sassu(a)huawei.com>
Changelog:
v4:
- Replace sg_init_table()/sg_set_buf() with sg_init_one() (suggested by
Eric)
v3:
v2:
- Add patch by Herbert to take only the needed bytes for a MPI from the
scatterlist
- Use only one scatterlist for signature and digest (suggested by Eric)
- Rename key variable to buf (suggested by Eric)
- Rename key_max_len variable to buf_len
- Use size_t for the buf_len variable instead of u32
v1:
- Unconditionally copy the signature and digest to the buffer to keep the
code simple (suggested by Eric)
Herbert Xu (1):
lib/mpi: Fix buffer overrun when SG is too long
Roberto Sassu (1):
KEYS: asymmetric: Copy sig and digest in public_key_verify_signature()
crypto/asymmetric_keys/public_key.c | 38 ++++++++++++++++-------------
lib/mpi/mpicoder.c | 3 ++-
2 files changed, 23 insertions(+), 18 deletions(-)
--
2.25.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 3632679d9e4f879f49949bb5b050e0de553e4739
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052622-such-rearview-04a6@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
3632679d9e4f ("ipv{4,6}/raw: fix output xfrm lookup wrt protocol")
91d0b78c5177 ("inet: Add IP_LOCAL_PORT_RANGE socket option")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3632679d9e4f879f49949bb5b050e0de553e4739 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
Date: Mon, 22 May 2023 14:08:20 +0200
Subject: [PATCH] ipv{4,6}/raw: fix output xfrm lookup wrt protocol
With a raw socket bound to IPPROTO_RAW (ie with hdrincl enabled), the
protocol field of the flow structure, build by raw_sendmsg() /
rawv6_sendmsg()), is set to IPPROTO_RAW. This breaks the ipsec policy
lookup when some policies are defined with a protocol in the selector.
For ipv6, the sin6_port field from 'struct sockaddr_in6' could be used to
specify the protocol. Just accept all values for IPPROTO_RAW socket.
For ipv4, the sin_port field of 'struct sockaddr_in' could not be used
without breaking backward compatibility (the value of this field was never
checked). Let's add a new kind of control message, so that the userland
could specify which protocol is used.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
CC: stable(a)vger.kernel.org
Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
Link: https://lore.kernel.org/r/20230522120820.1319391-1-nicolas.dichtel@6wind.com
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/include/net/ip.h b/include/net/ip.h
index c3fffaa92d6e..acec504c469a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -76,6 +76,7 @@ struct ipcm_cookie {
__be32 addr;
int oif;
struct ip_options_rcu *opt;
+ __u8 protocol;
__u8 ttl;
__s16 tos;
char priority;
@@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
+ ipcm->protocol = inet->inet_num;
}
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 4b7f2df66b99..e682ab628dfa 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -163,6 +163,7 @@ struct in_addr {
#define IP_MULTICAST_ALL 49
#define IP_UNICAST_IF 50
#define IP_LOCAL_PORT_RANGE 51
+#define IP_PROTOCOL 52
#define MCAST_EXCLUDE 0
#define MCAST_INCLUDE 1
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b511ff0adc0a..8e97d8d4cc9d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -317,7 +317,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
ipc->tos = val;
ipc->priority = rt_tos2priority(ipc->tos);
break;
-
+ case IP_PROTOCOL:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 1 || val > 255)
+ return -EINVAL;
+ ipc->protocol = val;
+ break;
default:
return -EINVAL;
}
@@ -1761,6 +1768,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_LOCAL_PORT_RANGE:
val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
break;
+ case IP_PROTOCOL:
+ val = inet_sk(sk)->inet_num;
+ break;
default:
sockopt_release_sock(sk);
return -ENOPROTOOPT;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ff712bf2a98d..eadf1c9ef7e4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -532,6 +532,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
+ /* Keep backward compat */
+ if (hdrincl)
+ ipc.protocol = IPPROTO_RAW;
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -599,7 +602,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
RT_SCOPE_UNIVERSE,
- hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 7d0adb612bdd..44ee7a2e72ac 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -793,7 +793,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!proto)
proto = inet->inet_num;
- else if (proto != inet->inet_num)
+ else if (proto != inet->inet_num &&
+ inet->inet_num != IPPROTO_RAW)
return -EINVAL;
if (proto > 255)
Hi,
Greg, can you include these in the 5.4-stable batch for the next
release? Lee reported and issue that really ended up being two
separate bugs, I fixed these last week and Lee has tested them
as good. No real upstream commits exists for these, as we fixed
them separately with refactoring and cleanup of this code.
--
Jens Axboe