From: Zhang Rui rui.zhang@intel.com
[ Upstream commit ec9aedb2aa1ab7ac420c00b31f5edc5be15ec167 ]
Currently, the kernel enumerates the possible CPUs by parsing both ACPI MADT Local APIC entries and x2APIC entries. So CPUs with "valid" APIC IDs, even if they have duplicated APIC IDs in Local APIC and x2APIC, are always enumerated.
Below is what ACPI MADT Local APIC and x2APIC describes on an Ivebridge-EP system,
[02Ch 0044 1] Subtable Type : 00 [Processor Local APIC] [02Fh 0047 1] Local Apic ID : 00 ... [164h 0356 1] Subtable Type : 00 [Processor Local APIC] [167h 0359 1] Local Apic ID : 39 [16Ch 0364 1] Subtable Type : 00 [Processor Local APIC] [16Fh 0367 1] Local Apic ID : FF ... [3ECh 1004 1] Subtable Type : 09 [Processor Local x2APIC] [3F0h 1008 4] Processor x2Apic ID : 00000000 ... [B5Ch 2908 1] Subtable Type : 09 [Processor Local x2APIC] [B60h 2912 4] Processor x2Apic ID : 00000077
As a result, kernel shows "smpboot: Allowing 168 CPUs, 120 hotplug CPUs". And this wastes significant amount of memory for the per-cpu data. Plus this also breaks https://lore.kernel.org/all/87edm36qqb.ffs@tglx/, because __max_logical_packages is over-estimated by the APIC IDs in the x2APIC entries.
According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#proc...:
"[Compatibility note] On some legacy OSes, Logical processors with APIC ID values less than 255 (whether in XAPIC or X2APIC mode) must use the Processor Local APIC structure to convey their APIC information to OSPM, and those processors must be declared in the DSDT using the Processor() keyword. Logical processors with APIC ID values 255 and greater must use the Processor Local x2APIC structure and be declared using the Device() keyword."
Therefore prevent the registration of x2APIC entries with an APIC ID less than 255 if the local APIC table enumerates valid APIC IDs.
[ tglx: Simplify the logic ]
Signed-off-by: Zhang Rui rui.zhang@intel.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Tested-by: Peter Zijlstra peterz@infradead.org Link: https://lore.kernel.org/r/20230702162802.344176-1-rui.zhang@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/acpi/boot.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2252340b2133e..14af7fbdc6b5e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -62,6 +62,7 @@ int acpi_fix_pin2_polarity __initdata;
#ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +static bool has_lapic_cpus __initdata; static bool acpi_support_online_capable; #endif
@@ -235,6 +236,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) if (!acpi_is_processor_usable(processor->lapic_flags)) return 0;
+ /* + * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#proc... + * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID + * in x2APIC must be equal or greater than 0xff. + */ + if (has_lapic_cpus && apic_id < 0xff) + return 0; + /* * We need to register disabled CPU as well to permit * counting disabled CPUs. This allows us to size @@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
static int __init acpi_parse_madt_lapic_entries(void) { - int count; - int x2count = 0; - int ret; - struct acpi_subtable_proc madt_proc[2]; + int count, x2count = 0;
if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; @@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_parse_sapic, MAX_LOCAL_APIC);
if (!count) { - memset(madt_proc, 0, sizeof(madt_proc)); - madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; - madt_proc[0].handler = acpi_parse_lapic; - madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; - madt_proc[1].handler = acpi_parse_x2apic; - ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, - sizeof(struct acpi_table_madt), - madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); - if (ret < 0) { - pr_err("Error parsing LAPIC/X2APIC entries\n"); - return ret; - } - - count = madt_proc[0].count; - x2count = madt_proc[1].count; + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, + acpi_parse_lapic, MAX_LOCAL_APIC); + has_lapic_cpus = count > 0; + x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, + acpi_parse_x2apic, MAX_LOCAL_APIC); } if (!count && !x2count) { pr_err("No LAPIC entries present\n");
From: Thomas Gleixner tglx@linutronix.de
[ Upstream commit 5c0930ccaad5a74d74e8b18b648c5eb21ed2fe94 ]
2b8272ff4a70 ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug") solved the straight forward CPU hotplug deadlock vs. the scheduler bandwidth timer. Yu discovered a more involved variant where a task which has a bandwidth timer started on the outgoing CPU holds a lock and then gets throttled. If the lock required by one of the CPU hotplug callbacks the hotplug operation deadlocks because the unthrottling timer event is not handled on the dying CPU and can only be recovered once the control CPU reaches the hotplug state which pulls the pending hrtimers from the dead CPU.
Solve this by pushing the hrtimers away from the dying CPU in the dying callbacks. Nothing can queue a hrtimer on the dying CPU at that point because all other CPUs spin in stop_machine() with interrupts disabled and once the operation is finished the CPU is marked offline.
Reported-by: Yu Liao liaoyu15@huawei.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Tested-by: Liu Tie liutie4@huawei.com Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/cpuhotplug.h | 1 + include/linux/hrtimer.h | 4 ++-- kernel/cpu.c | 8 +++++++- kernel/time/hrtimer.c | 33 ++++++++++++--------------------- 4 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c7e0d80dbf6a5..67575bc8a7e29 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -196,6 +196,7 @@ enum cpuhp_state { CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, + CPUHP_AP_HRTIMERS_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0ee140176f102..f2044d5a652b5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);
int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU -int hrtimers_dead_cpu(unsigned int cpu); +int hrtimers_cpu_dying(unsigned int cpu); #else -#define hrtimers_dead_cpu NULL +#define hrtimers_cpu_dying NULL #endif
#endif diff --git a/kernel/cpu.c b/kernel/cpu.c index f8eb1825f704f..ca861e545f34e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1730,7 +1730,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { [CPUHP_HRTIMERS_PREPARE] = { .name = "hrtimers:prepare", .startup.single = hrtimers_prepare_cpu, - .teardown.single = hrtimers_dead_cpu, + .teardown.single = NULL, }, [CPUHP_SMPCFD_PREPARE] = { .name = "smpcfd:prepare", @@ -1797,6 +1797,12 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = NULL, .teardown.single = smpcfd_dying_cpu, }, + [CPUHP_AP_HRTIMERS_DYING] = { + .name = "hrtimers:dying", + .startup.single = NULL, + .teardown.single = hrtimers_cpu_dying, + }, + /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e4f0e3b0c4f4f..5561dabc9b225 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2216,29 +2216,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, } }
-int hrtimers_dead_cpu(unsigned int scpu) +int hrtimers_cpu_dying(unsigned int dying_cpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i; + int i, ncpu = cpumask_first(cpu_active_mask);
- BUG_ON(cpu_online(scpu)); - tick_cancel_sched_timer(scpu); + tick_cancel_sched_timer(dying_cpu); + + old_base = this_cpu_ptr(&hrtimer_bases); + new_base = &per_cpu(hrtimer_bases, ncpu);
- /* - * this BH disable ensures that raise_softirq_irqoff() does - * not wakeup ksoftirqd (and acquire the pi-lock) while - * holding the cpu_base lock - */ - local_bh_disable(); - local_irq_disable(); - old_base = &per_cpu(hrtimer_bases, scpu); - new_base = this_cpu_ptr(&hrtimer_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - raw_spin_lock(&new_base->lock); - raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock(&old_base->lock); + raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], @@ -2249,15 +2242,13 @@ int hrtimers_dead_cpu(unsigned int scpu) * The migration might have changed the first expiring softirq * timer on this CPU. Update it. */ - hrtimer_update_softirq_timer(new_base, false); + __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); + /* Tell the other CPU to retrigger the next event */ + smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
- raw_spin_unlock(&old_base->lock); raw_spin_unlock(&new_base->lock); + raw_spin_unlock(&old_base->lock);
- /* Check, if we got expired work to do */ - __hrtimer_peek_ahead_timers(); - local_irq_enable(); - local_bh_enable(); return 0; }
From: Jan Bottorff janb@os.amperecomputing.com
[ Upstream commit f726eaa787e9f9bc858c902d18a09af6bcbfcdaf ]
When running on a many core ARM64 server, errors were happening in the ISR that looked like corrupted memory. These corruptions would fix themselves if small delays were inserted in the ISR. Errors reported by the driver included "i2c_designware APMC0D0F:00: i2c_dw_xfer_msg: invalid target address" and "i2c_designware APMC0D0F:00:controller timed out" during in-band IPMI SSIF stress tests.
The problem was determined to be memory writes in the driver were not becoming visible to all cores when execution rapidly shifted between cores, like when a register write immediately triggers an ISR. Processors with weak memory ordering, like ARM64, make no guarantees about the order normal memory writes become globally visible, unless barrier instructions are used to control ordering.
To solve this, regmap accessor functions configured by this driver were changed to use non-relaxed forms of the low-level register access functions, which include a barrier on platforms that require it. This assures memory writes before a controller register access are visible to all cores. The community concluded defaulting to correct operation outweighed defaulting to the small performance gains from using relaxed access functions. Being a low speed device added weight to this choice of default register access behavior.
Signed-off-by: Jan Bottorff janb@os.amperecomputing.com Acked-by: Jarkko Nikula jarkko.nikula@linux.intel.com Tested-by: Serge Semin fancer.lancer@gmail.com Reviewed-by: Serge Semin fancer.lancer@gmail.com Signed-off-by: Wolfram Sang wsa@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/i2c/busses/i2c-designware-common.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index 6fdb25a5f8016..ad98c85ec2e7a 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context;
- *val = readl_relaxed(dev->base + reg); + *val = readl(dev->base + reg);
return 0; } @@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context;
- writel_relaxed(val, dev->base + reg); + writel(val, dev->base + reg);
return 0; } @@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context;
- *val = swab32(readl_relaxed(dev->base + reg)); + *val = swab32(readl(dev->base + reg));
return 0; } @@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context;
- writel_relaxed(swab32(val), dev->base + reg); + writel(swab32(val), dev->base + reg);
return 0; } @@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context;
- *val = readw_relaxed(dev->base + reg) | - (readw_relaxed(dev->base + reg + 2) << 16); + *val = readw(dev->base + reg) | + (readw(dev->base + reg + 2) << 16);
return 0; } @@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context;
- writew_relaxed(val, dev->base + reg); - writew_relaxed(val >> 16, dev->base + reg + 2); + writew(val, dev->base + reg); + writew(val >> 16, dev->base + reg + 2);
return 0; }
From: Jozsef Kadlecsik kadlec@netfilter.org
[ Upstream commit 28628fa952fefc7f2072ce6e8016968cc452b1ba ]
Linkui Xiao reported that there's a race condition when ipset swap and destroy is called, which can lead to crash in add/del/test element operations. Swap then destroy are usual operations to replace a set with another one in a production system. The issue can in some cases be reproduced with the script:
ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip1 172.20.0.0/16 ipset add hash_ip1 192.168.0.0/16 iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT while [ 1 ] do # ... Ongoing traffic... ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip2 172.20.0.0/16 ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 sleep 0.05 done
In the race case the possible order of the operations are
CPU0 CPU1 ip_set_test ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 hash_net_kadt
Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy removed it, hash_net_kadt crashes.
The fix is to force ip_set_swap() to wait for all readers to finish accessing the old set pointers by calling synchronize_rcu().
The first version of the patch was written by Linkui Xiao xiaolinkui@kylinos.cn.
v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden ip_set_destroy() unnecessarily when all sets are destroyed. v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair. So there's no need to extend the rcu read locked area in ipset itself.
Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.or... Reported by: Linkui Xiao xiaolinkui@kylinos.cn Signed-off-by: Jozsef Kadlecsik kadlec@netfilter.org Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/ipset/ip_set_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 20eede37d5228..d47dfdcb899b0 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set) static struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net);
- rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); - - return set; + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; }
static inline void @@ -1399,6 +1396,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock);
+ /* Make sure all readers of the old set pointers are completed. */ + synchronize_rcu(); + return 0; }
From: Nick Terrell terrelln@fb.com
[ Upstream commit 77618db346455129424fadbbaec596a09feaf3bb ]
Zstd used an array of length 1 to mean a flexible array for C89 compatibility. Switch to a C99 flexible array to fix the UBSAN warning.
Tested locally by booting the kernel and writing to and reading from a BtrFS filesystem with zstd compression enabled. I was unable to reproduce the issue before the fix, however it is a trivial change.
Link: https://lkml.kernel.org/r/20231012213428.1390905-1-nickrterrell@gmail.com Reported-by: syzbot+1f2eb3e8cd123ffce499@syzkaller.appspotmail.com Reported-by: Eric Biggers ebiggers@kernel.org Reported-by: Kees Cook keescook@chromium.org Signed-off-by: Nick Terrell terrelln@fb.com Reviewed-by: Kees Cook keescook@chromium.org Signed-off-by: Sasha Levin sashal@kernel.org --- lib/zstd/common/fse_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c index 2c8bbe3e4c148..f37b7aec088ec 100644 --- a/lib/zstd/common/fse_decompress.c +++ b/lib/zstd/common/fse_decompress.c @@ -312,7 +312,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
typedef struct { short ncount[FSE_MAX_SYMBOL_VALUE + 1]; - FSE_DTable dtable[1]; /* Dynamically sized */ + FSE_DTable dtable[]; /* Dynamically sized */ } FSE_DecompressWksp;
From: Alex Pakhunov alexey.pakhunov@spacex.com
[ Upstream commit 907d1bdb8b2cc0357d03a1c34d2a08d9943760b1 ]
This change moves [rt]x_dropped counters to tg3_napi so that they can be updated by a single writer, race-free.
Signed-off-by: Alex Pakhunov alexey.pakhunov@spacex.com Signed-off-by: Vincent Wong vincent.wong2@spacex.com Reviewed-by: Michael Chan michael.chan@broadcom.com Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/broadcom/tg3.c | 38 +++++++++++++++++++++++++---- drivers/net/ethernet/broadcom/tg3.h | 4 +-- 2 files changed, 35 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 85570e40c8e9b..830f542a7c6d2 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6853,7 +6853,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) desc_idx, *post_ptr); drop_it_no_recycle: /* Other statistics kept track of by card. */ - tp->rx_dropped++; + tnapi->rx_dropped++; goto next_pkt; }
@@ -8151,7 +8151,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: dev_kfree_skb_any(skb); drop_nofree: - tp->tx_dropped++; + tnapi->tx_dropped++; return NETDEV_TX_OK; }
@@ -9330,7 +9330,7 @@ static void __tg3_set_rx_mode(struct net_device *); /* tp->lock is held. */ static int tg3_halt(struct tg3 *tp, int kind, bool silent) { - int err; + int err, i;
tg3_stop_fw(tp);
@@ -9351,6 +9351,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent)
/* And make sure the next sample is new data */ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats)); + + for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) { + struct tg3_napi *tnapi = &tp->napi[i]; + + tnapi->rx_dropped = 0; + tnapi->tx_dropped = 0; + } }
return err; @@ -11900,6 +11907,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) { struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev; struct tg3_hw_stats *hw_stats = tp->hw_stats; + unsigned long rx_dropped; + unsigned long tx_dropped; + int i;
stats->rx_packets = old_stats->rx_packets + get_stat64(&hw_stats->rx_ucast_packets) + @@ -11946,8 +11956,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) stats->rx_missed_errors = old_stats->rx_missed_errors + get_stat64(&hw_stats->rx_discards);
- stats->rx_dropped = tp->rx_dropped; - stats->tx_dropped = tp->tx_dropped; + /* Aggregate per-queue counters. The per-queue counters are updated + * by a single writer, race-free. The result computed by this loop + * might not be 100% accurate (counters can be updated in the middle of + * the loop) but the next tg3_get_nstats() will recompute the current + * value so it is acceptable. + * + * Note that these counters wrap around at 4G on 32bit machines. + */ + rx_dropped = (unsigned long)(old_stats->rx_dropped); + tx_dropped = (unsigned long)(old_stats->tx_dropped); + + for (i = 0; i < tp->irq_cnt; i++) { + struct tg3_napi *tnapi = &tp->napi[i]; + + rx_dropped += tnapi->rx_dropped; + tx_dropped += tnapi->tx_dropped; + } + + stats->rx_dropped = rx_dropped; + stats->tx_dropped = tx_dropped; }
static int tg3_get_regs_len(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 1000c894064f0..8d753f8c5b065 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3018,6 +3018,7 @@ struct tg3_napi { u16 *rx_rcb_prod_idx; struct tg3_rx_prodring_set prodring; struct tg3_rx_buffer_desc *rx_rcb; + unsigned long rx_dropped;
u32 tx_prod ____cacheline_aligned; u32 tx_cons; @@ -3026,6 +3027,7 @@ struct tg3_napi { u32 prodmbox; struct tg3_tx_buffer_desc *tx_ring; struct tg3_tx_ring_info *tx_buffers; + unsigned long tx_dropped;
dma_addr_t status_mapping; dma_addr_t rx_rcb_mapping; @@ -3219,8 +3221,6 @@ struct tg3 {
/* begin "everything else" cacheline(s) section */ - unsigned long rx_dropped; - unsigned long tx_dropped; struct rtnl_link_stats64 net_stats_prev; struct tg3_ethtool_stats estats_prev;
From: Alex Pakhunov alexey.pakhunov@spacex.com
[ Upstream commit 17dd5efe5f36a96bd78012594fabe21efb01186b ]
tg3_tso_bug() drops a packet if it cannot be segmented for any reason. The number of discarded frames should be incremented accordingly.
Signed-off-by: Alex Pakhunov alexey.pakhunov@spacex.com Signed-off-by: Vincent Wong vincent.wong2@spacex.com Reviewed-by: Pavan Chebbi pavan.chebbi@broadcom.com Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ethernet/broadcom/tg3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 830f542a7c6d2..f60a16de565ed 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7879,8 +7879,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
segs = skb_gso_segment(skb, tp->dev->features & ~(NETIF_F_TSO | NETIF_F_TSO6)); - if (IS_ERR(segs) || !segs) + if (IS_ERR(segs) || !segs) { + tnapi->tx_dropped++; goto tg3_tso_bug_end; + }
skb_list_walk_safe(segs, seg, next) { skb_mark_not_on_list(seg);
From: Masahiro Yamada masahiroy@kernel.org
[ Upstream commit ae1eff0349f2e908fc083630e8441ea6dc434dc0 ]
Currently, sym_validate_range() duplicates the range string using xstrdup(), which is overwritten by a subsequent sym_calc_value() call. It results in a memory leak.
Instead, only the pointer should be copied.
Below is a test case, with a summary from Valgrind.
[Test Kconfig]
config FOO int "foo" range 10 20
[Test .config]
CONFIG_FOO=0
[Before]
LEAK SUMMARY: definitely lost: 3 bytes in 1 blocks indirectly lost: 0 bytes in 0 blocks possibly lost: 0 bytes in 0 blocks still reachable: 17,465 bytes in 21 blocks suppressed: 0 bytes in 0 blocks
[After]
LEAK SUMMARY: definitely lost: 0 bytes in 0 blocks indirectly lost: 0 bytes in 0 blocks possibly lost: 0 bytes in 0 blocks still reachable: 17,462 bytes in 20 blocks suppressed: 0 bytes in 0 blocks
Signed-off-by: Masahiro Yamada masahiroy@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- scripts/kconfig/symbol.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 0572330bf8a78..a76925b46ce63 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base) static void sym_validate_range(struct symbol *sym) { struct property *prop; + struct symbol *range_sym; int base; long long val, val2; - char str[64];
switch (sym->type) { case S_INT: @@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym) if (!prop) return; val = strtoll(sym->curr.val, NULL, base); - val2 = sym_get_range_val(prop->expr->left.sym, base); + range_sym = prop->expr->left.sym; + val2 = sym_get_range_val(range_sym, base); if (val >= val2) { - val2 = sym_get_range_val(prop->expr->right.sym, base); + range_sym = prop->expr->right.sym; + val2 = sym_get_range_val(range_sym, base); if (val <= val2) return; } - if (sym->type == S_INT) - sprintf(str, "%lld", val2); - else - sprintf(str, "0x%llx", val2); - sym->curr.val = xstrdup(str); + sym->curr.val = range_sym->curr.val; }
static void sym_set_changed(struct symbol *sym)
From: YuanShang YuanShang.Mao@amd.com
[ Upstream commit 50d51374b498457c4dea26779d32ccfed12ddaff ]
The variable "chunk_ptr" should be a pointer pointing to a struct drm_amdgpu_cs_chunk instead of to a pointer of that.
Signed-off-by: YuanShang YuanShang.Mao@amd.com Reviewed-by: Christian König christian.koenig@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4624160315648..26b55cca27680 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -201,7 +201,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, }
for (i = 0; i < p->nchunks; i++) { - struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL; + struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata;
linux-stable-mirror@lists.linaro.org