Skip to content

Commit 966a967

Browse files
yhuang-intel authored and Ingo Molnar committed
smp: Avoid using two cache lines for struct call_single_data
struct call_single_data is used in IPIs to transfer information between CPUs. Its size is bigger than sizeof(unsigned long) and smaller than the cache line size. Currently it is not allocated with any explicit alignment requirement. This makes it possible for an allocated call_single_data to straddle two cache lines, which doubles the number of cache lines that need to be transferred among CPUs. This can be fixed by requiring call_single_data to be aligned to the size of call_single_data. Currently the size of call_single_data is a power of 2. If we add new fields to call_single_data, we may need to add padding to make sure the size of the new definition is a power of 2 as well. Fortunately, this is enforced by GCC, which will report bad sizes (the alignment passed to __aligned() must be a power of 2). To set the alignment requirement of call_single_data to the size of call_single_data, a struct definition and a typedef are used. To test the effect of the patch, I used the vm-scalability multiple thread swap test case (swap-w-seq-mt). The test creates multiple threads, and each thread eats memory until all RAM and part of swap are used, so that a huge number of IPIs are triggered when unmapping memory. In the test, the throughput of memory writing improves by ~5% compared with misaligned call_single_data, because of faster IPIs. Suggested-by: Peter Zijlstra <[email protected]> Signed-off-by: Huang, Ying <[email protected]> [ Add call_single_data_t and align with size of call_single_data. ] Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Cc: Aaron Lu <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Eric Dumazet <[email protected]> Cc: Juergen Gross <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Michael Ellerman <[email protected]> Cc: Thomas Gleixner <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent f52be57 commit 966a967

File tree

12 files changed

+39
-33
lines changed

12 files changed

+39
-33
lines changed

arch/mips/kernel/smp.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one);
648648
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
649649

650650
static DEFINE_PER_CPU(atomic_t, tick_broadcast_count);
651-
static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd);
651+
static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
652652

653653
void tick_broadcast(const struct cpumask *mask)
654654
{
655655
atomic_t *count;
656-
struct call_single_data *csd;
656+
call_single_data_t *csd;
657657
int cpu;
658658

659659
for_each_cpu(cpu, mask) {
@@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info)
674674

675675
static int __init tick_broadcast_init(void)
676676
{
677-
struct call_single_data *csd;
677+
call_single_data_t *csd;
678678
int cpu;
679679

680680
for (cpu = 0; cpu < NR_CPUS; cpu++) {

block/blk-softirq.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ static void trigger_softirq(void *data)
6060
static int raise_blk_irq(int cpu, struct request *rq)
6161
{
6262
if (cpu_online(cpu)) {
63-
struct call_single_data *data = &rq->csd;
63+
call_single_data_t *data = &rq->csd;
6464

6565
data->func = trigger_softirq;
6666
data->info = rq;

drivers/block/null_blk.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
struct nullb_cmd {
1414
struct list_head list;
1515
struct llist_node ll_list;
16-
struct call_single_data csd;
16+
call_single_data_t csd;
1717
struct request *rq;
1818
struct bio *bio;
1919
unsigned int tag;

drivers/cpuidle/coupled.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,13 @@ struct cpuidle_coupled {
119119

120120
#define CPUIDLE_COUPLED_NOT_IDLE (-1)
121121

122-
static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
122+
static DEFINE_PER_CPU(call_single_data_t, cpuidle_coupled_poke_cb);
123123

124124
/*
125125
* The cpuidle_coupled_poke_pending mask is used to avoid calling
126-
* __smp_call_function_single with the per cpu call_single_data struct already
126+
* __smp_call_function_single with the per cpu call_single_data_t struct already
127127
* in use. This prevents a deadlock where two cpus are waiting for each others
128-
* call_single_data struct to be available
128+
* call_single_data_t struct to be available
129129
*/
130130
static cpumask_t cpuidle_coupled_poke_pending;
131131

@@ -339,7 +339,7 @@ static void cpuidle_coupled_handle_poke(void *info)
339339
*/
340340
static void cpuidle_coupled_poke(int cpu)
341341
{
342-
struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
342+
call_single_data_t *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
343343

344344
if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
345345
smp_call_function_single_async(cpu, csd);
@@ -651,7 +651,7 @@ int cpuidle_coupled_register_device(struct cpuidle_device *dev)
651651
{
652652
int cpu;
653653
struct cpuidle_device *other_dev;
654-
struct call_single_data *csd;
654+
call_single_data_t *csd;
655655
struct cpuidle_coupled *coupled;
656656

657657
if (cpumask_empty(&dev->coupled_cpus))

drivers/net/ethernet/cavium/liquidio/lio_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2468,7 +2468,7 @@ static void liquidio_napi_drv_callback(void *arg)
24682468
if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) {
24692469
napi_schedule_irqoff(&droq->napi);
24702470
} else {
2471-
struct call_single_data *csd = &droq->csd;
2471+
call_single_data_t *csd = &droq->csd;
24722472

24732473
csd->func = napi_schedule_wrapper;
24742474
csd->info = &droq->napi;

drivers/net/ethernet/cavium/liquidio/octeon_droq.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ struct octeon_droq {
328328

329329
u32 cpu_id;
330330

331-
struct call_single_data csd;
331+
call_single_data_t csd;
332332
};
333333

334334
#define OCT_DROQ_SIZE (sizeof(struct octeon_droq))

include/linux/blkdev.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ typedef __u32 __bitwise req_flags_t;
134134
struct request {
135135
struct list_head queuelist;
136136
union {
137-
struct call_single_data csd;
137+
call_single_data_t csd;
138138
u64 fifo_time;
139139
};
140140

include/linux/netdevice.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2774,7 +2774,7 @@ struct softnet_data {
27742774
unsigned int input_queue_head ____cacheline_aligned_in_smp;
27752775

27762776
/* Elements below can be accessed between CPUs for RPS/RFS */
2777-
struct call_single_data csd ____cacheline_aligned_in_smp;
2777+
call_single_data_t csd ____cacheline_aligned_in_smp;
27782778
struct softnet_data *rps_ipi_next;
27792779
unsigned int cpu;
27802780
unsigned int input_queue_tail;

include/linux/smp.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,17 @@
1414
#include <linux/llist.h>
1515

1616
typedef void (*smp_call_func_t)(void *info);
17-
struct call_single_data {
17+
struct __call_single_data {
1818
struct llist_node llist;
1919
smp_call_func_t func;
2020
void *info;
2121
unsigned int flags;
2222
};
2323

24+
/* Use __aligned() to avoid using 2 cache lines for 1 csd */
25+
typedef struct __call_single_data call_single_data_t
26+
__aligned(sizeof(struct __call_single_data));
27+
2428
/* total number of cpus in this system (may exceed NR_CPUS) */
2529
extern unsigned int total_cpus;
2630

@@ -48,7 +52,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
4852
smp_call_func_t func, void *info, bool wait,
4953
gfp_t gfp_flags);
5054

51-
int smp_call_function_single_async(int cpu, struct call_single_data *csd);
55+
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
5256

5357
#ifdef CONFIG_SMP
5458

kernel/sched/sched.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,7 @@ struct rq {
769769
#ifdef CONFIG_SCHED_HRTICK
770770
#ifdef CONFIG_SMP
771771
int hrtick_csd_pending;
772-
struct call_single_data hrtick_csd;
772+
call_single_data_t hrtick_csd;
773773
#endif
774774
struct hrtimer hrtick_timer;
775775
#endif

0 commit comments

Comments
 (0)