Skip to content

Commit eeb0957

Browse files
committed
use specialized asm parallel probe when possible to save a few cycles
1 parent 2356409 commit eeb0957

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

include/cache/monitor.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,49 @@ i64 probe_skx_sf_evset_para_asm(EVSet *evset, u64 *end_tsc, u32 *aux) {
5858
return *end_tsc - start;
5959
}
6060

61+
static __always_inline
62+
i64 probe_icx_sf_evset_para_asm(EVSet *evset, u64 *end_tsc, u32 *aux) {
63+
u8 **addrs = evset->addrs;
64+
_force_addr_calc(addrs);
65+
u64 start = _timer_start();
66+
__asm__ __volatile__("mov (%0), %%r10\n\t"
67+
"mov (%%r10), %%r11\n\t"
68+
"mov 8(%0), %%r10\n\t"
69+
"mov (%%r10), %%r11\n\t"
70+
"mov 16(%0), %%r10\n\t"
71+
"mov (%%r10), %%r11\n\t"
72+
"mov 24(%0), %%r10\n\t"
73+
"mov (%%r10), %%r11\n\t" // 4
74+
"mov 32(%0), %%r10\n\t"
75+
"mov (%%r10), %%r11\n\t"
76+
"mov 40(%0), %%r10\n\t"
77+
"mov (%%r10), %%r11\n\t"
78+
"mov 48(%0), %%r10\n\t"
79+
"mov (%%r10), %%r11\n\t"
80+
"mov 56(%0), %%r10\n\t"
81+
"mov (%%r10), %%r11\n\t" // 8
82+
"mov 64(%0), %%r10\n\t"
83+
"mov (%%r10), %%r11\n\t"
84+
"mov 72(%0), %%r10\n\t"
85+
"mov (%%r10), %%r11\n\t"
86+
"mov 80(%0), %%r10\n\t"
87+
"mov (%%r10), %%r11\n\t"
88+
"mov 88(%0), %%r10\n\t"
89+
"mov (%%r10), %%r11\n\t" // 12
90+
"mov 96(%0), %%r10\n\t"
91+
"mov (%%r10), %%r11\n\t"
92+
"mov 104(%0), %%r10\n\t"
93+
"mov (%%r10), %%r11\n\t"
94+
"mov 112(%0), %%r10\n\t"
95+
"mov (%%r10), %%r11\n\t"
96+
"mov 120(%0), %%r10\n\t"
97+
"mov (%%r10), %%r11\n\t" // 16
98+
::"r"(addrs)
99+
: "r10", "r11", "memory");
100+
*end_tsc = _timer_end_aux(aux);
101+
return *end_tsc - start;
102+
}
103+
61104
static __always_inline
62105
i64 probe_skx_sf_evset_para_noasm(EVSet *evset, u64 *end_tsc, u32 *aux) {
63106
u8 **addrs = evset->addrs;
@@ -79,7 +122,13 @@ i64 probe_skx_sf_evset_ptr_chase(EVSet *evset, u64 *end_tsc, u32 *aux) {
79122

80123
static __always_inline
81124
i64 probe_skx_sf_evset_para(EVSet *evset, u64 *end_tsc, u32 *aux) {
125+
#if defined (SKYLAKE) || defined(CASCADE)
126+
return probe_skx_sf_evset_para_asm(evset, end_tsc, aux);
127+
#elif defined (ICELAKE)
128+
return probe_icx_sf_evset_para_asm(evset, end_tsc, aux);
129+
#else
82130
return probe_skx_sf_evset_para_noasm(evset, end_tsc, aux);
131+
#endif
83132
}
84133

85134
/*
 * Prototype only; the body is not part of this chunk.
 * NOTE(review): presumably the priming counterpart of the parallel probe
 * routines - the meaning of arr_repeat and l2_repeat should be confirmed at
 * the definition.
 */
void prime_skx_sf_evset_para(EVSet *evset, u32 arr_repeat, u32 l2_repeat);

0 commit comments

Comments
 (0)