Auto merge of rust-lang#84725 - sebpop:arm64-isb, r=joshtriplett

bors · bors · commit e244e840f2ad · 2021-05-02T04:54:31.000Z
[Arm64] use isb instruction instead of yield in spin loops On arm64 we have seen on several databases that ISB (instruction synchronization barrier) is better to use than yield in a spin loop. The yield instruction is a nop. The isb instruction puts the processor to sleep for some short time. isb is a good equivalent to the pause instruction on x86. Below is an experiment that shows the effects of yield and isb on Arm64 and the time of a pause instruction on x86 Intel processors. The micro-benchmarks use https://github.com/google/benchmark.git ``` $ cat a.cc static void BM_scalar_increment(benchmark::State& state) { int i = 0; for (auto _ : state) benchmark::DoNotOptimize(i++); } BENCHMARK(BM_scalar_increment); static void BM_yield(benchmark::State& state) { for (auto _ : state) asm volatile("yield"::); } BENCHMARK(BM_yield); static void BM_isb(benchmark::State& state) { for (auto _ : state) asm volatile("isb"::); } BENCHMARK(BM_isb); BENCHMARK_MAIN(); $ g++ -o run a.cc -O2 -lbenchmark -lpthread $ ./run -------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------- AWS Graviton2 (Neoverse-N1) processor: BM_scalar_increment 0.485 ns 0.485 ns 1000000000 BM_yield 0.400 ns 0.400 ns 1000000000 BM_isb 13.2 ns 13.2 ns 52993304 AWS Graviton (A-72) processor: BM_scalar_increment 0.897 ns 0.874 ns 801558633 BM_yield 0.877 ns 0.875 ns 800002377 BM_isb 13.0 ns 12.7 ns 55169412 Apple Arm64 M1 processor: BM_scalar_increment 0.315 ns 0.315 ns 1000000000 BM_yield 0.313 ns 0.313 ns 1000000000 BM_isb 9.06 ns 9.06 ns 77259282 ``` ``` static void BM_pause(benchmark::State& state) { for (auto _ : state) asm volatile("pause"::); } BENCHMARK(BM_pause); Intel Skylake processor: BM_scalar_increment 0.295 ns 0.295 ns 1000000000 BM_pause 41.7 ns 41.7 ns 16780553 ``` Tested on Graviton2 aarch64-linux with `./x.py test`.
diff --git a/library/core/src/hint.rs b/library/core/src/hint.rs
@@ -128,7 +128,7 @@ pub fn spin_loop() {
         #[cfg(target_arch = "aarch64")]
         {
             // SAFETY: the `cfg` attr ensures that we only execute this on aarch64 targets.
-            unsafe { crate::arch::aarch64::__yield() };
+            unsafe { crate::arch::aarch64::__isb(crate::arch::aarch64::SY) };
         }
         #[cfg(target_arch = "arm")]
         {

Original file line number	Diff line number	Diff line change
`@@ -128,7 +128,7 @@ pub fn spin_loop() {`
`128`	`128`	`#[cfg(target_arch = "aarch64")]`
`129`	`129`	`{`
`130`	`130`	// SAFETY: the `cfg` attr ensures that we only execute this on aarch64 targets.
`131`		`- unsafe { crate::arch::aarch64::__yield() };`
	`131`	`+ unsafe { crate::arch::aarch64::__isb(crate::arch::aarch64::SY) };`
`132`	`132`	`}`
`133`	`133`	`#[cfg(target_arch = "arm")]`
`134`	`134`	`{`