From 9d52fbcd1ed7992aa155d0296cbbaf4bafd2157d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E9=94=AE=E5=86=AC?= Date: Thu, 24 Jan 2019 00:05:50 +0800 Subject: [PATCH] Add: add StdSortWithGoroutine --- README.md | 67 ++++++++++++++++++++++++++++----------------- ordered_map_test.go | 2 +- sort_test.go | 21 +++++++++++--- std_sort.go | 28 +++++++++++++++++-- 4 files changed, 86 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 9f5301f..d555fd3 100644 --- a/README.md +++ b/README.md @@ -495,33 +495,33 @@ func (o StdItems) Len() int { **数据随机分布** ```shell -BenchmarkStdSort-6 100 22561304 ns/op -BenchmarkQuickSort-6 200 8809502 ns/op -BenchmarkShellSort-6 100 13712372 ns/op -BenchmarkHeapSort-6 100 12027323 ns/op -BenchmarkMergeSort-6 100 12379869 ns/op +BenchmarkStdSort-8 50 22978524 ns/op +BenchmarkQuickSort-8 100 11648689 ns/op +BenchmarkShellSort-8 100 17353544 ns/op +BenchmarkHeapSort-8 100 14501199 ns/op +BenchmarkMergeSort-8 100 13793086 ns/op ``` 是不是眼前一亮 😂,自己写的快排居然这么厉害,比标准的 sort 快了不止两倍??? 
这里出现这样的情况的主要原因是 sort 实现了 sort.Interface,该接口需要有三个方法 Less()/Len()/Swap(),而接口的类型转换是有成本的。**通用**意味着**牺牲**,这是**专**和**精**权衡后的结果。当然,标准的 sort 大部分情况的性能都是可以接受的,也是比较方便的。但当你需要追求极致性能的话,自己针对特定需求实现排序算法肯定会是更好的选择。 **数据升序分布** ```shell -BenchmarkStdSort-6 200 9412444 ns/op -BenchmarkQuickSort-6 1 2697328000 ns/op -BenchmarkShellSort-6 1000 1442077 ns/op -BenchmarkHeapSort-6 300 5841314 ns/op -BenchmarkMergeSort-6 500 3756284 ns/op +BenchmarkStdSort-8 200 7285511 ns/op +BenchmarkQuickSort-8 1 3351046900 ns/op +BenchmarkShellSort-8 1000 1679506 ns/op +BenchmarkHeapSort-8 200 6632256 ns/op +BenchmarkMergeSort-8 300 4308582 ns/op ``` -是不是又是眼前一亮 🤣,我去 为什么这次标准的排序比快排快了这么多,官方的排序不也是快排吗?(好像也没人会比快排慢是吧 😅) +是不是又是眼前一亮 🤣,我去 为什么这次标准的排序比快排快了这么多,官方的排序不也是快排吗?(这个测试结果看起来好像也没人会比快排慢是吧 😅) **数据降序分布** ```shell -BenchmarkStdSort-6 200 9548365 ns/op -BenchmarkQuickSort-6 1 2678204600 ns/op -BenchmarkShellSort-6 500 2417678 ns/op -BenchmarkHeapSort-6 300 5858391 ns/op -BenchmarkMergeSort-6 500 3865994 ns/op +BenchmarkStdSort-8 200 7405331 ns/op +BenchmarkQuickSort-8 1 3390954400 ns/op +BenchmarkShellSort-8 500 2900240 ns/op +BenchmarkHeapSort-8 200 7091124 ns/op +BenchmarkMergeSort-8 300 4295169 ns/op ``` emmmmmmm,同上 😓 @@ -532,26 +532,43 @@ emmmmmmm,同上 😓 **数据随机分布** ```shell -BenchmarkStdSort-6 100 22680519 ns/op -BenchmarkQuickSort-6 200 9022003 ns/op -BenchmarkSort-6 200 8754770 ns/op +BenchmarkStdSort-8 100 22649399 ns/op +BenchmarkQuickSort-8 100 10870924 ns/op +BenchmarkStdSortWithoutInterface-8 100 10511605 ns/op ``` **数据升序分布** ```shell -BenchmarkStdSort-6 200 9312165 ns/op -BenchmarkShellSort-6 1000 1323258 ns/op -BenchmarkSort-6 1000 1273628 ns/op +BenchmarkStdSort-8 200 7006117 ns/op +BenchmarkShellSort-8 1000 1667537 ns/op +BenchmarkStdSortWithoutInterface-8 1000 1619643 ns/op ``` **数据降序分布** ```shell -BenchmarkStdSort-6 200 9540368 ns/op -BenchmarkShellSort-6 1000 2286450 ns/op -BenchmarkSort-6 1000 1288236 ns/op +BenchmarkStdSort-8 200 7614625 ns/op +BenchmarkShellSort-8 500 3051834 ns/op 
+BenchmarkStdSortWithoutInterface-8 1000 1689479 ns/op ``` 🖖 [Sort](https://github.com/chenjiandongx/collections/blob/master/std_sort.go) 完胜!!! +故事到这里还没有结束,我们还可以进一步思考如何获得更高的排序性能,没错,就是 goroutine,将一个数组切分成两半,分别使用 `StdSortWithoutInterface` 排序,将排序后的结果进行一次归并排序,就可以得到最终的有序数组,这次我们测试的数组长度为 **10e5** + +为了验证真正的`并行计算`,我们将分别测试 CPU 数量为 1、2、8 的情况 +```shell +BenchmarkStdSort 5 260696480 ns/op +BenchmarkStdSort-2 5 246746560 ns/op +BenchmarkStdSort-8 5 248532560 ns/op +BenchmarkStdSortWithoutInterface 10 124666470 ns/op +BenchmarkStdSortWithoutInterface-2 10 120676740 ns/op +BenchmarkStdSortWithoutInterface-8 10 126062650 ns/op +BenchmarkStdSortWithGoroutine 20 125163280 ns/op +BenchmarkStdSortWithGoroutine-2 20 80835825 ns/op +BenchmarkStdSortWithGoroutine-8 20 81232625 ns/op +``` + +😎 WOW!!! CPU 数量为 1 时 `StdSortWithGoroutine` 与 `StdSortWithoutInterface` 相差无几;CPU > 1 以后,goroutine 做到了真正的并行,利用多核进行计算,速度约为 `StdSortWithoutInterface` 的 **1.5** 倍,约为默认 Sort 方法的 **3** 倍。喏,这就是算法的魅力。 + ### 📃 License MIT [©chenjiandongx](http://github.com/chenjiandongx) diff --git a/ordered_map_test.go b/ordered_map_test.go index 7c645d4..64f88b6 100644 --- a/ordered_map_test.go +++ b/ordered_map_test.go @@ -2,7 +2,7 @@ package collections import ( "testing" - + "github.com/cevaris/ordered_map" ) diff --git a/sort_test.go b/sort_test.go index 32a4ee1..552e60d 100644 --- a/sort_test.go +++ b/sort_test.go @@ -133,15 +133,28 @@ func BenchmarkMergeSort(b *testing.B) { } } -func TestSort(t *testing.T) { +func TestStdSortWithoutInterface(t *testing.T) { items := yieldRandomArray(maxCnt) - Sort(items) + StdSortWithoutInterface(items) assert.True(t, assertSort(items)) } -func BenchmarkSort(b *testing.B) { +func BenchmarkStdSortWithoutInterface(b *testing.B) { for i := 0; i < b.N; i++ { items := yieldRandomArray(maxCnt) - Sort(items) + StdSortWithoutInterface(items) + } +} + +func TestStdSortWithGoroutine(t *testing.T) { + items := yieldRandomArray(maxCnt) + StdSortWithGoroutine(items) + assert.True(t, assertSort(items)) +} + +func BenchmarkStdSortWithGoroutine(b *testing.B) { + for i := 0; 
i < b.N; i++ { + items := yieldRandomArray(maxCnt) + StdSortWithGoroutine(items) } } diff --git a/std_sort.go b/std_sort.go index 57248a2..b10ecfa 100644 --- a/std_sort.go +++ b/std_sort.go @@ -194,9 +194,17 @@ func qSort(data []int, a, b, maxDepth int) { -// Sort sorts data. -// It makes one call to data.Len to determine n, and O(n*log(n)) calls to -// data.Less and data.Swap. The sort is not guaranteed to be stable. -func Sort(data []int) { +// stdSort sorts data[start:end] in place with qSort and then sends true on ch +// so the caller can wait for it. The depth budget handed to qSort is derived +// from the length of the sub-range being sorted, not from the whole slice. +func stdSort(data []int, start, end int, ch chan bool) { + qSort(data, start, end, maxDepth(end-start)) + ch <- true +} + +// StdSortWithoutInterface sorts data in place by calling qSort directly on the +// []int. The depth budget is computed from the full length n. +// The sort is not guaranteed to be stable. +func StdSortWithoutInterface(data []int) { n := len(data) qSort(data, 0, n, maxDepth(n)) } func maxDepth(n int) int { @@ -206,3 +214,26 @@ func maxDepth(n int) int { } return depth * 2 } + +// StdSortWithGoroutine sorts data in place: each half is sorted by stdSort in +// its own goroutine, then the halves are merged by mergeArray (defined elsewhere). +func StdSortWithGoroutine(data []int) { + n := len(data) + if n < 2 { + // Already sorted; returning here also keeps mid-1 below from being -1. + return + } + mid := n / 2 + + chs := make(chan bool, 2) + + go stdSort(data, 0, mid, chs) + go stdSort(data, mid, n, chs) + + // Block until both halves have reported completion. + for i := 0; i < 2; i++ { + <-chs + } + res := make([]int, n) + mergeArray(data, 0, mid-1, n-1, res) +}