Skip to content

Commit bb1ac41

Browse files
committed
Add AMD's Bolt perf tests
This adds AMD's Bolt C++ Template Library performance tests, so Boost.Compute can be compared with Bolt.
1 parent aa15cd6 commit bb1ac41

15 files changed

+754
-4
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ option(BOOST_COMPUTE_HAVE_QT "Have Qt" OFF)
4848
option(BOOST_COMPUTE_HAVE_VTK "Have VTK" OFF)
4949
option(BOOST_COMPUTE_HAVE_CUDA "Have CUDA" OFF)
5050
option(BOOST_COMPUTE_HAVE_TBB "Have TBB" OFF)
51+
option(BOOST_COMPUTE_HAVE_BOLT "Have BOLT" OFF)
5152

5253
include_directories(include)
5354

cmake/FindBolt.cmake

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
############################################################################
2+
# © 2012,2014 Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
############################################################################
17+
18+
# Locate a BOLT implementation.
19+
#
20+
# Defines the following variables:
21+
#
22+
# BOLT_FOUND - Found a Bolt implementation
23+
#
24+
# Also defines the library variables below as normal
25+
# variables.
26+
#
27+
# BOLT_LIBRARIES - These contain debug/optimized keywords when a debugging library is found
28+
# BOLT_INCLUDE_DIRS - All relevant Bolt include directories
29+
#
30+
# Accepts the following variables as input:
31+
#
32+
# BOLT_ROOT - (as a CMake or environment variable)
33+
# The root directory of a BOLT installation
34+
#
35+
# FIND_LIBRARY_USE_LIB64_PATHS - Global property that controls whether FindBOLT should search for
36+
# 64bit or 32bit libs
37+
#
38+
#-----------------------
39+
# Example Usage:
40+
#
41+
# find_package(Bolt REQUIRED)
42+
# include_directories(${BOLT_INCLUDE_DIRS})
43+
#
44+
# add_executable(foo foo.cc)
45+
# target_link_libraries(foo ${BOLT_LIBRARIES})
46+
#
47+
#-----------------------
48+
49+
# This module helps to use BOLT_FIND_COMPONENTS, BOLT_FIND_REQUIRED, BOLT_FIND_QUIETLY
50+
include( FindPackageHandleStandardArgs )
51+
52+
# Search for 64bit libs if the FIND_LIBRARY_USE_LIB64_PATHS global property is set to true; otherwise search for 32bit libs
53+
get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS )
54+
55+
# Debug print statements
56+
#message( "BOLT_LIBRARY_PATH_SUFFIXES: ${BOLT_LIBRARY_PATH_SUFFIXES}" )
57+
#message( "ENV{BOLT_ROOT}: $ENV{BOLT_ROOT}" )
58+
#message( "BOLT_FIND_COMPONENTS: ${BOLT_FIND_COMPONENTS}" )
59+
#message( "BOLT_FIND_REQUIRED: ${BOLT_FIND_REQUIRED}" )
60+
61+
# Set the component to find if the user does not specify one explicitly
62+
if( NOT BOLT_FIND_COMPONENTS )
63+
set( BOLT_FIND_COMPONENTS CL )
64+
endif( )
65+
if(WIN32)
66+
if( MSVC_VERSION VERSION_LESS 1600 )
67+
set( myMSVCVer "vc90" )
68+
elseif( MSVC_VERSION VERSION_LESS 1700 )
69+
set( myMSVCVer "vc100" )
70+
elseif( MSVC_VERSION VERSION_LESS 1800 )
71+
set( myMSVCVer "vc110" )
72+
else()
73+
set( myMSVCVer "vc120" )
74+
endif( )
75+
else()
76+
set( myMSVCVer "gcc" )
77+
endif()
78+
79+
if(WIN32)
80+
set( BoltLibName "clBolt.runtime.${myMSVCVer}")
81+
set( LIB_EXT "lib")
82+
else()
83+
set( BoltLibName "libclBolt.runtime.${myMSVCVer}")
84+
set( LIB_EXT "a")
85+
endif()
86+
87+
# Eventually, Bolt may support multiple backends, but for now it only supports CL
88+
list( FIND BOLT_FIND_COMPONENTS CL find_CL )
89+
if( NOT find_CL EQUAL -1 )
90+
set( BOLT_LIBNAME_BASE ${BoltLibName} )
91+
endif( )
92+
93+
if( NOT find_CL EQUAL -1 )
94+
# Find and set the location of the main BOLT release static lib file
95+
find_library( BOLT_LIBRARY_STATIC_RELEASE
96+
NAMES ${BOLT_LIBNAME_BASE}.${LIB_EXT}
97+
HINTS
98+
${BOLT_ROOT}
99+
ENV BOLT_ROOT
100+
DOC "BOLT static library path"
101+
PATH_SUFFIXES lib
102+
)
103+
mark_as_advanced( BOLT_LIBRARY_STATIC_RELEASE )
104+
105+
# Find and set the location of the main BOLT debug static lib file
106+
find_library( BOLT_LIBRARY_STATIC_DEBUG
107+
NAMES ${BOLT_LIBNAME_BASE}.debug.${LIB_EXT}
108+
HINTS
109+
${BOLT_ROOT}
110+
ENV BOLT_ROOT
111+
DOC "BOLT static library path"
112+
PATH_SUFFIXES lib
113+
)
114+
mark_as_advanced( BOLT_LIBRARY_STATIC_DEBUG )
115+
116+
if( BOLT_LIBRARY_STATIC_RELEASE )
117+
set( BOLT_LIBRARY_STATIC optimized ${BOLT_LIBRARY_STATIC_RELEASE} )
118+
else( )
119+
set( BOLT_LIBRARY_STATIC "" )
120+
message( "${BOLT_LIBNAME_BASE}.${LIB_EXT}: Release static bolt library not found" )
121+
endif( )
122+
123+
if( BOLT_LIBRARY_STATIC_DEBUG )
124+
set( BOLT_LIBRARY_STATIC ${BOLT_LIBRARY_STATIC} debug ${BOLT_LIBRARY_STATIC_DEBUG} )
125+
else( )
126+
message( "${BOLT_LIBNAME_BASE}.debug.${LIB_EXT}: Debug static bolt library not found" )
127+
endif( )
128+
129+
find_path( BOLT_INCLUDE_DIRS
130+
NAMES bolt/cl/bolt.h
131+
HINTS
132+
${BOLT_ROOT}
133+
ENV BOLT_ROOT
134+
DOC "BOLT header file path"
135+
PATH_SUFFIXES include
136+
)
137+
mark_as_advanced( BOLT_INCLUDE_DIRS )
138+
139+
FIND_PACKAGE_HANDLE_STANDARD_ARGS( BOLT DEFAULT_MSG BOLT_LIBRARY_STATIC BOLT_INCLUDE_DIRS )
140+
endif( )
141+
142+
if( BOLT_FOUND )
143+
list( APPEND BOLT_LIBRARIES ${BOLT_LIBRARY_STATIC} )
144+
else( )
145+
if( NOT BOLT_FIND_QUIETLY )
146+
message( WARNING "FindBOLT could not find the BOLT library" )
147+
message( STATUS "Did you remember to set the BOLT_ROOT environment variable?" )
148+
endif( )
149+
endif()

perf/CMakeLists.txt

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ endif()
88

99
if(${BOOST_COMPUTE_THREAD_SAFE} AND NOT ${BOOST_COMPUTE_USE_CPP11})
1010
set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} thread)
11+
elseif(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11})
12+
set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} thread)
13+
endif()
14+
15+
if(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11})
16+
set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} date_time)
1117
endif()
1218

1319
find_package(Boost 1.48 REQUIRED COMPONENTS ${PERF_BOOST_COMPONENTS})
@@ -95,7 +101,7 @@ set(STL_BENCHMARKS
95101
stl_unique_copy
96102
)
97103

98-
# stl benchmarks which rerquire c++11
104+
# stl benchmarks which require c++11
99105
if(${BOOST_COMPUTE_USE_CPP11})
100106
list(APPEND
101107
STL_BENCHMARKS
@@ -154,3 +160,30 @@ if(${BOOST_COMPUTE_HAVE_TBB})
154160
target_link_libraries(${PERF_TARGET} ${TBB_LIBRARIES} ${Boost_LIBRARIES})
155161
endforeach()
156162
endif()
163+
164+
# bolt c++ template lib benchmarks (for comparison)
165+
if(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11})
166+
find_package(Bolt REQUIRED)
167+
include_directories(${BOLT_INCLUDE_DIRS})
168+
169+
set(BOLT_BENCHMARKS
170+
bolt_accumulate
171+
bolt_count
172+
bolt_exclusive_scan
173+
bolt_fill
174+
bolt_inner_product
175+
bolt_max_element
176+
bolt_merge
177+
bolt_partial_sum
178+
bolt_saxpy
179+
bolt_sort
180+
)
181+
182+
foreach(BENCHMARK ${BOLT_BENCHMARKS})
183+
set(PERF_TARGET perf_${BENCHMARK})
184+
add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
185+
target_link_libraries(${PERF_TARGET} ${OPENCL_LIBRARIES} ${BOLT_LIBRARIES} ${Boost_LIBRARIES})
186+
endforeach()
187+
elseif(${BOOST_COMPUTE_HAVE_BOLT} AND NOT ${BOOST_COMPUTE_USE_CPP11})
188+
message(WARNING "BOOST_COMPUTE_USE_CPP11 must be ON for building Bolt C++ Template Library performance tests.")
189+
endif()

perf/perf.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,18 @@ def run_benchmark(name, sizes, vs=[]):
128128
"sort",
129129
"unique"
130130
],
131+
"bolt" : [
132+
"accumulate",
133+
"count",
134+
"exclusive_scan",
135+
"fill",
136+
"inner_product",
137+
"max_element",
138+
"merge",
139+
"partial_sum",
140+
"saxpy",
141+
"sort"
142+
],
131143
"tbb": [
132144
"accumulate",
133145
"merge",
@@ -187,7 +199,7 @@ def run_benchmark(name, sizes, vs=[]):
187199

188200
sizes = sorted(sizes)
189201

190-
competitors = ["tbb", "thrust", "stl"]
202+
competitors = ["bolt", "tbb", "thrust", "stl"]
191203

192204
report = run_benchmark(test, sizes, competitors)
193205

perf/perf_bolt_accumulate.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//---------------------------------------------------------------------------//
2+
// Copyright (c) 2015 Jakub Szuppe <[email protected]>
3+
//
4+
// Distributed under the Boost Software License, Version 1.0
5+
// See accompanying file LICENSE_1_0.txt or copy at
6+
// http://www.boost.org/LICENSE_1_0.txt
7+
//
8+
// See http://kylelutz.github.com/compute for more information.
9+
//---------------------------------------------------------------------------//
10+
11+
#include <iostream>
12+
#include <algorithm>
13+
#include <vector>
14+
15+
#include <bolt/cl/copy.h>
16+
#include <bolt/cl/device_vector.h>
17+
#include <bolt/cl/reduce.h>
18+
19+
#include "perf.hpp"
20+
21+
int main(int argc, char *argv[])
22+
{
23+
perf_parse_args(argc, argv);
24+
25+
std::cout << "size: " << PERF_N << std::endl;
26+
27+
bolt::cl::control ctrl = bolt::cl::control::getDefault();
28+
::cl::Device device = ctrl.getDevice();
29+
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
30+
31+
// create host vector
32+
std::vector<int> host_vec = generate_random_vector<int>(PERF_N);
33+
34+
// create device vector
35+
bolt::cl::device_vector<int> device_vec(PERF_N);
36+
37+
// transfer data to the device
38+
bolt::cl::copy(host_vec.begin(), host_vec.end(), device_vec.begin());
39+
40+
int sum = 0;
41+
perf_timer t;
42+
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
43+
t.start();
44+
sum = bolt::cl::reduce(device_vec.begin(), device_vec.end());
45+
t.stop();
46+
}
47+
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
48+
std::cout << "sum: " << sum << std::endl;
49+
50+
return 0;
51+
}

perf/perf_bolt_count.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
//---------------------------------------------------------------------------//
2+
// Copyright (c) 2015 Jakub Szuppe <[email protected]>
3+
//
4+
// Distributed under the Boost Software License, Version 1.0
5+
// See accompanying file LICENSE_1_0.txt or copy at
6+
// http://www.boost.org/LICENSE_1_0.txt
7+
//
8+
// See http://kylelutz.github.com/compute for more information.
9+
//---------------------------------------------------------------------------//
10+
11+
#include <iostream>
12+
#include <algorithm>
13+
#include <vector>
14+
15+
#include <bolt/cl/count.h>
16+
#include <bolt/cl/copy.h>
17+
#include <bolt/cl/device_vector.h>
18+
19+
#include "perf.hpp"
20+
21+
int rand_int()
22+
{
23+
return static_cast<int>((rand() / double(RAND_MAX)) * 25.0);
24+
}
25+
26+
int main(int argc, char *argv[])
27+
{
28+
perf_parse_args(argc, argv);
29+
30+
std::cout << "size: " << PERF_N << std::endl;
31+
32+
bolt::cl::control ctrl = bolt::cl::control::getDefault();
33+
::cl::Device device = ctrl.getDevice();
34+
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
35+
36+
// create vector of random numbers on the host
37+
std::vector<int> h_vec(PERF_N);
38+
std::generate(h_vec.begin(), h_vec.end(), rand_int);
39+
40+
// create device vector
41+
bolt::cl::device_vector<int> d_vec(PERF_N);
42+
43+
// transfer data to the device
44+
bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
45+
46+
size_t count = 0;
47+
perf_timer t;
48+
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
49+
t.start();
50+
count = bolt::cl::count(ctrl, d_vec.begin(), d_vec.end(), 4);
51+
t.stop();
52+
}
53+
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
54+
std::cout << "count: " << count << std::endl;
55+
56+
return 0;
57+
}

0 commit comments

Comments
 (0)