diff --git a/cmake/tbb.cmake b/cmake/tbb.cmake
index b9d4a87..73b6d5a 100755
--- a/cmake/tbb.cmake
+++ b/cmake/tbb.cmake
@@ -9,18 +9,17 @@ find_package(Git REQUIRED)
 ExternalProject_Add(
     tbb_src
     PREFIX "vendor/intel/tbb"
-    GIT_REPOSITORY "https://github.com/wjakob/tbb.git"
-    GIT_TAG b066defc0229a1e92d7a200eb3fe0f7e35945d95
+    GIT_REPOSITORY "https://github.com/seb711/oneTBB.git"
+    GIT_TAG master # NOTE(review): branch name, not a fixed commit like the previous tag; UPDATE_COMMAND is empty below - consider pinning a hash
     TIMEOUT 10
     BUILD_COMMAND make
     UPDATE_COMMAND "" # to prevent rebuilding everytime
     INSTALL_COMMAND ""
     CMAKE_ARGS
-    -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/vendor/tbb_cpp
     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
     -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-    -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
+    -DCMAKE_LIBRARY_OUTPUT_DIRECTORY:STRING=./lib
+    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DTBB_TEST:BOOL=OFF
 )
 
 # Prepare json
diff --git a/tools/conversion/btrtocsv.cpp b/tools/conversion/btrtocsv.cpp
index 4d29b82..e64cf39 100644
--- a/tools/conversion/btrtocsv.cpp
+++ b/tools/conversion/btrtocsv.cpp
@@ -13,7 +13,8 @@
 #include "yaml-cpp/yaml.h"
 #include "spdlog/spdlog.h"
 #include "tbb/parallel_for.h"
-#include "tbb/task_scheduler_init.h"
+#define TBB_PREVIEW_GLOBAL_CONTROL 1
+#include "tbb/global_control.h"
 // ------------------------------------------------------------------------------
 // Btrfiles library
 #include "btrfiles.hpp"
@@ -106,7 +107,7 @@ int main(int argc, char **argv)
     SchemePool::refresh();
 
     // Init TBB TODO: is that actually still necessary ?
-    tbb::task_scheduler_init init(FLAGS_threads); // NOLINT(cppcoreguidelines-narrowing-conversions)
+    tbb::global_control c(tbb::global_control::max_allowed_parallelism, FLAGS_threads);
 
     // Open output file
     auto csvstream = std::ofstream(FLAGS_csv);
diff --git a/tools/conversion/csvtobtr.cpp b/tools/conversion/csvtobtr.cpp
index 786e68d..15c0259 100644
--- a/tools/conversion/csvtobtr.cpp
+++ b/tools/conversion/csvtobtr.cpp
@@ -15,7 +15,9 @@
 #include
 #include
 #include
-#include
+#include
+#define TBB_PREVIEW_GLOBAL_CONTROL 1
+#include "tbb/global_control.h"
 // ------------------------------------------------------------------------------
 // Btr internal includes
 #include "common/Utils.hpp"
@@ -42,7 +44,7 @@ DEFINE_bool(create_btr, false, "If false will exit after binary creation");
 DEFINE_bool(verify, true, "Verify that decompression works");
 DEFINE_int32(chunk, -1, "Select a specific chunk to measure");
 DEFINE_int32(column, -1, "Select a specific column to measure");
-DEFINE_uint32(threads, 8, "");
+DEFINE_uint32(threads, 0, "Maximum TBB parallelism; 0 uses the hardware concurrency");
 // ------------------------------------------------------------------------------
 using namespace btrblocks;
 // ------------------------------------------------------------------------------
@@ -72,8 +74,13 @@ int main(int argc, char **argv)
     // This seems necessary to be
     SchemePool::refresh();
 
-    // Init TBB TODO: is that actually still necessary ?
-    tbb::task_scheduler_init init(FLAGS_threads);
+    // tbb::global_control is an RAII handle: the limit is only in effect while the
+    // object is alive, so it must live in main's scope. Declaring it inside an
+    // if/else branch would destroy it again at the closing brace of that branch.
+    // 0 or the all-ones value (what -1 becomes in this unsigned flag) selects the
+    // hardware concurrency.
+    const bool auto_threads =
+        FLAGS_threads < 1 || FLAGS_threads == static_cast<decltype(FLAGS_threads)>(-1);
+    const size_t hw_threads = std::thread::hardware_concurrency();
+    // hardware_concurrency() may report 0 when unknown; the limit must be at least 1.
+    const size_t fallback_threads = hw_threads > 0 ? hw_threads : 1;
+    tbb::global_control c(tbb::global_control::max_allowed_parallelism,
+                          auto_threads ? fallback_threads : static_cast<size_t>(FLAGS_threads));
 
     // Load schema
     const auto schema = YAML::LoadFile(FLAGS_yaml);
@@ -124,7 +135,7 @@ int main(int argc, char **argv)
 
     // Prepare datastructures for btr compression
     //auto ranges = relation.getRanges(static_cast(1), 9999);
-    auto ranges = relation.getRanges(SplitStrategy::SEQUENTIAL, 9999);
+    auto ranges = relation.getRanges(SplitStrategy::SEQUENTIAL, -1);
     assert(ranges.size() > 0);
     Datablock datablockV2(relation);
     std::filesystem::create_directory(FLAGS_btr);
diff --git a/tools/conversion/decompression-speed.cpp b/tools/conversion/decompression-speed.cpp
index 87ea0f8..450329b 100644
--- a/tools/conversion/decompression-speed.cpp
+++ b/tools/conversion/decompression-speed.cpp
@@ -6,7 +6,8 @@
 // -------------------------------------------------------------------------------------
 #include "gflags/gflags.h"
 #include "tbb/parallel_for.h"
-#include "tbb/task_scheduler_init.h"
+#define TBB_PREVIEW_GLOBAL_CONTROL 1
+#include "tbb/global_control.h"
 // -------------------------------------------------------------------------------------
 #include "common/PerfEvent.hpp"
 #include "common/Utils.hpp"
@@ -108,7 +109,7 @@ int main(int argc, char **argv) {
     } else {
         threads = FLAGS_threads;
     }
-    tbb::task_scheduler_init init(threads);
+    tbb::global_control c(tbb::global_control::max_allowed_parallelism, threads);
 
     // Read the metadata
     std::vector raw_file_metadata;
diff --git a/tools/playground/generate_s3_data.cpp b/tools/playground/generate_s3_data.cpp
index 1b9d9dc..a44ccb5 100644
--- a/tools/playground/generate_s3_data.cpp
+++ b/tools/playground/generate_s3_data.cpp
@@ -6,7 +6,8 @@
 #include
 // -------------------------------------------------------------------------------------
 #include
-#include
+#define TBB_PREVIEW_GLOBAL_CONTROL 1
+#include
 // -------------------------------------------------------------------------------------
 #include
 #include
@@ -180,9 +181,7 @@ int main(int argc, char** argv) {
     std::stringstream bucket;
     bucket << bucket_prefix << "-" << region;
 
-    // tbb::task_scheduler_init init(1);
-
-    // tbb::task_scheduler_init init(1);
+    // tbb::global_control c(tbb::global_control::max_allowed_parallelism, 1);
 
     Aws::SDKOptions options;
     Aws::InitAPI(options);
diff --git a/tools/regression-benchmark/benchmark.sh b/tools/regression-benchmark/benchmark.sh
new file mode 100755
index 0000000..6e59a62
--- /dev/null
+++ b/tools/regression-benchmark/benchmark.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+#
+# Regression benchmark: for every row in datasets.csv, download the schema and
+# binary data from S3, compress it with csvtobtr and append the output of
+# decompression-speed to results.csv.
+# Run it from tools/regression-benchmark: every path below is relative.
+
+# Exit on error
+set -e
+
+# Print $1 with leading and trailing whitespace removed.
+trim() {
+  local s="$1"
+  s="${s#"${s%%[![:space:]]*}"}"
+  s="${s%"${s##*[![:space:]]}"}"
+  printf '%s' "$s"
+}
+
+# Download, compress and benchmark every dataset listed in a CSV file.
+#   $1: CSV file with one "name, data-uri, schema-uri" row per dataset
+#   $2: file that receives one "name, <decompression-speed output>" line per dataset
+sync_uris() {
+  local input_file="$1"
+  local output_file="$2"
+  local name uri yaml schemaname yaml_dir yaml_file btr_dir bin_dir
+
+  # Read the list on fd 3 so the tools started in the loop cannot swallow rows
+  # from stdin; "|| [[ -n $name ]]" keeps a last row that lacks a newline.
+  while IFS=',' read -r -u 3 name uri yaml || [[ -n "$name" ]]; do
+    # Rows are written as "a, b, c": strip the padding so the fields can be quoted.
+    name="$(trim "$name")"
+    uri="$(trim "$uri")"
+    yaml="$(trim "$yaml")"
+    if [[ -z "$name" ]]; then
+      continue # blank row
+    fi
+
+    schemaname=$(basename "$yaml")
+
+    echo "$uri"
+
+    yaml_dir="./csvtobtrdata/yaml/$name"
+    yaml_file="$yaml_dir/$schemaname"
+    if [[ ! -f "$yaml_file" ]]; then
+      mkdir -p "$yaml_dir"
+      aws s3 cp "$yaml" "$yaml_dir/" --request-payer requester
+    fi
+
+    # Start from an empty output directory. ":?" aborts rather than expanding to
+    # "/*" should btr_dir ever be empty.
+    btr_dir="./csvtobtrdata/btrblocks/$name/"
+    mkdir -p "$btr_dir"
+    rm -rf "${btr_dir:?}"/*
+
+    bin_dir="./csvtobtrdata/btrblocks_bin/$name/"
+    echo "aws s3 sync $uri $bin_dir --request-payer requester"
+    if [[ ! -d "$bin_dir" ]]; then
+      aws s3 sync --request-payer requester "$uri" "$bin_dir"
+    fi
+
+    ./csvtobtr --btr "$btr_dir" --binary "$bin_dir" --create_btr true --yaml "$yaml_file"
+
+    echo "$name, $(./decompression-speed --btr "$btr_dir" --reps 100 --binary "$bin_dir" --yaml "$yaml_file" --verify)" >> "$output_file"
+  done 3< "$input_file"
+}
+
+# install things
+# sudo apt-get update && sudo apt-get install libssl-dev libcurl4-openssl-dev -y
+command -v aws &>/dev/null || { echo >&2 "Please install the aws cli"; exit 1; }
+
+# build the benchmark thing
+output_file="results.csv"
+mkdir -p tmpbuild
+cd tmpbuild
+# Results are appended to a path relative to tmpbuild, so clear the old file
+# only after the cd; otherwise every run would append to the previous results.
+rm -f "$output_file"
+
+dataset="../datasets.csv"
+# Check that the dataset list exists
+if [[ ! -f "$dataset" ]]; then
+  echo "$dataset file not found."
+  exit 1
+fi
+
+cmake ../../.. -DCMAKE_BUILD_TYPE=Release
+make -j csvtobtr
+make -j decompression-speed
+sync_uris "$dataset" "$output_file"
+
diff --git a/tools/regression-benchmark/datasets.csv b/tools/regression-benchmark/datasets.csv
new file mode 100644
index 0000000..ecfea81
--- /dev/null
+++ b/tools/regression-benchmark/datasets.csv
@@ -0,0 +1,43 @@
+Arade_1, s3://public-bi-eu-central-1/v0.0.1/binary/Arade_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Arade_1/Arade_1.yaml
+Bimbo_1, s3://public-bi-eu-central-1/v0.0.1/binary/Bimbo_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Bimbo_1/Bimbo_1.yaml
+CityMaxCapita_1, s3://public-bi-eu-central-1/v0.0.1/binary/CityMaxCapita_1/, s3://public-bi-eu-central-1/v0.0.1/binary/CityMaxCapita_1/CityMaxCapita_1.yaml
+CMSprovider_1, s3://public-bi-eu-central-1/v0.0.1/binary/CMSprovider_1/, s3://public-bi-eu-central-1/v0.0.1/binary/CMSprovider_1/CMSprovider_1.yaml
+CommonGovernment_1, s3://public-bi-eu-central-1/v0.0.1/binary/CommonGovernment_1/, s3://public-bi-eu-central-1/v0.0.1/binary/CommonGovernment_1/CommonGovernment_1.yaml
+Cooperations_1, s3://public-bi-eu-central-1/v0.0.1/binary/Corporations_1/,
s3://public-bi-eu-central-1/v0.0.1/binary/Corporations_1/Corporations_1.yaml +Eixo_1, s3://public-bi-eu-central-1/v0.0.1/binary/Eixo_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Eixo_1/Eixo_1.yaml +Euro2016_1, s3://public-bi-eu-central-1/v0.0.1/binary/Euro2016_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Euro2016_1/Euro2016_1.yaml +Food_1, s3://public-bi-eu-central-1/v0.0.1/binary/Food_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Food_1/Food_1.yaml +Generico_1, s3://public-bi-eu-central-1/v0.0.1/binary/Generico_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Generico_1/Generico_1.yaml +Hashtags_1, s3://public-bi-eu-central-1/v0.0.1/binary/HashTags_1/, s3://public-bi-eu-central-1/v0.0.1/binary/HashTags_1/HashTags_1.yaml +IGlocations_2, s3://public-bi-eu-central-1/v0.0.1/binary/IGlocations2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/IGlocations2_1/IGlocations2_1.yaml +Medicare1_1, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare1_1/Medicare1_1.yaml +Medicare2_1, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare2_1/Medicare2_1.yaml +Medicare3_1, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare3_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare3_1/Medicare3_1.yaml +MedPayment1_1, s3://public-bi-eu-central-1/v0.0.1/binary/MedPayment1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/MedPayment1_1/MedPayment1_1.yaml +MedPayment2_1, s3://public-bi-eu-central-1/v0.0.1/binary/MedPayment2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/MedPayment2_1/MedPayment2_1.yaml +MLB_68, s3://public-bi-eu-central-1/v0.0.1/binary/68/MLB_68/, s3://public-bi-eu-central-1/v0.0.1/binary/MLB/68/MLB_68.yaml +Motos_1, s3://public-bi-eu-central-1/v0.0.1/binary/Motos_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Motos_1/Motos_1.yaml +MulheresMil_1, s3://public-bi-eu-central-1/v0.0.1/binary/MulheresMil_1/, 
s3://public-bi-eu-central-1/v0.0.1/binary/MulheresMil_1/MulheresMil_1.yaml +NYC_1, s3://public-bi-eu-central-1/v0.0.1/binary/NYC_1/, s3://public-bi-eu-central-1/v0.0.1/binary/NYC_1/NYC_1.yaml +PanCreactomy1_1, s3://public-bi-eu-central-1/v0.0.1/binary/PanCreactomy1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/PanCreactomy1_1/PanCreactomy1_1.yaml +PanCreactomy2_1, s3://public-bi-eu-central-1/v0.0.1/binary/PanCreactomy2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/PanCreactomy2_1/PanCreactomy2_1.yaml +Physicians_1, s3://public-bi-eu-central-1/v0.0.1/binary/Physicians_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Physicians_1/Physicians_1.yaml +Provider_1, s3://public-bi-eu-central-1/v0.0.1/binary/Provider_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Provider_1/Provider_1.yaml +RealEstate1_1, s3://public-bi-eu-central-1/v0.0.1/binary/RealEstate1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/RealEstate1_1/RealEstate1_1.yaml +RealEstate2_1, s3://public-bi-eu-central-1/v0.0.1/binary/RealEstate2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/RealEstate2_1/RealEstate2_1.yaml +Redfin1_1, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin1_1/Redfin1_1.yaml +Redfin2_1, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin2_1/Redfin2_1.yaml +Redfin3_1, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin3_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin3_1/Redfin3_1.yaml +Redfin4_1, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin4_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin4_1/Redfin4_1.yaml +Rentabilidad_1, s3://public-bi-eu-central-1/v0.0.1/binary/Rentabilidad_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Rentabilidad_1/Rentabilidad_1.yaml +Romance_1, s3://public-bi-eu-central-1/v0.0.1/binary/Romance_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Romance_1/Romance_1.yaml +SalariesFrance_1, s3://public-bi-eu-central-1/v0.0.1/binary/SalariesFrance_1/, 
s3://public-bi-eu-central-1/v0.0.1/binary/SalariesFrance_1/SalariesFrance_1.yaml +TableroSistemaPenal_1, s3://public-bi-eu-central-1/v0.0.1/binary/TableroSistemaPenal_1/, s3://public-bi-eu-central-1/v0.0.1/binary/TableroSistemaPenal_1/TableroSistemaPenal_1.yaml +Taxpayer_1, s3://public-bi-eu-central-1/v0.0.1/binary/Taxpayer_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Taxpayer_1/Taxpayer_1.yaml +Telco_1, s3://public-bi-eu-central-1/v0.0.1/binary/Telco_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Telco_1/Telco_1.yaml +TrainsUK1_4, s3://public-bi-eu-central-1/v0.0.1/binary/4/TrainsUK1_4/, s3://public-bi-eu-central-1/v0.0.1/binary/TrainsUK1/4/TrainsUK1_4.yaml +TrainsUK2_1, s3://public-bi-eu-central-1/v0.0.1/binary/TrainsUK2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/TrainsUK2_1/TrainsUK2_1.yaml +Uberlandia_1, s3://public-bi-eu-central-1/v0.0.1/binary/Uberlandia_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Uberlandia_1/Uberlandia_1.yaml +USCensus_1, s3://public-bi-eu-central-1/v0.0.1/binary/USCensus_1/, s3://public-bi-eu-central-1/v0.0.1/binary/USCensus_1/USCensus_1.yaml +Wins_1, s3://public-bi-eu-central-1/v0.0.1/binary/Wins_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Wins_1/Wins_1.yaml +YaleLanguages_1, s3://public-bi-eu-central-1/v0.0.1/binary/YaleLanguages_1/, s3://public-bi-eu-central-1/v0.0.1/binary/YaleLanguages_1/YaleLanguages_1.yaml