Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions cmake/tbb.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,17 @@ find_package(Git REQUIRED)
ExternalProject_Add(
tbb_src
PREFIX "vendor/intel/tbb"
GIT_REPOSITORY "https://github.com/wjakob/tbb.git"
GIT_TAG b066defc0229a1e92d7a200eb3fe0f7e35945d95
GIT_REPOSITORY "https://github.com/seb711/oneTBB.git"
GIT_TAG master
TIMEOUT 10
BUILD_COMMAND make
UPDATE_COMMAND "" # to prevent rebuilding everytime
INSTALL_COMMAND ""
CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/vendor/tbb_cpp
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_LIBRARY_OUTPUT_DIRECTORY:STRING=./lib
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DTBB_TEST:BOOL=OFF
)

# Prepare json
Expand Down
5 changes: 3 additions & 2 deletions tools/conversion/btrtocsv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
#include "yaml-cpp/yaml.h"
#include "spdlog/spdlog.h"
#include "tbb/parallel_for.h"
#include "tbb/task_scheduler_init.h"
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include "tbb/global_control.h"
// ------------------------------------------------------------------------------
// Btrfiles library
#include "btrfiles.hpp"
Expand Down Expand Up @@ -106,7 +107,7 @@ int main(int argc, char **argv)
SchemePool::refresh();

// Init TBB TODO: is that actually still necessary ?
tbb::task_scheduler_init init(FLAGS_threads); // NOLINT(cppcoreguidelines-narrowing-conversions)
tbb::global_control c(tbb::global_control::max_allowed_parallelism, FLAGS_threads);

// Open output file
auto csvstream = std::ofstream(FLAGS_csv);
Expand Down
17 changes: 12 additions & 5 deletions tools/conversion/csvtobtr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
#include <yaml-cpp/yaml.h>
#include <spdlog/spdlog.h>
#include <tbb/parallel_for.h>
#include <tbb/task_scheduler_init.h>
#include <thread>
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include "tbb/global_control.h"
// ------------------------------------------------------------------------------
// Btr internal includes
#include "common/Utils.hpp"
Expand All @@ -42,7 +44,7 @@ DEFINE_bool(create_btr, false, "If false will exit after binary creation");
DEFINE_bool(verify, true, "Verify that decompression works");
DEFINE_int32(chunk, -1, "Select a specific chunk to measure");
DEFINE_int32(column, -1, "Select a specific column to measure");
DEFINE_uint32(threads, 8, "");
DEFINE_uint32(threads, -1, "");
// ------------------------------------------------------------------------------
using namespace btrblocks;
// ------------------------------------------------------------------------------
Expand Down Expand Up @@ -72,8 +74,13 @@ int main(int argc, char **argv)
// This seems necessary to be
SchemePool::refresh();

// Init TBB TODO: is that actually still necessary ?
tbb::task_scheduler_init init(FLAGS_threads);
if (FLAGS_threads < 1) {
tbb::global_control c(tbb::global_control::max_allowed_parallelism,
std::thread::hardware_concurrency());
} else {
tbb::global_control c(tbb::global_control::max_allowed_parallelism,
FLAGS_threads);
}

// Load schema
const auto schema = YAML::LoadFile(FLAGS_yaml);
Expand Down Expand Up @@ -124,7 +131,7 @@ int main(int argc, char **argv)

// Prepare datastructures for btr compression
//auto ranges = relation.getRanges(static_cast<SplitStrategy>(1), 9999);
auto ranges = relation.getRanges(SplitStrategy::SEQUENTIAL, 9999);
auto ranges = relation.getRanges(SplitStrategy::SEQUENTIAL, -1);
assert(ranges.size() > 0);
Datablock datablockV2(relation);
std::filesystem::create_directory(FLAGS_btr);
Expand Down
5 changes: 3 additions & 2 deletions tools/conversion/decompression-speed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
// -------------------------------------------------------------------------------------
#include "gflags/gflags.h"
#include "tbb/parallel_for.h"
#include "tbb/task_scheduler_init.h"
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include "tbb/global_control.h"
// -------------------------------------------------------------------------------------
#include "common/PerfEvent.hpp"
#include "common/Utils.hpp"
Expand Down Expand Up @@ -108,7 +109,7 @@ int main(int argc, char **argv) {
} else {
threads = FLAGS_threads;
}
tbb::task_scheduler_init init(threads);
tbb::global_control c(tbb::global_control::max_allowed_parallelism, threads);

// Read the metadata
std::vector<char> raw_file_metadata;
Expand Down
7 changes: 3 additions & 4 deletions tools/playground/generate_s3_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
#include <vector>
// -------------------------------------------------------------------------------------
#include <tbb/parallel_for.h>
#include <tbb/task_scheduler_init.h>
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include <tbb/global_control.h>
// -------------------------------------------------------------------------------------
#include <aws/core/Aws.h>
#include <aws/s3-crt/S3CrtClient.h>
Expand Down Expand Up @@ -180,9 +181,7 @@ int main(int argc, char** argv) {
std::stringstream bucket;
bucket << bucket_prefix << "-" << region;

// tbb::task_scheduler_init init(1);

// tbb::task_scheduler_init init(1);
// tbb::global_control c(tbb::global_control::max_allowed_parallelism, 1);

Aws::SDKOptions options;
Aws::InitAPI(options);
Expand Down
61 changes: 61 additions & 0 deletions tools/regression-benchmark/benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env bash

# Exit on error
set -e

# Convert every dataset listed in a CSV manifest to btrblocks and benchmark it.
#
# Arguments:
#   $1 - manifest file with one "name, binary-dir-uri, schema-yaml-uri" row per dataset
#   $2 - results file; one "name, <decompression-speed output>" line is appended per dataset
#
# Expects ./csvtobtr and ./decompression-speed in the current directory.
# Downloaded schemas and binaries are kept under ./csvtobtrdata and reused on
# later runs; the btrblocks output directory is emptied before each conversion.
sync_uris() {
    local input_file="$1"
    local output_file="$2"
    local name uri yaml schemaname yaml_dir yaml_file btr_dir bin_dir result

    # IFS=', ' splits on the comma AND strips the blank that follows it in the
    # manifest, so the fields can be quoted safely below.
    # The manifest is read through fd 3 so that the commands in the loop body
    # cannot consume it via stdin.
    # The `|| [[ -n ... ]]` keeps a final row that lacks a trailing newline.
    while IFS=', ' read -r -u 3 name uri yaml || [[ -n "$name" ]]; do
        # Skip blank lines instead of running the tools with empty arguments.
        if [[ -z "$name" ]]; then
            continue
        fi

        schemaname=$(basename "$yaml")

        echo "$uri"

        # Fetch the schema only if it is not cached yet.
        yaml_dir="./csvtobtrdata/yaml/$name"
        yaml_file="$yaml_dir/$schemaname"
        if [[ ! -f "$yaml_file" ]]; then
            mkdir -p "$yaml_dir"
            aws s3 cp "$yaml" "$yaml_dir/" --request-payer requester
        fi

        # Start from an empty output directory so results of a previous run
        # cannot leak into this one. `:?` aborts if btr_dir were ever empty,
        # which would otherwise expand to `rm -rf /*`.
        btr_dir="./csvtobtrdata/btrblocks/$name/"
        mkdir -p "$btr_dir"
        rm -rf "${btr_dir:?}"/*

        # Download the binary columns only if the directory does not exist yet.
        bin_dir="./csvtobtrdata/btrblocks_bin/$name/"
        echo "aws s3 sync $uri $bin_dir --request-payer requester"
        if [[ ! -d "$bin_dir" ]]; then
            aws s3 sync --request-payer requester "$uri" "$bin_dir"
        fi

        ./csvtobtr --btr "$btr_dir" --binary "$bin_dir" --create_btr true --yaml "$yaml_file"

        # Capture the output in a separate assignment: a failing command
        # substitution nested inside `echo "..."` would be ignored by `set -e`.
        result=$(./decompression-speed --btr "$btr_dir" --reps 100 --binary "$bin_dir" --yaml "$yaml_file" --verify)
        echo "$name, $result" >> "$output_file"

    done 3< "$input_file"
}

# Prerequisites (install manually if missing):
# sudo apt-get update && sudo apt-get install libssl-dev libcurl4-openssl-dev -y
command -v aws &>/dev/null || { echo >&2 "Please install the aws cli"; exit 1; }

# Every path below (tmpbuild, ../datasets.csv, ../../..) is relative to the
# directory holding this script, so switch there regardless of where the
# script was launched from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")"

# Build the benchmark binaries in a scratch build directory.
output_file="results.csv"
mkdir -p tmpbuild
cd tmpbuild

# Results are appended to $output_file inside tmpbuild, so the stale file has
# to be removed here, after the cd, or old results pile up across runs.
rm -f "$output_file"

dataset="../datasets.csv"
# Check that the dataset manifest exists
if [[ ! -f "$dataset" ]]; then
    echo >&2 "$dataset file not found."
    exit 1
fi

cmake ../../.. -DCMAKE_BUILD_TYPE=Release
make -j csvtobtr
make -j decompression-speed
sync_uris "$dataset" "$output_file"

43 changes: 43 additions & 0 deletions tools/regression-benchmark/datasets.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
Arade_1, s3://public-bi-eu-central-1/v0.0.1/binary/Arade_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Arade_1/Arade_1.yaml
Bimbo_1, s3://public-bi-eu-central-1/v0.0.1/binary/Bimbo_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Bimbo_1/Bimbo_1.yaml
CityMaxCapita_1, s3://public-bi-eu-central-1/v0.0.1/binary/CityMaxCapita_1/, s3://public-bi-eu-central-1/v0.0.1/binary/CityMaxCapita_1/CityMaxCapita_1.yaml
CMSprovider_1, s3://public-bi-eu-central-1/v0.0.1/binary/CMSprovider_1/, s3://public-bi-eu-central-1/v0.0.1/binary/CMSprovider_1/CMSprovider_1.yaml
CommonGovernment_1, s3://public-bi-eu-central-1/v0.0.1/binary/CommonGovernment_1/, s3://public-bi-eu-central-1/v0.0.1/binary/CommonGovernment_1/CommonGovernment_1.yaml
Corporations_1, s3://public-bi-eu-central-1/v0.0.1/binary/Corporations_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Corporations_1/Corporations_1.yaml
Eixo_1, s3://public-bi-eu-central-1/v0.0.1/binary/Eixo_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Eixo_1/Eixo_1.yaml
Euro2016_1, s3://public-bi-eu-central-1/v0.0.1/binary/Euro2016_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Euro2016_1/Euro2016_1.yaml
Food_1, s3://public-bi-eu-central-1/v0.0.1/binary/Food_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Food_1/Food_1.yaml
Generico_1, s3://public-bi-eu-central-1/v0.0.1/binary/Generico_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Generico_1/Generico_1.yaml
Hashtags_1, s3://public-bi-eu-central-1/v0.0.1/binary/HashTags_1/, s3://public-bi-eu-central-1/v0.0.1/binary/HashTags_1/HashTags_1.yaml
IGlocations2_1, s3://public-bi-eu-central-1/v0.0.1/binary/IGlocations2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/IGlocations2_1/IGlocations2_1.yaml
Medicare1_1, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare1_1/Medicare1_1.yaml
Medicare2_1, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare2_1/Medicare2_1.yaml
Medicare3_1, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare3_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Medicare3_1/Medicare3_1.yaml
MedPayment1_1, s3://public-bi-eu-central-1/v0.0.1/binary/MedPayment1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/MedPayment1_1/MedPayment1_1.yaml
MedPayment2_1, s3://public-bi-eu-central-1/v0.0.1/binary/MedPayment2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/MedPayment2_1/MedPayment2_1.yaml
MLB_68, s3://public-bi-eu-central-1/v0.0.1/binary/68/MLB_68/, s3://public-bi-eu-central-1/v0.0.1/binary/MLB/68/MLB_68.yaml
Motos_1, s3://public-bi-eu-central-1/v0.0.1/binary/Motos_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Motos_1/Motos_1.yaml
MulheresMil_1, s3://public-bi-eu-central-1/v0.0.1/binary/MulheresMil_1/, s3://public-bi-eu-central-1/v0.0.1/binary/MulheresMil_1/MulheresMil_1.yaml
NYC_1, s3://public-bi-eu-central-1/v0.0.1/binary/NYC_1/, s3://public-bi-eu-central-1/v0.0.1/binary/NYC_1/NYC_1.yaml
PanCreactomy1_1, s3://public-bi-eu-central-1/v0.0.1/binary/PanCreactomy1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/PanCreactomy1_1/PanCreactomy1_1.yaml
PanCreactomy2_1, s3://public-bi-eu-central-1/v0.0.1/binary/PanCreactomy2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/PanCreactomy2_1/PanCreactomy2_1.yaml
Physicians_1, s3://public-bi-eu-central-1/v0.0.1/binary/Physicians_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Physicians_1/Physicians_1.yaml
Provider_1, s3://public-bi-eu-central-1/v0.0.1/binary/Provider_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Provider_1/Provider_1.yaml
RealEstate1_1, s3://public-bi-eu-central-1/v0.0.1/binary/RealEstate1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/RealEstate1_1/RealEstate1_1.yaml
RealEstate2_1, s3://public-bi-eu-central-1/v0.0.1/binary/RealEstate2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/RealEstate2_1/RealEstate2_1.yaml
Redfin1_1, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin1_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin1_1/Redfin1_1.yaml
Redfin2_1, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin2_1/Redfin2_1.yaml
Redfin3_1, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin3_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin3_1/Redfin3_1.yaml
Redfin4_1, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin4_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Redfin4_1/Redfin4_1.yaml
Rentabilidad_1, s3://public-bi-eu-central-1/v0.0.1/binary/Rentabilidad_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Rentabilidad_1/Rentabilidad_1.yaml
Romance_1, s3://public-bi-eu-central-1/v0.0.1/binary/Romance_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Romance_1/Romance_1.yaml
SalariesFrance_1, s3://public-bi-eu-central-1/v0.0.1/binary/SalariesFrance_1/, s3://public-bi-eu-central-1/v0.0.1/binary/SalariesFrance_1/SalariesFrance_1.yaml
TableroSistemaPenal_1, s3://public-bi-eu-central-1/v0.0.1/binary/TableroSistemaPenal_1/, s3://public-bi-eu-central-1/v0.0.1/binary/TableroSistemaPenal_1/TableroSistemaPenal_1.yaml
Taxpayer_1, s3://public-bi-eu-central-1/v0.0.1/binary/Taxpayer_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Taxpayer_1/Taxpayer_1.yaml
Telco_1, s3://public-bi-eu-central-1/v0.0.1/binary/Telco_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Telco_1/Telco_1.yaml
TrainsUK1_4, s3://public-bi-eu-central-1/v0.0.1/binary/4/TrainsUK1_4/, s3://public-bi-eu-central-1/v0.0.1/binary/TrainsUK1/4/TrainsUK1_4.yaml
TrainsUK2_1, s3://public-bi-eu-central-1/v0.0.1/binary/TrainsUK2_1/, s3://public-bi-eu-central-1/v0.0.1/binary/TrainsUK2_1/TrainsUK2_1.yaml
Uberlandia_1, s3://public-bi-eu-central-1/v0.0.1/binary/Uberlandia_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Uberlandia_1/Uberlandia_1.yaml
USCensus_1, s3://public-bi-eu-central-1/v0.0.1/binary/USCensus_1/, s3://public-bi-eu-central-1/v0.0.1/binary/USCensus_1/USCensus_1.yaml
Wins_1, s3://public-bi-eu-central-1/v0.0.1/binary/Wins_1/, s3://public-bi-eu-central-1/v0.0.1/binary/Wins_1/Wins_1.yaml
YaleLanguages_1, s3://public-bi-eu-central-1/v0.0.1/binary/YaleLanguages_1/, s3://public-bi-eu-central-1/v0.0.1/binary/YaleLanguages_1/YaleLanguages_1.yaml