Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ message(STATUS " WITH_LOGGING = ${WITH_LOGGING}")
message(STATUS " SAMPLING_TEST_MODE = ${SAMPLING_TEST_MODE}")
message(STATUS " ENABLE_FOR_SCHEME = ${ENABLE_FOR_SCHEME}")
message(STATUS " BUILD_SHARED_LIBRARY = ${BUILD_SHARED_LIBRARY}")
message(STATUS " BTRBLOCKS_CHUNKSIZE = ${BTRBLOCKS_CHUNKSIZE}")
message(STATUS "[test] settings")
message(STATUS " GTEST_INCLUDE_DIR = ${GTEST_INCLUDE_DIR}")
message(STATUS " GTEST_LIBRARY_PATH = ${GTEST_LIBRARY_PATH}")
Expand Down
7 changes: 6 additions & 1 deletion btrblocks/btrblocks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
#include "scheme/SchemeConfig.hpp"
#include "scheme/SchemeType.hpp"
// ------------------------------------------------------------------------------
// Fallback for builds that do not pass -DCHUNKSIZE on the compile line
// (normally supplied from BTRBLOCKS_CHUNKSIZE by the build system). The value
// is an exponent: BtrBlocksConfig::block_size defaults to 1 << CHUNKSIZE tuples.
#ifndef CHUNKSIZE
#define CHUNKSIZE 16 // Define the CHUNKSIZE if it is not defined by a compile option
#warning "No chunk size was set, defaulting to CHUNKSIZE=16"
#endif
// ------------------------------------------------------------------------------
namespace btrblocks {
// ------------------------------------------------------------------------------
// Global configuration used by the compression interface
Expand All @@ -21,7 +26,7 @@ enum class SchemeSelection : uint8_t { SAMPLE, TRY_ALL };
// ------------------------------------------------------------------------------
struct BtrBlocksConfig {
// clang-format off
size_t block_size{65536}; // max tuples in a single block
size_t block_size{1 << CHUNKSIZE}; // max tuples in a single block
uint32_t sample_size{64}; // run size of each sample
uint32_t sample_count{10}; // number of samples to take

Expand Down
2 changes: 1 addition & 1 deletion btrblocks/compression/BtrReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ u32 BtrReader::getDecompressedDataSize(u32 index) {

auto input_data = static_cast<const u8*>(meta->data);
BitmapWrapper* bitmapWrapper = this->getBitmap(index);
u32 size = scheme.getTotalLength(input_data, meta->tuple_count, bitmapWrapper);
u32 size = scheme.getDecompressedSize(input_data, meta->tuple_count, bitmapWrapper);
return size;
}
default: {
Expand Down
5 changes: 3 additions & 2 deletions btrblocks/extern/FastPFOR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#include <headers/blockpacking.h>
#include <headers/codecfactory.h>
#include <headers/compositecodec.h>
#include <headers/deltautil.h>
#include <headers/fastpfor.h>
#include <headers/simdfastpfor.h>
#include <headers/variablebyte.h>
#pragma GCC diagnostic pop
// -------------------------------------------------------------------------------------
using namespace btrblocks;
Expand Down
2 changes: 2 additions & 0 deletions btrblocks/local.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ target_include_directories(btrblocks
PUBLIC ${BTR_PUBLIC_INCLUDE_DIR}
PRIVATE ${BTR_PRIVATE_INCLUDE_DIR})

# CHUNKSIZE is a preprocessor definition, so use target_compile_definitions
# rather than target_compile_options. It must be PUBLIC: the macro is consumed
# in the public header btrblocks.hpp, so consumers compiling against that
# header need the same value the library was built with — with PRIVATE they
# would silently fall back to the in-header default of 16 and get a
# BtrBlocksConfig::block_size that disagrees with the compiled library.
target_compile_definitions(btrblocks PUBLIC "CHUNKSIZE=${BTRBLOCKS_CHUNKSIZE}")

# set_target_properties(btrblocks PROPERTIES PUBLIC_HEADER "${BTR_HH}")

# ---------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions cmake/environment.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# ---------------------------------------------------------------------------
# Environment-specific settings
# ---------------------------------------------------------------------------
# User-configurable chunk-size exponent: a block holds 2^BTRBLOCKS_CHUNKSIZE
# tuples. Only defaulted when the user has not already provided a value
# (e.g. via -DBTRBLOCKS_CHUNKSIZE=...); declared as a documented cache entry
# so it is visible and editable in cmake-gui/ccmake.
if(NOT DEFINED BTRBLOCKS_CHUNKSIZE)
  set(BTRBLOCKS_CHUNKSIZE 16 CACHE STRING "Chunk size exponent: block size = 2^N tuples")
endif()

if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND CMAKE_BUILD_TYPE MATCHES Debug)
add_compile_options(-fstandalone-debug)
Expand Down
2 changes: 1 addition & 1 deletion cmake/fastpfor.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ExternalProject_Add(
fastpfor_src
PREFIX "vendor/lemire/fastpfor"
GIT_REPOSITORY "https://github.com/lemire/FastPFor.git"
GIT_TAG 773283d4a11fa2440a1b3b28fd77f775e86d7898
GIT_TAG master
TIMEOUT 10
UPDATE_COMMAND "" # to prevent rebuilding every time
INSTALL_COMMAND ""
Expand Down
2 changes: 1 addition & 1 deletion cmake/fsst.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ExternalProject_Add(
fsst_src
PREFIX "vendor/cwida/fsst"
GIT_REPOSITORY "https://github.com/cwida/fsst.git"
GIT_TAG 0f0f9057048412da1ee48e35d516155cb7edd155
GIT_TAG aaa3a0cedfe191607f6884a6781cc2d0e97d203c
TIMEOUT 10
UPDATE_COMMAND "" # to prevent rebuilding every time
INSTALL_COMMAND ""
Expand Down
9 changes: 4 additions & 5 deletions cmake/tbb.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,17 @@ find_package(Git REQUIRED)
ExternalProject_Add(
tbb_src
PREFIX "vendor/intel/tbb"
GIT_REPOSITORY "https://github.com/wjakob/tbb.git"
GIT_TAG b066defc0229a1e92d7a200eb3fe0f7e35945d95
GIT_REPOSITORY "https://github.com/seb711/oneTBB.git"
GIT_TAG master
TIMEOUT 10
BUILD_COMMAND make
UPDATE_COMMAND "" # to prevent rebuilding every time
INSTALL_COMMAND ""
CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/vendor/tbb_cpp
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_LIBRARY_OUTPUT_DIRECTORY:STRING=./lib
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DTBB_TEST:BOOL=OFF
)

# Prepare json
Expand Down
108 changes: 94 additions & 14 deletions tools/conversion/decompression-speed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,34 @@
#include <fstream>
#include <random>
#include <tbb/parallel_for_each.h>
#include <thread>
// -------------------------------------------------------------------------------------
#include "gflags/gflags.h"
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include "tbb/global_control.h"
#include "tbb/parallel_for.h"
#include "tbb/task_scheduler_init.h"
#include <yaml-cpp/yaml.h>
// -------------------------------------------------------------------------------------
#include "btrfiles.hpp"
#include "common/PerfEvent.hpp"
#include "common/Utils.hpp"
#include "compression/BtrReader.hpp"
#include "scheme/SchemePool.hpp"
// -------------------------------------------------------------------------------------
DEFINE_string(btr, "btr", "Directory with btr input");
DEFINE_int32(threads, 1, "Number of threads used. not specifying lets tbb decide");
DEFINE_int32(threads, -1, "Number of threads used. not specifying lets tbb decide");
DEFINE_int32(column, -1, "Select a specific column to measure");
DEFINE_string(typefilter, "", "Only measure columns with given type");
//DEFINE_int32(chunk, -1, "Select a specific chunk to measure");
DEFINE_uint32(reps, 1, "Loop reps times");
DEFINE_bool(perfevent, false, "Profile with perf event if true");
DEFINE_bool(output_summary, false, "Output a summary of total speed and size");
DEFINE_bool(output_columns, true, "Output speeds and sizes for single columns");
DEFINE_bool(output_summary, true, "Output a summary of total speed and size");
DEFINE_bool(output_columns, false, "Output speeds and sizes for single columns");
DEFINE_bool(print_simd_debug, false, "Print SIMD usage debug information");
// if verification is needed following is obligatory
DEFINE_bool(verify, false, "Verify that decompression works");
DEFINE_string(yaml, "schema.yaml", "Schema in YAML format");
DEFINE_string(binary, "binary", "Directory for binary output");
// -------------------------------------------------------------------------------------
using namespace btrblocks;
// -------------------------------------------------------------------------------------
Expand All @@ -37,6 +45,53 @@ void reset_bitmaps(const FileMetadata *metadata, std::vector<std::vector<BtrRead
});
}
// -------------------------------------------------------------------------------------
// Holds the fully decompressed output of one chunk so it can later be
// compared against the original input during --verify.
struct DecompressedChunkData {
  vector<BITMAP> bitmap;    // null/validity bitmap, as produced by BitmapWrapper::writeBITMAP()
  std::vector<u8> data;     // raw decompressed bytes produced by BtrReader::readColumn()
  u64 tuple_count;          // number of tuples in this chunk (from the chunk metadata)
  bool requiresCopy;        // return value of readColumn(); presumably indicates the output
                            // must be copied rather than viewed in place -- TODO confirm
  Range range;              // (start offset, tuple count) of this chunk -- assigned from a tuple<u64, u64>
  size_t decompressedSize;  // BtrReader::getDecompressedDataSize() for this chunk
};
// -------------------------------------------------------------------------------------
// Decompress every chunk of the selected columns, record per-column and total
// wall-clock decompression times, and retain the decompressed contents in
// `decompressed_data` so they can be verified afterwards.
//
// - metadata:          file-level metadata (per-column part counts)
// - readers:           one BtrReader per (column, part)
// - runtimes:          per-column accumulated runtime in microseconds
//                      (added to, not overwritten, so repeated calls accumulate)
// - columns:           indices of the columns to measure
// - decompressed_data: output, indexed [column][part][chunk]; must be sized to
//                      the number of columns by the caller
//
// Returns the total wall-clock runtime over all columns, in microseconds.
u64 measure_and_store(const FileMetadata *metadata, std::vector<std::vector<BtrReader>> &readers, std::vector<u64> &runtimes, std::vector<u32> &columns, std::vector<std::vector<std::vector<DecompressedChunkData>>> &decompressed_data) {
  // Make sure no bitmap is cached
  reset_bitmaps(metadata, readers, columns);

  auto total_start_time = std::chrono::steady_clock::now();

  // parallel_for_each hands each column to exactly one task, so the
  // unsynchronized write to runtimes[column_i] below is race-free.
  tbb::parallel_for_each(columns, [&](u32 column_i) {
    decompressed_data[column_i].resize(metadata->parts[column_i].num_parts);
    // TODO not sure if measuring the time like that will cause problems
    // NOTE(review): this per-column timer includes the scheduling overhead of
    // the nested parallel loops; treat per-column numbers as approximate.
    auto start_time = std::chrono::steady_clock::now();
    tbb::parallel_for(u32(0), metadata->parts[column_i].num_parts, [&](u32 part_i) {
      auto &reader = readers[column_i][part_i];
      decompressed_data[column_i][part_i].resize(reader.getChunkCount());

      tbb::parallel_for(u32(0), reader.getChunkCount(), [&](u32 chunk_i) {
        // Per-thread scratch buffer; it is moved from below, so this relies on
        // readColumn() reassigning/refilling it on the next use -- TODO confirm.
        thread_local std::vector<u8> dataDest;

        decompressed_data[column_i][part_i][chunk_i].requiresCopy =
            reader.readColumn(dataDest, chunk_i);
        decompressed_data[column_i][part_i][chunk_i].data = std::move(dataDest);
        decompressed_data[column_i][part_i][chunk_i].bitmap = std::move(reader.getBitmap(chunk_i)->writeBITMAP());
        decompressed_data[column_i][part_i][chunk_i].tuple_count = reader.getChunkMetadata(chunk_i)->tuple_count;
        // Range covered by this chunk: (start offset, tuple count).
        decompressed_data[column_i][part_i][chunk_i].range = tuple<u64, u64>(
            reader.getPartMetadata()->offsets[chunk_i],
            reader.getTupleCount(chunk_i));
        decompressed_data[column_i][part_i][chunk_i].decompressedSize = reader.getDecompressedDataSize(chunk_i);
      });
    });
    auto end_time = std::chrono::steady_clock::now();
    auto runtime = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
    runtimes[column_i] += runtime.count();
  });

  auto total_end_time = std::chrono::steady_clock::now();
  auto total_runtime = std::chrono::duration_cast<std::chrono::microseconds>(total_end_time - total_start_time);
  return total_runtime.count();
}
// -------------------------------------------------------------------------------------
u64 measure(const FileMetadata *metadata, std::vector<std::vector<BtrReader>> &readers, std::vector<u64> &runtimes, std::vector<u32> &columns) {
// Make sure no bitmap is cached
reset_bitmaps(metadata, readers, columns);
Expand Down Expand Up @@ -103,12 +158,13 @@ int main(int argc, char **argv) {

int threads;
if (FLAGS_threads < 1) {
// Automatic selection
threads = -1;
// Automatic selection
tbb::global_control c(tbb::global_control::max_allowed_parallelism,
std::thread::hardware_concurrency());
} else {
threads = FLAGS_threads;
threads = FLAGS_threads;
tbb::global_control c(tbb::global_control::max_allowed_parallelism, threads);
}
tbb::task_scheduler_init init(threads);

// Read the metadata
std::vector<char> raw_file_metadata;
Expand Down Expand Up @@ -202,6 +258,35 @@ int main(int argc, char **argv) {
total_compressed_size += compressed_sizes[column_i];
}


// Verify
size_t total_size_verify = 0;
if (FLAGS_verify) {
std::vector<std::vector<std::vector<DecompressedChunkData>>> decompressed_data(file_metadata->num_columns);

measure_and_store(file_metadata, readers, runtimes, columns, decompressed_data);

const auto schema = YAML::LoadFile(FLAGS_yaml);
Relation relation = files::readDirectory(schema, FLAGS_binary.back() == '/' ? FLAGS_binary : FLAGS_binary + "/");
auto ranges = relation.getRanges(SplitStrategy::SEQUENTIAL, -1);

for (u32 column_i : columns) {
u32 global_chunk_counter = 0;
for (u32 part_i = 0; part_i < file_metadata->parts[column_i].num_parts; part_i++) {
for (u32 chunk_i = 0; chunk_i < decompressed_data[column_i][part_i].size(); chunk_i++, global_chunk_counter++) {
DecompressedChunkData& decompressedChunk = decompressed_data[column_i][part_i][chunk_i];
auto input_chunk = relation.getInputChunk(ranges[global_chunk_counter], global_chunk_counter, column_i);
if (!input_chunk.compareContents(decompressedChunk.data.data(), decompressedChunk.bitmap,
decompressedChunk.tuple_count,
decompressedChunk.requiresCopy)) {
throw Generic_Exception("Decompression yields different contents");
}
total_size_verify += input_chunk.size;
}
}
}
}

if (FLAGS_output_columns) {
for (u32 column_i : columns) {
double average_runtime = static_cast<double>(runtimes[column_i]) / static_cast<double>(FLAGS_reps);
Expand Down Expand Up @@ -229,12 +314,7 @@ int main(int argc, char **argv) {
double s = average_runtime / (1000.0 * 1000.0);
double mbs = mb / s;

std::cout << "Total:"
<< " " << total_compressed_size << " Bytes"
<< " " << total_size << " Bytes"
<< " " << average_runtime << " us"
<< " " << mbs << " MB/s"
<< std::endl;
std::cout << std::to_string(average_runtime) << ", " << total_compressed_size << ", " << total_size << ", " << total_size_verify << ", " << std::to_string((double)total_size / (double)total_compressed_size) << '\n';
}
}
// -------------------------------------------------------------------------------------
2 changes: 1 addition & 1 deletion tools/conversion/local.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ target_link_libraries(btrmeta btrblocks gflags)

add_executable(decompression-speed ${BTR_CONVERSION_DIR}/decompression-speed.cpp)
target_include_directories(decompression-speed PRIVATE ${BTR_INCLUDE_DIR})
target_link_libraries(decompression-speed btrblocks tbb gflags)
target_link_libraries(decompression-speed btrfiles btrblocks tbb gflags)

add_executable(decompression-speed-s3 ${BTR_CONVERSION_DIR}/decompression-speed-s3.cpp)
target_include_directories(decompression-speed-s3 PRIVATE ${BTR_INCLUDE_DIR})
Expand Down