Skip to content

Commit ccb2736

Browse files
committed
Sparse serialization support
1 parent 574da81 commit ccb2736

34 files changed

+1310
-229
lines changed

clickhouse/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ SET ( clickhouse-cpp-lib-src
1818
columns/lowcardinalityadaptor.h
1919
columns/nullable.cpp
2020
columns/numeric.cpp
21+
columns/serialization.cpp
2122
columns/string.cpp
2223
columns/tuple.cpp
2324
columns/uuid.cpp
@@ -115,6 +116,7 @@ INSTALL(FILES columns/itemview.h DESTINATION include/clickhouse/columns/)
115116
INSTALL(FILES columns/lowcardinality.h DESTINATION include/clickhouse/columns/)
116117
INSTALL(FILES columns/nullable.h DESTINATION include/clickhouse/columns/)
117118
INSTALL(FILES columns/numeric.h DESTINATION include/clickhouse/columns/)
119+
INSTALL(FILES columns/serialization.h DESTINATION include/clickhouse/columns/)
118120
INSTALL(FILES columns/string.h DESTINATION include/clickhouse/columns/)
119121
INSTALL(FILES columns/tuple.h DESTINATION include/clickhouse/columns/)
120122
INSTALL(FILES columns/utils.h DESTINATION include/clickhouse/columns/)

clickhouse/client.cpp

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,10 @@
4242
#define DBMS_MIN_REVISION_WITH_DISTRIBUTED_DEPTH 54448
4343
#define DBMS_MIN_REVISION_WITH_INITIAL_QUERY_START_TIME 54449
4444
#define DBMS_MIN_REVISION_WITH_INCREMENTAL_PROFILE_EVENTS 54451
45+
#define DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS 54453
46+
#define DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION 54454
4547

46-
#define REVISION DBMS_MIN_REVISION_WITH_INCREMENTAL_PROFILE_EVENTS
48+
#define REVISION DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION
4749

4850
namespace clickhouse {
4951

@@ -552,7 +554,19 @@ bool Client::Impl::ReadBlock(InputStream& input, Block* block) {
552554
return false;
553555
}
554556

557+
uint8_t has_custom_serialization = 0;
558+
if (REVISION >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION) {
559+
if (!WireFormat::ReadFixed(input, &has_custom_serialization)) {
560+
return false;
561+
}
562+
}
563+
555564
if (ColumnRef col = CreateColumnByType(type, create_column_settings)) {
565+
566+
if (has_custom_serialization) {
567+
col->LoadSerializationKind(&input);
568+
}
569+
556570
if (num_rows && !col->Load(&input, num_rows)) {
557571
throw ProtocolError("can't load column '" + name + "' of type " + type);
558572
}
@@ -708,6 +722,16 @@ void Client::Impl::SendQuery(const Query& query) {
708722
throw UnimplementedError(std::string("Can't send open telemetry tracing context to a server, server version is too old"));
709723
}
710724
}
725+
726+
if (server_info_.revision >= DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS)
727+
{
728+
// collaborate_with_initiator
729+
WireFormat::WriteUInt64 (*output_, 0u);
730+
// count_participating_replicas
731+
WireFormat::WriteUInt64 (*output_, 0u);
732+
// number_of_current_replica
733+
WireFormat::WriteUInt64 (*output_, 0u);
734+
}
711735
}
712736

713737
/// Per query settings
@@ -757,6 +781,17 @@ void Client::Impl::WriteBlock(const Block& block, OutputStream& output) {
757781
WireFormat::WriteString(output, bi.Name());
758782
WireFormat::WriteString(output, bi.Type()->GetName());
759783

784+
bool has_custom = bi.Column()->HasCustomSerialization();
785+
if (server_info_.revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION) {
786+
WireFormat::WriteFixed(output, static_cast<uint8_t>(has_custom));
787+
if (has_custom) {
788+
bi.Column()->SaveSerializationKind(&output);
789+
}
790+
} else {
791+
// Current implementation works only for server version >= v22.1.2.2-stable
792+
throw UnimplementedError(std::string("Can't send column with custom serialisation to a server, server version is too old"));
793+
}
794+
760795
// Empty columns are not serialized and occupy exactly 0 bytes.
761796
// ref https://github.com/ClickHouse/ClickHouse/blob/39b37a3240f74f4871c8c1679910e065af6bea19/src/Formats/NativeWriter.cpp#L163
762797
const bool containsData = block.GetRowCount() > 0;

clickhouse/columns/array.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ ColumnArray::ColumnArray(ColumnRef data)
1111
}
1212

1313
ColumnArray::ColumnArray(ColumnRef data, std::shared_ptr<ColumnUInt64> offsets)
14-
: Column(Type::CreateArray(data->Type()))
14+
: Column(Type::CreateArray(data->Type()), Serialization::MakeDefault(this))
1515
, data_(data)
1616
, offsets_(offsets)
1717
{
1818
}
1919

2020
ColumnArray::ColumnArray(ColumnArray&& other)
21-
: Column(other.Type())
21+
: Column(other.Type(), Serialization::MakeDefault(this))
2222
, data_(std::move(other.data_))
2323
, offsets_(std::move(other.offsets_))
2424
{
@@ -73,30 +73,29 @@ bool ColumnArray::LoadPrefix(InputStream* input, size_t rows) {
7373
if (!rows) {
7474
return true;
7575
}
76-
77-
return data_->LoadPrefix(input, rows);
76+
return data_->GetSerialization()->LoadPrefix(data_.get(), input, rows);
7877
}
7978

8079
bool ColumnArray::LoadBody(InputStream* input, size_t rows) {
8180
if (!rows) {
8281
return true;
8382
}
84-
if (!offsets_->LoadBody(input, rows)) {
83+
if (!offsets_->GetSerialization()->LoadBody(offsets_.get(), input, rows)) {
8584
return false;
8685
}
87-
if (!data_->LoadBody(input, (*offsets_)[rows - 1])) {
86+
if (!data_->GetSerialization()->LoadBody(data_.get(), input, (*offsets_)[rows - 1])) {
8887
return false;
8988
}
9089
return true;
9190
}
9291

9392
void ColumnArray::SavePrefix(OutputStream* output) {
94-
data_->SavePrefix(output);
93+
data_->GetSerialization()->SavePrefix(data_.get(), output);
9594
}
9695

9796
void ColumnArray::SaveBody(OutputStream* output) {
98-
offsets_->SaveBody(output);
99-
data_->SaveBody(output);
97+
offsets_->GetSerialization()->SaveBody(offsets_.get(), output);
98+
data_->GetSerialization()->SaveBody(data_.get(), output);
10099
}
101100

102101
void ColumnArray::Clear() {
@@ -114,6 +113,18 @@ void ColumnArray::Swap(Column& other) {
114113
offsets_.swap(col.offsets_);
115114
}
116115

116+
/// Selects the serialization used by this array column.
/// Only Serialization::Kind::DEFAULT is handled here; any other kind
/// results in an UnimplementedError being thrown.
void ColumnArray::SetSerializationKind(Serialization::Kind kind) {
117+
switch (kind)
118+
{
119+
case Serialization::Kind::DEFAULT:
120+
// Replace the current serialization with a default one created for this column.
serialization_ = Serialization::MakeDefault(this);
121+
break;
122+
default:
123+
// The error text carries the numeric value of the kind and the column's type name.
throw UnimplementedError("Serialization kind:" + std::to_string(static_cast<int>(kind))
124+
+ " is not supported for column of " + type_->GetName());
125+
}
126+
}
127+
117128
void ColumnArray::OffsetsIncrease(size_t n) {
118129
offsets_->Append(n);
119130
}

clickhouse/columns/array.h

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,6 @@ class ColumnArray : public Column {
4949
/// Appends content of given column to the end of current one.
5050
void Append(ColumnRef column) override;
5151

52-
/// Loads column prefix from input stream.
53-
bool LoadPrefix(InputStream* input, size_t rows) override;
54-
55-
/// Loads column data from input stream.
56-
bool LoadBody(InputStream* input, size_t rows) override;
57-
58-
/// Saves column prefix to output stream.
59-
void SavePrefix(OutputStream* output) override;
60-
61-
/// Saves column data to output stream.
62-
void SaveBody(OutputStream* output) override;
63-
6452
/// Clear column data.
6553
void Clear() override;
6654

@@ -72,6 +60,8 @@ class ColumnArray : public Column {
7260
ColumnRef CloneEmpty() const override;
7361
void Swap(Column&) override;
7462

63+
void SetSerializationKind(Serialization::Kind kind) override;
64+
7565
void OffsetsIncrease(size_t);
7666

7767
protected:
@@ -86,6 +76,20 @@ class ColumnArray : public Column {
8676
void Reset();
8777

8878
private:
79+
/// Loads column prefix from input stream.
80+
bool LoadPrefix(InputStream* input, size_t rows);
81+
82+
/// Loads column data from input stream.
83+
bool LoadBody(InputStream* input, size_t rows);
84+
85+
/// Saves column prefix to output stream.
86+
void SavePrefix(OutputStream* output);
87+
88+
/// Saves column data to output stream.
89+
void SaveBody(OutputStream* output);
90+
91+
friend SerializationDefault<ColumnArray>;
92+
8993
ColumnRef data_;
9094
std::shared_ptr<ColumnUInt64> offsets_;
9195
};

clickhouse/columns/column.cpp

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,44 @@
11
#include "column.h"
22

3+
#include "../base/wire_format.h"
4+
35
namespace clickhouse {
46

5-
bool Column::LoadPrefix(InputStream*, size_t) {
6-
/// does nothing by default
7+
/// Loads the column from the input stream by delegating to the column's
/// serialization object: the prefix is loaded first, then the body.
/// Returns false if either step reports failure; because of the short-circuit
/// `&&`, the body is not attempted when the prefix load fails.
// NOTE(review): assert() needs <cassert>; the visible includes here are only
// "column.h" and "../base/wire_format.h" - confirm it is pulled in transitively.
bool Column::Load(InputStream* input, size_t rows) {
8+
assert(serialization_);
9+
return serialization_->LoadPrefix(this, input, rows)
10+
&& serialization_->LoadBody(this, input, rows);
11+
}
12+
13+
/// Saves column data to output stream.
/// Delegates to the column's serialization object: the prefix is written
/// first, then the body. The serialization object is expected to be set
/// (checked with assert only).
14+
void Column::Save(OutputStream* output) {
15+
assert(serialization_);
16+
serialization_->SavePrefix(this, output);
17+
serialization_->SaveBody(this,output);
18+
}
19+
20+
/// Reads a single byte from the input stream and applies it as this column's
/// serialization kind via SetSerializationKind().
/// Returns false if the byte could not be read, true otherwise.
// NOTE(review): the byte is cast to Serialization::Kind without any range check
// here; handling of unknown values is left to the SetSerializationKind() override
// (e.g. ColumnArray throws UnimplementedError for non-DEFAULT kinds).
bool Column::LoadSerializationKind(InputStream* input) {
21+
uint8_t kind;
22+
if (!WireFormat::ReadFixed(*input, &kind)) {
23+
return false;
24+
}
25+
SetSerializationKind(static_cast<Serialization::Kind>(kind));
726
return true;
827
}
928

10-
bool Column::Load(InputStream* input, size_t rows) {
11-
return LoadPrefix(input, rows) && LoadBody(input, rows);
29+
/// Writes the kind of the current serialization to the output stream
/// as a single byte (the kind value is narrowed to uint8_t).
void Column::SaveSerializationKind(OutputStream* output) {
30+
assert(serialization_);
31+
WireFormat::WriteFixed(*output, static_cast<uint8_t>(serialization_->GetKind()));
1232
}
1333

14-
void Column::SavePrefix(OutputStream*) {
15-
/// does nothing by default
34+
/// Returns the serialization object currently attached to this column.
/// The reference is returned by value; it is expected to be non-null
/// (checked with assert only).
SerializationRef Column::GetSerialization() {
35+
assert(serialization_);
36+
return serialization_;
1637
}
1738

18-
/// Saves column data to output stream.
19-
void Column::Save(OutputStream* output) {
20-
SavePrefix(output);
21-
SaveBody(output);
39+
/// Reports whether this column uses a non-default serialization,
/// i.e. whether the kind of the attached serialization differs from
/// Serialization::Kind::DEFAULT.
bool Column::HasCustomSerialization() const {
40+
assert(serialization_);
41+
return serialization_->GetKind() != Serialization::Kind::DEFAULT;
2242
}
2343

2444
}

clickhouse/columns/column.h

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "../types/types.h"
44
#include "../columns/itemview.h"
5+
#include "../columns/serialization.h"
56
#include "../exceptions.h"
67

78
#include <memory>
@@ -19,7 +20,11 @@ using ColumnRef = std::shared_ptr<class Column>;
1920
*/
2021
class Column : public std::enable_shared_from_this<Column> {
2122
public:
22-
explicit inline Column(TypeRef type) : type_(type) {}
23+
explicit inline Column(TypeRef type, SerializationRef serialization)
24+
: type_(std::move(type))
25+
, serialization_(std::move(serialization))
26+
{
27+
}
2328

2429
virtual ~Column() {}
2530

@@ -56,18 +61,6 @@ class Column : public std::enable_shared_from_this<Column> {
5661
/// Should be called only once from the client. Derived classes should not call it.
5762
bool Load(InputStream* input, size_t rows);
5863

59-
/// Loads column prefix from input stream.
60-
virtual bool LoadPrefix(InputStream* input, size_t rows);
61-
62-
/// Loads column data from input stream.
63-
virtual bool LoadBody(InputStream* input, size_t rows) = 0;
64-
65-
/// Saves column prefix to output stream. Column types with prefixes must implement it.
66-
virtual void SavePrefix(OutputStream* output);
67-
68-
/// Saves column body to output stream.
69-
virtual void SaveBody(OutputStream* output) = 0;
70-
7164
/// Template method to save to output stream. It'll call SavePrefix and SaveBody respectively
7265
/// Should be called only once from the client. Derived classes should not call it.
7366
/// Save is split in Prefix and Body because some data types require prefixes and specific serialization order.
@@ -93,12 +86,23 @@ class Column : public std::enable_shared_from_this<Column> {
9386
throw UnimplementedError("GetItem() is not supported for column of " + type_->GetName());
9487
}
9588

89+
virtual bool LoadSerializationKind(InputStream* input);
90+
91+
virtual void SaveSerializationKind(OutputStream* output);
92+
93+
virtual void SetSerializationKind(Serialization::Kind kind) = 0;
94+
95+
SerializationRef GetSerialization();
96+
97+
virtual bool HasCustomSerialization() const;
98+
9699
friend void swap(Column& left, Column& right) {
97100
left.Swap(right);
98101
}
99102

100103
protected:
101104
TypeRef type_;
105+
SerializationRef serialization_;
102106
};
103107

104108
} // namespace clickhouse

0 commit comments

Comments
 (0)