Skip to content

Commit 7db174a

Browse files
author
Anibal Pinto
committed
BUG#34819861: Gtid_set::Interval_chunk mem incorrectly tracked on THD_applier_module_receiver
In a GR cluster setup with a primary and two secondaries, it was observed that the primary was not releasing all allocated memory related to the "memory/sql/Gtid_set::Interval_chunk" event name. The memory used by GR associated to the the key `memory/sql/Gtid_set::Interval_chunk` is allocated by one thread: `thread/group_rpl/THD_applier_module_receiver`, but it is released by a different thread: `thread/group_rpl/THD_certifier_broadcast`. This was causing a incorrect memory usage relative to `thread/group_rpl/THD_applier_module_receiver` since only memory allocations were accounted, making the reported memory always increasing. The global memory reported on performance_schema.memory_summary_global_by_event_name` was not affected. In order to avoid the incorrect memory usage, the used memory ownership is transferred from `thread/group_rpl/THD_applier_module_receiver` when it becomes out of scope into `thread/group_rpl/THD_certifier_broadcast` when it becomes part of the scope. Change-Id: Ieed76a6fd338ef693f8cf812c5e11680494b8829
1 parent 64efe68 commit 7db174a

File tree

6 files changed

+196
-3
lines changed

6 files changed

+196
-3
lines changed

include/prealloced_array.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,15 @@ class Prealloced_array {
310310
return false;
311311
}
312312

313+
/**
314+
Claim memory ownership.
315+
316+
@param claim take ownership of memory
317+
*/
318+
void claim_memory_ownership(bool claim) {
319+
if (!using_inline_buffer()) my_claim(m_ext.m_array_ptr, claim);
320+
}
321+
313322
/**
314323
Copies an element into the back of the array.
315324
Complexity: Constant (amortized time, reallocation may happen).
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
include/group_replication.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information.
5+
[connection server1]
6+
include/start_and_bootstrap_group_replication.inc
7+
8+
############################################################
9+
# 1. Populate server with data to alloc memory using
10+
# Gtid_set::Interval_chunk key.
11+
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB;
12+
BEGIN;
13+
INSERT INTO t1 (c2) values (2);
14+
INSERT INTO t1 (c2) values (2);
15+
INSERT INTO t1 (c2) values (2);
16+
INSERT INTO t1 (c2) values (2);
17+
INSERT INTO t1 (c2) values (2);
18+
COMMIT;
19+
20+
############################################################
21+
# 2. Assert values on thread and global counters before
22+
# running garbage collect.
23+
include/assert.inc [Global counters shall been higher than sum of applier and certifier threads.]
24+
25+
############################################################
26+
# 3. Wait for execution of another garbage collection
27+
28+
############################################################
29+
# 4. Assert values on thread and global counters after
30+
# running garbage collect.
31+
include/assert.inc [Global counters shall been higher than sum of applier and certifier threads.]
32+
33+
############################################################
34+
# 5. Cleanup
35+
DROP TABLE t1;
36+
include/group_replication_end.inc
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
################################################################################
2+
# Validate that primary releases all allocated memory related to
3+
# "memory/sql/Gtid_set::Interval_chunk" event name.
4+
#
5+
# Test:
6+
# 0. The test requires one server: M1.
7+
# 1. Populate server with data to alloc memory using
8+
# Gtid_set::Interval_chunk key.
9+
# 2. Assert values on thread and global counters before
10+
# running garbage collect.
11+
# 3. Wait for execution of another garbage collection
12+
# 4. Assert values on thread and global counters after
13+
# running garbage collect.
14+
# 5. Cleanup
15+
################################################################################
16+
17+
--source include/have_nodebug.inc
18+
--source include/not_valgrind.inc
19+
--source include/not_asan.inc
20+
--source include/big_test.inc
21+
--source include/have_group_replication_plugin.inc
22+
--let $rpl_skip_group_replication_start= 1
23+
--source include/group_replication.inc
24+
25+
--source include/start_and_bootstrap_group_replication.inc
26+
27+
28+
--echo
29+
--echo ############################################################
30+
--echo # 1. Populate server with data to alloc memory using
31+
--echo # Gtid_set::Interval_chunk key.
32+
33+
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT) ENGINE=InnoDB;
34+
35+
BEGIN;
36+
INSERT INTO t1 (c2) values (2);
37+
INSERT INTO t1 (c2) values (2);
38+
INSERT INTO t1 (c2) values (2);
39+
INSERT INTO t1 (c2) values (2);
40+
INSERT INTO t1 (c2) values (2);
41+
COMMIT;
42+
43+
--exec $MYSQL_SLAP --create-schema=test --delimiter=";" --iterations=100 --query="INSERT INTO t1 (c2) SELECT c2 FROM t1 LIMIT 5" --concurrency=1 --silent
44+
45+
46+
--echo
47+
--echo ############################################################
48+
--echo # 2. Assert values on thread and global counters before
49+
--echo # running garbage collect.
50+
51+
--let $applier_bytes_used= `SELECT CURRENT_NUMBER_OF_BYTES_USED FROM performance_schema.memory_summary_by_thread_by_event_name JOIN performance_schema.threads USING(thread_id) WHERE event_name = "memory/sql/Gtid_set::Interval_chunk" AND name = "thread/group_rpl/THD_applier_module_receiver"`
52+
--let $certifier_bytes_used= `SELECT CURRENT_NUMBER_OF_BYTES_USED FROM performance_schema.memory_summary_by_thread_by_event_name JOIN performance_schema.threads USING(thread_id) WHERE event_name = "memory/sql/Gtid_set::Interval_chunk" AND name = "thread/group_rpl/THD_certifier_broadcast"`
53+
--let $global_bytes_used= `SELECT CURRENT_NUMBER_OF_BYTES_USED FROM performance_schema.memory_summary_global_by_event_name WHERE EVENT_NAME = "memory/sql/Gtid_set::Interval_chunk"`
54+
55+
--let $assert_text= Global counters shall been higher than sum of applier and certifier threads.
56+
--let $assert_cond= [SELECT $applier_bytes_used + $certifier_bytes_used] <= $global_bytes_used
57+
--source include/assert.inc
58+
59+
60+
--echo
61+
--echo ############################################################
62+
--echo # 3. Wait for execution of another garbage collection
63+
64+
--let $wait_timeout= 150
65+
--let $wait_condition= SELECT Count_transactions_rows_validating=5 FROM performance_schema.replication_group_member_stats WHERE member_id IN (SELECT @@server_uuid)
66+
--source include/wait_condition.inc
67+
68+
69+
--echo
70+
--echo ############################################################
71+
--echo # 4. Assert values on thread and global counters after
72+
--echo # running garbage collect.
73+
74+
--let $applier_bytes_used= `SELECT CURRENT_NUMBER_OF_BYTES_USED FROM performance_schema.memory_summary_by_thread_by_event_name JOIN performance_schema.threads USING(thread_id) WHERE event_name = "memory/sql/Gtid_set::Interval_chunk" AND name = "thread/group_rpl/THD_applier_module_receiver"`
75+
--let $certifier_bytes_used= `SELECT CURRENT_NUMBER_OF_BYTES_USED FROM performance_schema.memory_summary_by_thread_by_event_name JOIN performance_schema.threads USING(thread_id) WHERE event_name = "memory/sql/Gtid_set::Interval_chunk" AND name = "thread/group_rpl/THD_certifier_broadcast"`
76+
--let $global_bytes_used= `SELECT CURRENT_NUMBER_OF_BYTES_USED FROM performance_schema.memory_summary_global_by_event_name WHERE EVENT_NAME = "memory/sql/Gtid_set::Interval_chunk"`
77+
78+
--let $assert_text= Global counters shall been higher than sum of applier and certifier threads.
79+
--let $assert_cond= [SELECT $applier_bytes_used + $certifier_bytes_used] <= $global_bytes_used
80+
--source include/assert.inc
81+
82+
83+
--echo
84+
--echo ############################################################
85+
--echo # 5. Cleanup
86+
87+
DROP TABLE t1;
88+
89+
--source include/group_replication_end.inc

plugin/group_replication/src/certifier.cc

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,15 @@ void Certifier::clear_certification_info() {
577577
for (Certification_info::iterator it = certification_info.begin();
578578
it != certification_info.end(); ++it) {
579579
// We can only delete the last reference.
580-
if (it->second->unlink() == 0) delete it->second;
580+
if (it->second->unlink() == 0) {
581+
/*
582+
Claim Gtid_set_ref used memory to
583+
`thread/group_rpl/THD_certifier_broadcast` thread, since this is thread
584+
that does release the memory.
585+
*/
586+
it->second->claim_memory_ownership(true);
587+
delete it->second;
588+
}
581589
}
582590

583591
certification_info.clear();
@@ -904,6 +912,14 @@ rpl_gno Certifier::certify(Gtid_set *snapshot_version,
904912
item_previous_sequence_number != parallel_applier_sequence_number)
905913
transaction_last_committed = item_previous_sequence_number;
906914
}
915+
/*
916+
The memory used by Gtid_set_ref is allocated by
917+
`thread/group_rpl/THD_applier_module_receiver`, though it will be released
918+
by `thread/group_rpl/THD_certifier_broadcast` thread. To avoid untracked
919+
memory release on `thread/group_rpl/THD_applier_module_receiver` we do
920+
dissociate this used memory from this thread.
921+
*/
922+
snapshot_version_value->claim_memory_ownership(false);
907923
}
908924

909925
/*
@@ -1154,7 +1170,15 @@ bool Certifier::add_item(const char *item, Gtid_set_ref *snapshot_version,
11541170
*item_previous_sequence_number =
11551171
it->second->get_parallel_applier_sequence_number();
11561172

1157-
if (it->second->unlink() == 0) delete it->second;
1173+
if (it->second->unlink() == 0) {
1174+
/*
1175+
Claim Gtid_set_ref used memory to
1176+
`thread/group_rpl/THD_certifier_broadcast` thread, since this is thread
1177+
that does release the memory.
1178+
*/
1179+
it->second->claim_memory_ownership(true);
1180+
delete it->second;
1181+
}
11581182

11591183
it->second = snapshot_version;
11601184
error = false;
@@ -1277,7 +1301,15 @@ void Certifier::garbage_collect() {
12771301
stable_gtid_set_lock->wrlock();
12781302
while (it != certification_info.end()) {
12791303
if (it->second->is_subset_not_equals(stable_gtid_set)) {
1280-
if (it->second->unlink() == 0) delete it->second;
1304+
if (it->second->unlink() == 0) {
1305+
/*
1306+
Claim Gtid_set_ref used memory to
1307+
`thread/group_rpl/THD_certifier_broadcast` thread, since this is
1308+
thread that does release the memory.
1309+
*/
1310+
it->second->claim_memory_ownership(true);
1311+
delete it->second;
1312+
}
12811313
certification_info.erase(it++);
12821314
} else
12831315
++it;
@@ -1662,6 +1694,14 @@ int Certifier::set_certification_info(
16621694
value->link();
16631695
certification_info.insert(
16641696
std::pair<std::string, Gtid_set_ref *>(key, value));
1697+
/*
1698+
The memory used by Gtid_set_ref is allocated by
1699+
`thread/group_rpl/THD_applier_module_receiver`, though it will be released
1700+
by `thread/group_rpl/THD_certifier_broadcast` thread. To avoid untracked
1701+
memory release on `thread/group_rpl/THD_applier_module_receiver` we do
1702+
dissociate this used memory from this thread.
1703+
*/
1704+
value->claim_memory_ownership(false);
16651705
}
16661706

16671707
if (initialize_server_gtid_set()) {

sql/rpl_gtid.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,6 +1491,14 @@ class Gtid_set {
14911491
public:
14921492
/// Destroy this Gtid_set.
14931493
~Gtid_set();
1494+
1495+
/**
1496+
Claim ownership of memory.
1497+
1498+
@param claim claim ownership of memory.
1499+
*/
1500+
void claim_memory_ownership(bool claim);
1501+
14941502
/**
14951503
Removes all gtids from this Gtid_set.
14961504

sql/rpl_gtid_set.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,17 @@ Gtid_set::Gtid_set(Sid_map *_sid_map, const char *text,
9999
*status = add_gtid_text(text);
100100
}
101101

102+
void Gtid_set::claim_memory_ownership(bool claim) {
103+
m_intervals.claim_memory_ownership(claim);
104+
105+
Interval_chunk *chunk = chunks;
106+
while (chunk != nullptr) {
107+
Interval_chunk *next_chunk = chunk->next;
108+
my_claim(chunk, claim);
109+
chunk = next_chunk;
110+
}
111+
}
112+
102113
void Gtid_set::init() {
103114
DBUG_TRACE;
104115
has_cached_string_length = false;

0 commit comments

Comments
 (0)