Skip to content

Commit 13d292e

Browse files
committed
BUG#36548269 Log when incidents occur to mysqld.log
Log before and after the ndbcluster plugin writes an incident to the binlog.

There are several functions that write incident messages to the injector, each doing more or less the same thing with some variation. To make it possible to add logging, verification and resource management in one place, a new utility function is extracted for writing an incident message to the injector. Since the function is used rarely, and only in error scenarios, it writes a message to the log both before and after writing to the injector.

Furthermore, the write of an incident is no longer retried indefinitely when injecting the incident into the binlog fails.

The result in the MySQL Server error log looks like this:

<time> 2 [System] [MY-010866] [NDB] Binlog: Writing incident 'cluster disconnect' to binlog
<time> 2 [ERROR] [MY-010853] [Server] cluster disconnect An incident event has been written to the binary log which will stop the replicas.
<time> 2 [System] [MY-010866] [NDB] Binlog: Incident 'cluster disconnect' written to binlog

Change-Id: Iaaed28fe6c2254eca3802421922632e8c1807b8a
1 parent dd36348 commit 13d292e

File tree

3 files changed

+49
-41
lines changed

3 files changed

+49
-41
lines changed

sql/rpl_injector.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,6 @@ class injector {
147147
m_is_transactional(is_transactional),
148148
m_skip_hash(skip_hash) {}
149149

150-
char const *db_name() const { return m_table->s->db.str; }
151-
char const *table_name() const { return m_table->s->table_name.str; }
152150
TABLE *get_table() const { return m_table; }
153151
bool is_transactional() const { return m_is_transactional; }
154152
bool skip_hash() const { return m_skip_hash; }

storage/ndb/plugin/ha_ndbcluster_binlog.cc

Lines changed: 28 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6066,17 +6066,25 @@ int Ndb_binlog_thread::handle_error(NdbEventOperation *pOp) const {
60666066
return 0;
60676067
}
60686068

6069-
/**
6070-
Inject an incident (aka. 'lost events' or 'gap') into the injector,
6071-
indicating that problem has occurred while processing the event stream.
6069+
void Ndb_binlog_thread::inject_incident_message(injector *inj, THD *thd,
6070+
const char *message) const {
6071+
DBUG_TRACE;
60726072

6073-
@param thd The thread handle
6074-
@param inj Pointer to the injector
6075-
@param event_type Type of the event problem that has occurred.
6076-
@param gap_epoch The epoch when problem was detected.
6073+
// First write the error message to log
6074+
log_info("Writing incident '%s' to binlog", message);
60776075

6078-
*/
6079-
void Ndb_binlog_thread::inject_incident(
6076+
// Write incident message to injector
6077+
if (inj->record_incident(thd, message) != 0) {
6078+
log_error("Failed to write incident to binlog");
6079+
return;
6080+
}
6081+
6082+
// Since injecting an incident to binlog is rare, also write message to log
6083+
// indicating that incident has been written
6084+
log_info("Incident '%s' written to binlog", message);
6085+
}
6086+
6087+
void Ndb_binlog_thread::inject_incident_for_event(
60806088
injector *inj, THD *thd, NdbDictionary::Event::TableEvent event_type,
60816089
Uint64 gap_epoch) const {
60826090
DBUG_TRACE;
@@ -6090,17 +6098,9 @@ void Ndb_binlog_thread::inject_incident(
60906098

60916099
char errmsg[80];
60926100
snprintf(errmsg, sizeof(errmsg),
6093-
"Detected %s in GCI %llu, "
6094-
"inserting GAP event",
6095-
reason, gap_epoch);
6096-
6097-
// Write error message to log
6098-
log_error("%s", errmsg);
6099-
6100-
// Record incident in injector
6101-
if (inj->record_incident(thd, errmsg) != 0) {
6102-
log_error("Failed to record incident");
6103-
}
6101+
"Detected %s in epoch %u/%u, inserting GAP event", reason,
6102+
(uint)(gap_epoch >> 32), (uint)(gap_epoch));
6103+
inject_incident_message(inj, thd, errmsg);
61046104
}
61056105

61066106
/**
@@ -6785,7 +6785,7 @@ bool Ndb_binlog_thread::handle_events_for_epoch(THD *thd, injector *inj,
67856785
if (event_type == NdbDictionary::Event::TE_INCONSISTENT ||
67866786
event_type == NdbDictionary::Event::TE_OUT_OF_MEMORY) {
67876787
// Error has occurred in event stream processing, inject incident
6788-
inject_incident(inj, thd, event_type, current_epoch);
6788+
inject_incident_for_event(inj, thd, event_type, current_epoch);
67896789

67906790
i_pOp = i_ndb->nextEvent2();
67916791
return true; // OK, error handled
@@ -7083,9 +7083,9 @@ void Ndb_binlog_thread::do_wakeup() {
70837083
*/
70847084
}
70857085

7086-
bool Ndb_binlog_thread::check_reconnect_incident(
7086+
void Ndb_binlog_thread::check_reconnect_incident(
70877087
THD *thd, injector *inj, Reconnect_type incident_id) const {
7088-
std::string_view msg = "cluster disconnect";
7088+
const char *msg = "cluster disconnect";
70897089
log_verbose(1, "Check for incidents");
70907090

70917091
if (incident_id == MYSQLD_STARTUP) {
@@ -7104,17 +7104,12 @@ bool Ndb_binlog_thread::check_reconnect_incident(
71047104
*/
71057105
log_verbose(60, " - skipping incident for first log, log_number: %u",
71067106
log_number);
7107-
return false; // No incident written
7107+
return; // No incident written
71087108
}
71097109
log_verbose(60, " - current binlog file number: %u", log_number);
71107110
}
71117111

7112-
// Write an incident event to the binlog since it's not possible to know what
7113-
// has happened in the cluster while not being connected.
7114-
log_verbose(20, "Writing incident for %.*s", (int)msg.length(), msg.data());
7115-
if (inj->record_incident(thd, msg)) log_error("Failed to record incident");
7116-
7117-
return true; // Incident written
7112+
inject_incident_message(inj, thd, msg);
71187113
}
71197114

71207115
bool Ndb_binlog_thread::handle_purge(const char *filename) {
@@ -7458,11 +7453,9 @@ void Ndb_binlog_thread::do_run() {
74587453
lex_start(thd);
74597454

74607455
if (do_reconnect_incident && ndb_binlog_running) {
7461-
if (check_reconnect_incident(thd, inj, reconnect_incident_id)) {
7462-
// Incident written, don't report incident again unless Ndb_binlog_thread
7463-
// is restarted
7464-
do_reconnect_incident = false;
7465-
}
7456+
check_reconnect_incident(thd, inj, reconnect_incident_id);
7457+
// Don't report incident again unless thread is restarted
7458+
do_reconnect_incident = false;
74667459
}
74677460
reconnect_incident_id = CLUSTER_DISCONNECT;
74687461

storage/ndb/plugin/ndb_binlog_thread.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,26 @@ class Ndb_binlog_thread : public Ndb_component {
236236
*/
237237
void log_ndb_error(const NdbError &ndberr) const;
238238

239+
/**
240+
Write an incident message to the binlog.
241+
@param inj Pointer to the injector
242+
@param thd The thread handle
243+
@param message The message to write.
244+
*/
245+
void inject_incident_message(injector *inj, THD *thd,
246+
const char *message) const;
247+
248+
/**
249+
Write an incident for particular NDB event type to the binlog.
250+
@param inj Pointer to the injector
251+
@param thd The thread handle
252+
@param event_type Type of the NDB event problem that has occurred.
253+
@param gap_epoch The epoch when problem was detected.
254+
*/
255+
void inject_incident_for_event(injector *inj, THD *thd,
256+
NdbDictionary::Event::TableEvent event_type,
257+
Uint64 gap_epoch) const;
258+
239259
/*
240260
The Ndb_binlog_thread is supposed to make a continuous recording
241261
of the activity in the cluster to the mysqlds binlog. When this
@@ -251,7 +271,7 @@ class Ndb_binlog_thread : public Ndb_component {
251271
// from the cluster
252272
CLUSTER_DISCONNECT
253273
};
254-
bool check_reconnect_incident(THD *thd, injector *inj,
274+
void check_reconnect_incident(THD *thd, injector *inj,
255275
Reconnect_type incident_id) const;
256276

257277
/**
@@ -329,9 +349,6 @@ class Ndb_binlog_thread : public Ndb_component {
329349
// Functions for injecting events
330350
bool inject_apply_status_write(injector_transaction &trans,
331351
ulonglong gci) const;
332-
void inject_incident(injector *inj, THD *thd,
333-
NdbDictionary::Event::TableEvent event_type,
334-
Uint64 gap_epoch) const;
335352
void inject_table_map(injector_transaction &trans, Ndb *ndb) const;
336353
void commit_trans(injector_transaction &trans, THD *thd, Uint64 current_epoch,
337354
ndb_binlog_index_row *rows, unsigned trans_row_count,

0 commit comments

Comments
 (0)