[Commits] 48e3b4ca5dd: MDEV-15607: mysqld crashed few after node is being joined with sst

jan jan.lindstrom at mariadb.com
Tue Apr 24 14:46:09 EEST 2018


revision-id: 48e3b4ca5dd6a6cffbee64381dc301d43c66e036 (mariadb-10.1.32-67-g48e3b4ca5dd)
parent(s): 9c34a4124d67d9e3f70837eaeb11290f35e8f8d0
author: Jan Lindström
committer: Jan Lindström
timestamp: 2018-04-24 14:43:41 +0300
message:

MDEV-15607: mysqld crashed few after node is being joined with sst

This is a typical systemd response where it tries to shutdown the
joiner (due to "timeout") before the joiner manages to complete SST.

wsrep_sst_wait
wsrep_SE_init_wait
	While waiting the operation to finish use mysql_cond_timedwait
	instead of mysql_cond_wait and if operation is not finished
	extend systemd timeout (if needed).

---
 sql/wsrep_sst.cc | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc
index 260755d08a8..c1f980bd595 100644
--- a/sql/wsrep_sst.cc
+++ b/sql/wsrep_sst.cc
@@ -30,6 +30,10 @@
 #include <cstdio>
 #include <cstdlib>
 
+#if MYSQL_VERSION_ID < 100200
+# include <my_service_manager.h>
+#endif
+
 static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 +
                                 sizeof(WSREP_SST_OPT_CONF) +
                                 sizeof(WSREP_SST_OPT_CONF_SUFFIX) +
@@ -186,6 +190,9 @@ bool wsrep_before_SE()
 static bool            sst_complete = false;
 static bool            sst_needed   = false;
 
+#define WSREP_EXTEND_TIMEOUT_INTERVAL 30
+#define WSREP_TIMEDWAIT_SECONDS 10
+
 void wsrep_sst_grab ()
 {
   WSREP_INFO("wsrep_sst_grab()");
@@ -197,11 +204,16 @@ void wsrep_sst_grab ()
 // Wait for end of SST
 bool wsrep_sst_wait ()
 {
+  struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
   if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
   while (!sst_complete)
   {
     WSREP_INFO("Waiting for SST to complete.");
-    mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst);
+    mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime);
+    if (!sst_complete)
+      service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
+        "WSREP state transfer ongoing, current seqno: %ld", local_seqno);
+
   }
 
   if (local_seqno >= 0)
@@ -1298,9 +1310,14 @@ void wsrep_SE_init_grab()
 
 void wsrep_SE_init_wait()
 {
+  struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
   while (SE_initialized == false)
   {
-    mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init);
+    mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime);
+
+    if (!SE_initialized)
+      service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
+        "WSREP SE initialization ongoing.");
   }
   mysql_mutex_unlock (&LOCK_wsrep_sst_init);
 }


More information about the commits mailing list