[Commits] Rev 4275: MDEV-5262: Missing retry after temp error in parallel replication in http://bazaar.launchpad.net/~maria-captains/maria/10.0

knielsen at knielsen-hq.org knielsen at knielsen-hq.org
Tue Jul 8 16:39:25 EEST 2014


At http://bazaar.launchpad.net/~maria-captains/maria/10.0

------------------------------------------------------------
revno: 4275
revision-id: knielsen at knielsen-hq.org-20140513114206-cjvle53zycke12hs
parent: knielsen at knielsen-hq.org-20140508122018-cdwr0phcaphkkj27
author: knielsen at knielsen-hq.org
committer: Kristian Nielsen <knielsen at knielsen-hq.org>
branch nick: tmp-10.0
timestamp: Tue 2014-05-13 13:42:06 +0200
message:
  MDEV-5262: Missing retry after temp error in parallel replication
  
  Implement repeated retries: if the first retry also fails, make another attempt.
  
  Add test cases for a multi-retry that succeeds on the second attempt, and
  for a multi-retry that eventually fails because it exceeds slave_trans_retries.
=== modified file 'mysql-test/suite/rpl/r/rpl_parallel_retry.result'
--- a/mysql-test/suite/rpl/r/rpl_parallel_retry.result	2014-05-08 12:20:18 +0000
+++ b/mysql-test/suite/rpl/r/rpl_parallel_retry.result	2014-05-13 11:42:06 +0000
@@ -28,23 +28,21 @@ END IF;
 RETURN x;
 END
 ||
+SET sql_log_bin=1;
 include/stop_slave.inc
-SET @old_format= @@SESSION.binlog_format;
-SET binlog_format='statement';
 SET gtid_seq_no = 100;
 BEGIN;
 INSERT INTO t1 VALUES (2,1);
 UPDATE t1 SET b=b+1 WHERE a=1;
 INSERT INTO t1 VALUES (3,1);
 COMMIT;
-SET binlog_format=@old_format;
 SELECT * FROM t1 ORDER BY a;
 a       b
 1       2
 2       1
 3       1
 SET @old_dbug= @@GLOBAL.debug_dbug;
-SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_1_100";
+SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";
 include/start_slave.inc
 SET GLOBAL debug_dbug=@old_dbug;
 retries
@@ -54,6 +52,95 @@ a	b
 1       2
 2       1
 3       1
+*** Test that double retry works when the first retry also fails with temp error ***
+include/stop_slave.inc
+SET gtid_seq_no = 100;
+SET @old_server_id = @@server_id;
+SET server_id = 10;
+BEGIN;
+INSERT INTO t1 VALUES (4,1);
+UPDATE t1 SET b=b+1 WHERE a=1;
+INSERT INTO t1 VALUES (5,1);
+INSERT INTO t1 VALUES (6,1);
+COMMIT;
+SET server_id = @old_server_id;
+SELECT * FROM t1 ORDER BY a;
+a       b
+1       3
+2       1
+3       1
+4       1
+5       1
+6       1
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_double_temp_err_gtid_0_x_100";
+include/start_slave.inc
+SET GLOBAL debug_dbug=@old_dbug;
+retries
+2
+SELECT * FROM t1 ORDER BY a;
+a       b
+1       3
+2       1
+3       1
+4       1
+5       1
+6       1
+*** Test too many retries, eventually causing failure. ***
+include/stop_slave.inc
+SET gtid_seq_no = 100;
+SET @old_server_id = @@server_id;
+SET server_id = 11;
+BEGIN;
+INSERT INTO t1 VALUES (7,1);
+UPDATE t1 SET b=b+1 WHERE a=1;
+INSERT INTO t1 VALUES (8,1);
+INSERT INTO t1 VALUES (9,1);
+COMMIT;
+SET server_id = @old_server_id;
+SELECT * FROM t1 ORDER BY a;
+a       b
+1       4
+2       1
+3       1
+4       1
+5       1
+6       1
+7       1
+8       1
+9       1
+SET sql_log_bin=0;
+CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");
+CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");
+SET sql_log_bin=1;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100";
+START SLAVE;
+include/wait_for_slave_sql_error.inc [errno=1213]
+SET GLOBAL debug_dbug=@old_dbug;
+retries
+10
+SELECT * FROM t1 ORDER BY a;
+a       b
+1       3
+2       1
+3       1
+4       1
+5       1
+6       1
+STOP SLAVE IO_THREAD;
+include/start_slave.inc
+SELECT * FROM t1 ORDER BY a;
+a       b
+1       4
+2       1
+3       1
+4       1
+5       1
+6       1
+7       1
+8       1
+9       1
 include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
 include/start_slave.inc

=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_retry.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_retry.test	2014-05-08 12:20:18 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_retry.test	2014-05-13 11:42:06 +0000
@@ -47,27 +47,22 @@ CREATE FUNCTION foo(x INT, d1 VARCHAR(50
   END
 ||
 --delimiter ;
+SET sql_log_bin=1;
 --source include/stop_slave.inc
 
 --connection server_1
-SET @old_format= @@SESSION.binlog_format;
-SET binlog_format='statement';
 SET gtid_seq_no = 100;
 BEGIN;
 INSERT INTO t1 VALUES (2,1);
 UPDATE t1 SET b=b+1 WHERE a=1;
-#INSERT INTO t1 VALUES (3,foo(1,
-#    "ha_write_row_end SIGNAL q1_ready WAIT_FOR q1_cont",
-#    ""));
 INSERT INTO t1 VALUES (3,1);
 COMMIT;
-SET binlog_format=@old_format;
 SELECT * FROM t1 ORDER BY a;
 --save_master_pos
 
 --connection server_2
 SET @old_dbug= @@GLOBAL.debug_dbug;
-SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_1_100";
+SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";
 let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
 --source include/start_slave.inc
 --sync_with_master
@@ -79,6 +74,82 @@ eval SELECT $new_retry - $old_retry AS r
 
 SELECT * FROM t1 ORDER BY a;
 
+
+--echo *** Test that double retry works when the first retry also fails with temp error ***
+--source include/stop_slave.inc
+
+--connection server_1
+SET gtid_seq_no = 100;
+SET @old_server_id = @@server_id;
+SET server_id = 10;
+BEGIN;
+INSERT INTO t1 VALUES (4,1);
+UPDATE t1 SET b=b+1 WHERE a=1;
+INSERT INTO t1 VALUES (5,1);
+INSERT INTO t1 VALUES (6,1);
+COMMIT;
+SET server_id = @old_server_id;
+SELECT * FROM t1 ORDER BY a;
+--save_master_pos
+
+--connection server_2
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_double_temp_err_gtid_0_x_100";
+let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
+--source include/start_slave.inc
+--sync_with_master
+SET GLOBAL debug_dbug=@old_dbug;
+let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
+--disable_query_log
+eval SELECT $new_retry - $old_retry AS retries;
+--enable_query_log
+
+SELECT * FROM t1 ORDER BY a;
+
+
+--echo *** Test too many retries, eventually causing failure. ***
+--source include/stop_slave.inc
+
+--connection server_1
+SET gtid_seq_no = 100;
+SET @old_server_id = @@server_id;
+SET server_id = 11;
+BEGIN;
+INSERT INTO t1 VALUES (7,1);
+UPDATE t1 SET b=b+1 WHERE a=1;
+INSERT INTO t1 VALUES (8,1);
+INSERT INTO t1 VALUES (9,1);
+COMMIT;
+SET server_id = @old_server_id;
+SELECT * FROM t1 ORDER BY a;
+--save_master_pos
+
+--connection server_2
+SET sql_log_bin=0;
+CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");
+CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");
+SET sql_log_bin=1;
+
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100";
+let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
+START SLAVE;
+--let $slave_sql_errno= 1213
+--let $slave_timeout= 10
+--source include/wait_for_slave_sql_error.inc
+SET GLOBAL debug_dbug=@old_dbug;
+let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
+--disable_query_log
+eval SELECT $new_retry - $old_retry AS retries;
+--enable_query_log
+
+SELECT * FROM t1 ORDER BY a;
+STOP SLAVE IO_THREAD;
+--source include/start_slave.inc
+--sync_with_master
+SELECT * FROM t1 ORDER BY a;
+
+
 --connection server_2
 --source include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;

=== modified file 'sql/rpl_parallel.cc'
--- a/sql/rpl_parallel.cc	2014-05-08 12:20:18 +0000
+++ b/sql/rpl_parallel.cc	2014-05-13 11:42:06 +0000
@@ -188,6 +188,22 @@ unlock_or_exit_cond(THD *thd, mysql_mute
 }
 
 
+#ifndef DBUG_OFF
+static int
+dbug_simulate_tmp_error(rpl_group_info *rgi, THD *thd)
+{
+  if (rgi->current_gtid.domain_id == 0 && rgi->current_gtid.seq_no == 100 &&
+      rgi->retry_event_count == 4)
+  {
+    thd->clear_error();
+    thd->get_stmt_da()->reset_diagnostics_area();
+    my_error(ER_LOCK_DEADLOCK, MYF(0));
+    return 1;
+  }
+  return 0;
+}
+#endif
+
 static int
 retry_handle_relay_log_rotate(Log_event *ev, IO_CACHE *rlog)
 {
@@ -204,15 +220,18 @@ retry_event_group(rpl_group_info *rgi, r
   File fd;
   const char *errmsg= NULL;
   inuse_relaylog *ir= rgi->relay_log;
-  uint64 event_count= 0;
+  uint64 event_count;
   uint64 events_to_execute= rgi->retry_event_count;
   Relay_log_info *rli= rgi->rli;
-  int err= 0;
+  int err;
   ulonglong cur_offset, old_offset;
   char log_name[FN_REFLEN];
   THD *thd= rgi->thd;
+  ulong retries= 0;
 
 do_retry:
+  event_count= 0;
+  err= 0;
   rgi->cleanup_context(thd, 1);
 
   mysql_mutex_lock(&rli->data_lock);
@@ -268,10 +287,26 @@ retry_event_group(rpl_group_info *rgi, r
     else
       err= retry_handle_relay_log_rotate(ev, &rlog);
     delete_or_keep_event_post_apply(rgi, event_type, ev);
-
+    DBUG_EXECUTE_IF("rpl_parallel_simulate_double_temp_err_gtid_0_x_100",
+                    if (retries == 0) err= dbug_simulate_tmp_error(rgi, thd););
+    DBUG_EXECUTE_IF("rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100",
+                    err= dbug_simulate_tmp_error(rgi, thd););
     if (err)
     {
-      /* ToDo: Need to here also handle second retry. */
+      if (has_temporary_error(thd))
+      {
+        ++retries;
+        if (retries < slave_trans_retries)
+        {
+          end_io_cache(&rlog);
+          mysql_file_close(fd, MYF(MY_WME));
+          goto do_retry;
+        }
+        sql_print_error("Slave worker thread retried transaction %lu time(s) "
+                        "in vain, giving up. Consider raising the value of "
+                        "the slave_transaction_retries variable.",
+                        slave_trans_retries);
+      }
       goto err;
     }
 
@@ -592,29 +627,23 @@ handle_rpl_parallel_thread(void *arg)
       {
         ++rgi->retry_event_count;
         err= rpt_handle_event(events, rpt);
-        DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_gtid_0_1_100",
-          if (rgi->current_gtid.domain_id == 0 &&
-              rgi->current_gtid.server_id == 1 &&
-              rgi->current_gtid.seq_no == 100 &&
-              rgi->retry_event_count == 4)
-          {
-            thd->clear_error();
-            thd->get_stmt_da()->reset_diagnostics_area();
-            my_error(ER_LOCK_DEADLOCK, MYF(0));
-            err= 1;
-          };);
+        delete_or_keep_event_post_apply(rgi, event_type, events->ev);
+        DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_gtid_0_x_100",
+                        err= dbug_simulate_tmp_error(rgi, thd););
         if (err && has_temporary_error(thd))
           err= retry_event_group(rgi, rpt, events);
       }
       else
+      {
+        delete events->ev;
         err= thd->wait_for_prior_commit();
+      }
 
       end_of_group=
         in_event_group &&
         ((group_standalone && !Log_event::is_part_of_group(event_type)) ||
          group_ending);
 
-      delete_or_keep_event_post_apply(rgi, event_type, events->ev);
       events->next= qevs_to_free;
       qevs_to_free= events;
 
@@ -1528,15 +1557,9 @@ rpl_parallel::do_event(rpl_group_info *s
 
   if (typ == GTID_EVENT)
   {
-    uint32 domain_id;
-    if (likely(typ == GTID_EVENT))
-    {
-      Gtid_log_event *gtid_ev= static_cast<Gtid_log_event *>(ev);
-      domain_id= (rli->mi->using_gtid == Master_info::USE_GTID_NO ?
-                  0 : gtid_ev->domain_id);
-    }
-    else
-      domain_id= 0;
+    Gtid_log_event *gtid_ev= static_cast<Gtid_log_event *>(ev);
+    uint32 domain_id= (rli->mi->using_gtid == Master_info::USE_GTID_NO ?
+                       0 : gtid_ev->domain_id);
     if (!(e= find(domain_id)))
     {
       my_error(ER_OUT_OF_RESOURCES, MYF(MY_WME));



More information about the commits mailing list