[Commits] Rev 4489: MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction in http://bazaar.launchpad.net/~maria-captains/maria/10.0

knielsen at knielsen-hq.org knielsen at knielsen-hq.org
Thu Nov 13 11:46:09 EET 2014


At http://bazaar.launchpad.net/~maria-captains/maria/10.0

------------------------------------------------------------
revno: 4489
revision-id: knielsen at knielsen-hq.org-20141113094609-4ipkc7mw5a2ipfyk
parent: knielsen at knielsen-hq.org-20141113093120-a066l75lv86hn7t3
committer: Kristian Nielsen <knielsen at knielsen-hq.org>
branch nick: work-10.0
timestamp: Thu 2014-11-13 10:46:09 +0100
message:
  MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction
  
  The retry of an event group in parallel replication set the wrong value for
  the end log position of the event that was retried
  (qev->future_event_relay_log_pos). It was too large by the size of the event,
  so it pointed into the middle of the following event.
  
  If the retry happened in the very last event of the event group, _and_ the SQL
  thread was stopped just after successfully retrying that event, then the SQL
  thread's relay log position would be left incorrect. Restarting the SQL
  thread could then try to read events from a garbage offset in the relay log,
  usually leading to an error about not being able to read the event.
=== modified file 'mysql-test/suite/rpl/r/rpl_parallel_retry.result'
--- a/mysql-test/suite/rpl/r/rpl_parallel_retry.result	2014-05-15 13:52:08 +0000
+++ b/mysql-test/suite/rpl/r/rpl_parallel_retry.result	2014-11-13 09:46:09 +0000
@@ -188,6 +188,52 @@ a	LENGTH(b)
 3       5012
 4       5000
 SET GLOBAL max_relay_log_size=@old_max;
+*** MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction ***
+include/stop_slave.inc
+BEGIN;
+INSERT INTO t1 VALUES (100, 0);
+INSERT INTO t1 VALUES (101, 0);
+INSERT INTO t1 VALUES (102, 0);
+INSERT INTO t1 VALUES (103, 0);
+COMMIT;
+SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
+a       b
+100     0
+101     0
+102     0
+103     0
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_xid";
+include/start_slave.inc
+SET GLOBAL debug_dbug=@old_dbug;
+retries
+1
+SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
+a       b
+100     0
+101     0
+102     0
+103     0
+include/stop_slave_sql.inc
+INSERT INTO t1 VALUES (104, 1);
+INSERT INTO t1 VALUES (105, 1);
+INSERT INTO t1 VALUES (106, 1);
+INSERT INTO t1 VALUES (107, 1);
+INSERT INTO t1 VALUES (108, 1);
+INSERT INTO t1 VALUES (109, 1);
+include/start_slave.inc
+SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
+a       b
+100     0
+101     0
+102     0
+103     0
+104     1
+105     1
+106     1
+107     1
+108     1
+109     1
 include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
 include/start_slave.inc

=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_retry.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_retry.test	2014-05-15 13:52:08 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_retry.test	2014-11-13 09:46:09 +0000
@@ -135,7 +135,6 @@ SET GLOBAL debug_dbug="+d,rpl_parallel_s
 let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
 START SLAVE;
 --let $slave_sql_errno= 1213
---let $slave_timeout= 10
 --source include/wait_for_slave_sql_error.inc
 SET GLOBAL debug_dbug=@old_dbug;
 let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
@@ -208,6 +207,58 @@ SELECT a, LENGTH(b) FROM t2 ORDER BY a;
 SET GLOBAL max_relay_log_size=@old_max;
 
 
+--echo *** MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction ***
+
+--connection server_2
+--source include/stop_slave.inc
+
+--connection server_1
+BEGIN;
+INSERT INTO t1 VALUES (100, 0);
+INSERT INTO t1 VALUES (101, 0);
+INSERT INTO t1 VALUES (102, 0);
+INSERT INTO t1 VALUES (103, 0);
+COMMIT;
+SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
+--save_master_pos
+
+--connection server_2
+# Inject a DBUG error insert to cause the XID event of the single transaction
+# from the master to fail with a deadlock error and be retried.
+# The bug was that the retry of the XID would leave the relay log position
+# incorrect (off by the size of XID event).
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_xid";
+let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
+--source include/start_slave.inc
+--sync_with_master
+SET GLOBAL debug_dbug=@old_dbug;
+let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
+--disable_query_log
+eval SELECT $new_retry - $old_retry AS retries;
+--enable_query_log
+
+SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
+# Stop the SQL thread. When the bug was there to give the incorrect relay log
+# position, the restart of the SQL thread would read garbage data from the
+# middle of an event and fail with relay log IO error.
+--source include/stop_slave_sql.inc
+
+--connection server_1
+INSERT INTO t1 VALUES (104, 1);
+INSERT INTO t1 VALUES (105, 1);
+INSERT INTO t1 VALUES (106, 1);
+INSERT INTO t1 VALUES (107, 1);
+INSERT INTO t1 VALUES (108, 1);
+INSERT INTO t1 VALUES (109, 1);
+--save_master_pos
+
+--connection server_2
+--source include/start_slave.inc
+--sync_with_master
+SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
+
+
 --connection server_2
 --source include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;

=== modified file 'sql/rpl_parallel.cc'
--- a/sql/rpl_parallel.cc	2014-11-13 09:20:48 +0000
+++ b/sql/rpl_parallel.cc	2014-11-13 09:46:09 +0000
@@ -446,7 +446,7 @@ retry_event_group(rpl_group_info *rgi, r
     ev->thd= thd;
 
     mysql_mutex_lock(&rpt->LOCK_rpl_thread);
-    qev= rpt->retry_get_qev(ev, orig_qev, log_name, cur_offset,
+    qev= rpt->retry_get_qev(ev, orig_qev, log_name, old_offset,
                             cur_offset - old_offset);
     mysql_mutex_unlock(&rpt->LOCK_rpl_thread);
     if (!qev)
@@ -776,6 +776,18 @@ handle_rpl_parallel_thread(void *arg)
       if (likely(!rgi->worker_error) && !skip_event_group)
       {
         ++rgi->retry_event_count;
+#ifndef DBUG_OFF
+        err= 0;
+        DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_xid",
+          if (event_type == XID_EVENT)
+          {
+            thd->clear_error();
+            thd->get_stmt_da()->reset_diagnostics_area();
+            my_error(ER_LOCK_DEADLOCK, MYF(0));
+            err= 1;
+          });
+        if (!err)
+#endif
         err= rpt_handle_event(qev, rpt);
         delete_or_keep_event_post_apply(rgi, event_type, qev->ev);
         DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_gtid_0_x_100",



More information about the commits mailing list