[Commits] Rev 4492: MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen in http://bazaar.launchpad.net/~maria-captains/maria/10.0

knielsen at knielsen-hq.org knielsen at knielsen-hq.org
Thu Nov 13 14:56:00 EET 2014


At http://bazaar.launchpad.net/~maria-captains/maria/10.0

------------------------------------------------------------
revno: 4492
revision-id: knielsen at knielsen-hq.org-20141113100131-292bj775z1f7ebc2
parent: knielsen at knielsen-hq.org-20141113094609-k7lyzs4bywctjy1p
committer: Kristian Nielsen <knielsen at knielsen-hq.org>
branch nick: work-10.0
timestamp: Thu 2014-11-13 11:01:31 +0100
message:
  MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen
  
  This bug was seen when parallel replication experienced a deadlock between
  transactions T1 and T2, where T2 has reached the commit phase and is waiting
  for T1 to commit first. In this case, the deadlock is broken by sending a kill
  to T2; that kill error is then later detected and converted to a deadlock
  error, which causes T2 to be rolled back and retried.
  
  The problem was that the kill caused ha_commit_trans() to erroneously call
  wakeup_subsequent_commits() on T3, signalling it to abort because T2 failed
  during commit. This is incorrect, because the error in T2 is only a temporary
  error, which will be resolved by normal transaction retry. We should not
  signal error to the next transaction until we have executed the code that
  handles such temporary errors.
  
  So this patch just removes the calls to wakeup_subsequent_commits() from
  ha_commit_trans(). They are incorrect in this case, and they are not needed in
  general, as wakeup_subsequent_commits() must in any case be called in
  finish_event_group() to wake up any transactions that may have started to wait
  after ha_commit_trans(). And normally, wakeup will in fact have happened
  earlier, either from the binlog group commit code, or (in case of no
  binlogging) after the fast part of InnoDB/XtraDB group commit.
  
  The symptom of this bug was that replication would break on some transaction
  with "Commit failed due to failure of an earlier commit on which this one
  depends", but with no such failure of an earlier commit visible anywhere.
=== modified file 'mysql-test/suite/rpl/r/rpl_parallel_retry.result'
--- a/mysql-test/suite/rpl/r/rpl_parallel_retry.result	2014-11-13 09:46:09 +0000
+++ b/mysql-test/suite/rpl/r/rpl_parallel_retry.result	2014-11-13 10:01:31 +0000
@@ -234,9 +234,74 @@ a	b
 107     1
 108     1
 109     1
+*** MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen **
+CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB;
+INSERT INTO t3 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
+CREATE TABLE t4 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+SET @old_format= @@SESSION.binlog_format;
+SET binlog_format='statement';
+include/stop_slave.inc
+CHANGE MASTER TO master_use_gtid=no;
+SET @old_format= @@SESSION.binlog_format;
+SET binlog_format='statement';
+BEGIN;
+INSERT INTO t4 VALUES (10, foo(1, 'before_execute_sql_command WAIT_FOR t1_start', ''));
+UPDATE t3 SET b=NULL WHERE a=6;
+SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
+COMMIT;
+SET debug_sync='now WAIT_FOR master_queued1';
+SET @old_format= @@SESSION.binlog_format;
+SET binlog_format='statement';
+BEGIN;
+INSERT INTO t4 VALUES (20, foo(2, 'group_commit_waiting_for_prior SIGNAL t2_waiting', ''));
+DELETE FROM t3 WHERE b <= 3;
+SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
+COMMIT;
+SET debug_sync='now WAIT_FOR master_queued2';
+SET @old_format= @@SESSION.binlog_format;
+SET binlog_format='statement';
+BEGIN;
+INSERT INTO t4 VALUES (30, foo(3, 'before_execute_sql_command WAIT_FOR t3_start', 'group_commit_waiting_for_prior SIGNAL t3_waiting'));
+INSERT INTO t3 VALUES (7,7);
+SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued3';
+COMMIT;
+SET debug_sync='now WAIT_FOR master_queued3';
+SET debug_sync='now SIGNAL master_cont1';
+SET binlog_format=@old_format;
+SET binlog_format=@old_format;
+SET debug_sync='RESET';
+SET binlog_format=@old_format;
+SELECT * FROM t3 ORDER BY a;
+a       b
+1       NULL
+3       NULL
+4       4
+5       NULL
+6       NULL
+7       7
+SET @old_dbug=@@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,thd_need_ordering_with_force";
+include/start_slave.inc
+SET debug_sync='now WAIT_FOR t2_waiting';
+SET debug_sync='now SIGNAL t3_start';
+SET debug_sync='now WAIT_FOR t3_waiting';
+SET debug_sync='now SIGNAL t1_start';
+SET GLOBAL debug_dbug=@old_dbug;
+SET debug_sync='RESET';
+retries
+1
+SELECT * FROM t3 ORDER BY a;
+a       b
+1       NULL
+3       NULL
+4       4
+5       NULL
+6       NULL
+7       7
+SET binlog_format=@old_format;
 include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
 include/start_slave.inc
-DROP TABLE t1, t2;
+DROP TABLE t1, t2, t3, t4;
 DROP function foo;
 include/rpl_end.inc

=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_retry.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_retry.test	2014-11-13 09:46:09 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_retry.test	2014-11-13 10:01:31 +0000
@@ -259,13 +259,126 @@ INSERT INTO t1 VALUES (109, 1);
 SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
 
 
+--echo *** MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen **
+
+--connection server_1
+CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB;
+INSERT INTO t3 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
+CREATE TABLE t4 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+
+# We need statement binlog format to be able to inject debug_sync statements
+# on the slave with calls to foo().
+SET @old_format= @@SESSION.binlog_format;
+SET binlog_format='statement';
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+--source include/stop_slave.inc
+CHANGE MASTER TO master_use_gtid=no;
+
+--connection server_1
+
+# Create a group commit with three transactions T1, T2, T3.
+# T2 will block T1 on the slave where we will make it run first, so it will be
+# deadlock killed.
+# The bug was that in this case, T3 was signalled to fail due to T2 failing,
+# even though the retry of T2 was later successful.
+
+--connect (con1,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
+SET @old_format= @@SESSION.binlog_format;
+SET binlog_format='statement';
+BEGIN;
+INSERT INTO t4 VALUES (10, foo(1, 'before_execute_sql_command WAIT_FOR t1_start', ''));
+UPDATE t3 SET b=NULL WHERE a=6;
+SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
+send COMMIT;
+--connection server_1
+SET debug_sync='now WAIT_FOR master_queued1';
+
+--connect (con2,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
+SET @old_format= @@SESSION.binlog_format;
+SET binlog_format='statement';
+BEGIN;
+INSERT INTO t4 VALUES (20, foo(2, 'group_commit_waiting_for_prior SIGNAL t2_waiting', ''));
+DELETE FROM t3 WHERE b <= 3;
+SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
+send COMMIT;
+
+--connection server_1
+SET debug_sync='now WAIT_FOR master_queued2';
+
+--connect (con3,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
+SET @old_format= @@SESSION.binlog_format;
+SET binlog_format='statement';
+BEGIN;
+INSERT INTO t4 VALUES (30, foo(3, 'before_execute_sql_command WAIT_FOR t3_start', 'group_commit_waiting_for_prior SIGNAL t3_waiting'));
+INSERT INTO t3 VALUES (7,7);
+SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued3';
+send COMMIT;
+
+--connection server_1
+SET debug_sync='now WAIT_FOR master_queued3';
+SET debug_sync='now SIGNAL master_cont1';
+
+--connection con1
+REAP;
+SET binlog_format=@old_format;
+--connection con2
+REAP;
+SET binlog_format=@old_format;
+--connection con3
+REAP;
+SET debug_sync='RESET';
+SET binlog_format=@old_format;
+
+--connection server_1
+--save_master_pos
+SELECT * FROM t3 ORDER BY a;
+
+
+--connection server_2
+let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
+SET @old_dbug=@@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug="+d,thd_need_ordering_with_force";
+--source include/start_slave.inc
+# First, wait for T2 to complete up to where it is waiting for T1 to group
+# commit for both of them. This will set locks that will block T1, causing
+# a deadlock kill and retry of T2. T1 and T3 are still blocked at the start
+# of each their SQL statements.
+SET debug_sync='now WAIT_FOR t2_waiting';
+# Now let T3 move on until the point where it is itself ready to commit.
+SET debug_sync='now SIGNAL t3_start';
+SET debug_sync='now WAIT_FOR t3_waiting';
+# Now T2 and T3 are set up, so we can let T1 proceed.
+SET debug_sync='now SIGNAL t1_start';
+# Now we can wait for the slave to catch up.
+# We should see T2 being deadlock killed and retried.
+# The bug was that T2 deadlock kill would cause T3 to fail due to failure
+# of an earlier commit. This is wrong as T2 did not fail, it was only
+# retried.
+--sync_with_master
+SET GLOBAL debug_dbug=@old_dbug;
+SET debug_sync='RESET';
+let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
+--disable_query_log
+eval SELECT $new_retry - $old_retry >= 1 AS retries;
+--enable_query_log
+SELECT * FROM t3 ORDER BY a;
+
+
+--connection server_1
+SET binlog_format=@old_format;
+
+
+# Clean up.
 --connection server_2
 --source include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
 --source include/start_slave.inc
 
 --connection server_1
-DROP TABLE t1, t2;
+DROP TABLE t1, t2, t3, t4;
 DROP function foo;
 
 --source include/rpl_end.inc

=== modified file 'sql/handler.cc'
--- a/sql/handler.cc	2014-09-30 17:31:14 +0000
+++ b/sql/handler.cc	2014-11-13 10:01:31 +0000
@@ -1327,10 +1327,7 @@ int ha_commit_trans(THD *thd, bool all)
       Free resources and perform other cleanup even for 'empty' transactions.
     */
     if (is_real_trans)
-    {
       thd->transaction.cleanup();
-      thd->wakeup_subsequent_commits(error);
-    }
     DBUG_RETURN(0);
   }
 
@@ -1364,7 +1361,6 @@ int ha_commit_trans(THD *thd, bool all)
                                       thd->variables.lock_wait_timeout))
     {
       ha_rollback_trans(thd, all);
-      thd->wakeup_subsequent_commits(1);
       DBUG_RETURN(1);
     }
 
@@ -1452,7 +1448,6 @@ int ha_commit_trans(THD *thd, bool all)
 err:
   error= 1;                                  /* Transaction was rolled back */
   ha_rollback_trans(thd, all);
-  thd->wakeup_subsequent_commits(error);
 
 end:
   if (rw_trans && mdl_request.ticket)
@@ -1546,10 +1541,7 @@ commit_one_phase_2(THD *thd, bool all, T
   }
   /* Free resources and perform other cleanup even for 'empty' transactions. */
   if (is_real_trans)
-  {
-    thd->wakeup_subsequent_commits(error);
     thd->transaction.cleanup();
-  }
 
   DBUG_RETURN(error);
 }

=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc	2014-11-13 09:31:20 +0000
+++ b/sql/sql_class.cc	2014-11-13 10:01:31 +0000
@@ -4361,6 +4361,7 @@ thd_need_ordering_with(const MYSQL_THD t
     return 1;
   if (!rgi->commit_id || rgi->commit_id != other_rgi->commit_id)
     return 1;
+  DBUG_EXECUTE_IF("thd_need_ordering_with_force", return 1;);
   /*
     Otherwise, these two threads are doing parallel replication within the same
     replication domain. Their commit order is already fixed, so we do not need



More information about the commits mailing list