[Commits] Rev 2863: MWL#163: release of row locks in InnoDB during prepare in http://bazaar.launchpad.net/~maria-captains/maria/5.1

knielsen at knielsen-hq.org knielsen at knielsen-hq.org
Wed Nov 3 18:07:08 EET 2010


At http://bazaar.launchpad.net/~maria-captains/maria/5.1

------------------------------------------------------------
revno: 2863
revision-id: knielsen at knielsen-hq.org-20101103160708-88w8p8ibe9iaoggj
parent: knielsen at knielsen-hq.org-20101102074027-jlnx0rnbdpovl122
committer: knielsen at knielsen-hq.org
branch nick: work-5.1-mwl163
timestamp: Wed 2010-11-03 17:07:08 +0100
message:
  MWL#163: release of row locks in InnoDB during prepare
  
  Port the Facebook patch for releasing InnoDB row locks early to the
  MWL#116 framework.
  
  A new --innodb-release-locks-early option (off by default) enables a
  prepare_ordered() handlerton method which will release row locks and
  commit a transaction to memory immediately after successful prepare.
  
  If the server subsequently tries to rollback (ie. due to binlog error),
  crashes the server to prevent corrupting the InnoDB state.
=== added file 'mysql-test/r/innodb_release_row_locks_early.result'
--- a/mysql-test/r/innodb_release_row_locks_early.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/r/innodb_release_row_locks_early.result	2010-11-03 16:07:08 +0000
@@ -0,0 +1,103 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (k INT NOT NULL, a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, PRIMARY KEY(k)) ENGINE=InnoDB;
+INSERT INTO t1 (k, a, b, c) VALUES (1, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (2, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (3, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (4, 0, 0, 0);
+RESET MASTER;
+SET DEBUG_SYNC= 'RESET';
+# Connection c1
+BEGIN;
+UPDATE t1 SET a=10 WHERE k=1;
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
+COMMIT;
+# Connection c2
+SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+BEGIN;
+SELECT * FROM t1 WHERE k=1 FOR UPDATE;
+k       a       b       c
+1       10      0       0
+UPDATE t1 SET a=20 WHERE k=1;
+SET DEBUG_SYNC="now SIGNAL c2_committing";
+COMMIT;
+# Connection c1
+BEGIN;
+UPDATE t1 SET a=10 WHERE k=2;
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
+COMMIT;
+# Connection c2
+SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+BEGIN;
+SELECT * FROM t1 WHERE k=2 FOR UPDATE;
+k       a       b       c
+2       10      0       0
+UPDATE t1 SET a=20 WHERE k=2;
+SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
+COMMIT;
+# Connection c1
+# This should warn about DEBUG_SYNC timeout
+Warnings:
+Warning 1639    debug sync point wait timed out
+# Connection c2
+SHOW BINLOG EVENTS LIMIT 2,12;
+Log_name        Pos     Event_type      Server_id       End_log_pos     Info
+master-bin.000001       174     Query   1       265     use `test`; UPDATE t1 SET a=10 WHERE k=1
+master-bin.000001       265     Xid     1       292     COMMIT /* xid=XX */
+master-bin.000001       292     Query   1       360     BEGIN
+master-bin.000001       360     Query   1       451     use `test`; UPDATE t1 SET a=20 WHERE k=1
+master-bin.000001       451     Xid     1       478     COMMIT /* xid=XX */
+master-bin.000001       478     Query   1       546     BEGIN
+master-bin.000001       546     Query   1       637     use `test`; UPDATE t1 SET a=10 WHERE k=2
+master-bin.000001       637     Xid     1       664     COMMIT /* xid=XX */
+master-bin.000001       664     Query   1       732     BEGIN
+master-bin.000001       732     Query   1       823     use `test`; UPDATE t1 SET a=20 WHERE k=2
+master-bin.000001       823     Xid     1       850     COMMIT /* xid=XX */
+# Connection c1
+RESET MASTER;
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
+UPDATE t1 SET a=10 WHERE k=3;
+# Connection c2
+SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+SELECT * FROM t1 WHERE k=3 FOR UPDATE;
+k       a       b       c
+3       10      0       0
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c2_committing";
+UPDATE t1 SET a=20 WHERE k=3;
+# Connection c1
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
+UPDATE t1 SET a=10 WHERE k=4;
+# Connection c2
+SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+SELECT * FROM t1 WHERE k=4 FOR UPDATE;
+k       a       b       c
+4       10      0       0
+SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
+UPDATE t1 SET a=20 WHERE k=4;
+# Connection c1
+# This should warn about DEBUG_SYNC timeout
+SHOW WARNINGS;
+Level   Code    Message
+Warning 1639    debug sync point wait timed out
+# Connection c2
+SHOW BINLOG EVENTS LIMIT 1,12;
+Log_name        Pos     Event_type      Server_id       End_log_pos     Info
+master-bin.000001       106     Query   1       174     BEGIN
+master-bin.000001       174     Query   1       265     use `test`; UPDATE t1 SET a=10 WHERE k=3
+master-bin.000001       265     Xid     1       292     COMMIT /* xid=XX */
+master-bin.000001       292     Query   1       360     BEGIN
+master-bin.000001       360     Query   1       451     use `test`; UPDATE t1 SET a=20 WHERE k=3
+master-bin.000001       451     Xid     1       478     COMMIT /* xid=XX */
+master-bin.000001       478     Query   1       546     BEGIN
+master-bin.000001       546     Query   1       637     use `test`; UPDATE t1 SET a=10 WHERE k=4
+master-bin.000001       637     Xid     1       664     COMMIT /* xid=XX */
+master-bin.000001       664     Query   1       732     BEGIN
+master-bin.000001       732     Query   1       823     use `test`; UPDATE t1 SET a=20 WHERE k=4
+master-bin.000001       823     Xid     1       850     COMMIT /* xid=XX */
+SELECT * FROM t1 ORDER BY k;
+k       a       b       c
+1       20      0       0
+2       20      0       0
+3       20      0       0
+4       20      0       0
+DROP TABLE t1;
+SET DEBUG_SYNC= 'RESET';

=== added file 'mysql-test/t/innodb_release_row_locks_early-master.opt'
--- a/mysql-test/t/innodb_release_row_locks_early-master.opt	1970-01-01 00:00:00 +0000
+++ b/mysql-test/t/innodb_release_row_locks_early-master.opt	2010-11-03 16:07:08 +0000
@@ -0,0 +1 @@
+--innodb-release-locks-early=1

=== added file 'mysql-test/t/innodb_release_row_locks_early.test'
--- a/mysql-test/t/innodb_release_row_locks_early.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/t/innodb_release_row_locks_early.test	2010-11-03 16:07:08 +0000
@@ -0,0 +1,135 @@
+--source include/have_debug_sync.inc
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (k INT NOT NULL, a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, PRIMARY KEY(k)) ENGINE=InnoDB;
+INSERT INTO t1 (k, a, b, c) VALUES (1, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (2, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (3, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (4, 0, 0, 0);
+
+RESET MASTER;
+SET DEBUG_SYNC= 'RESET';
+
+# Two transactions A,B that update the same row.
+# A releases row locks during the prepare phase, and waits using DEBUG_SYNC.
+# B then updates the same row.
+# Verify that
+#  - B's update can proceed while A is waiting for commit, showing that
+#    locks are released early.
+#  - B cannot be binlogged before A.
+
+connect(c1,127.0.0.1,root,,test,$MASTER_MYPORT,);
+connect(c2,127.0.0.1,root,,test,$MASTER_MYPORT,);
+
+connection c1;
+--echo # Connection c1
+
+# First verify that row locks are released early.
+BEGIN;
+UPDATE t1 SET a=10 WHERE k=1;
+# Wait until c2 starts COMMIT, to verify that we release our locks in prepare.
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
+send COMMIT;
+
+    connection c2;
+    --echo # Connection c2
+    SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+    BEGIN;
+    SELECT * FROM t1 WHERE k=1 FOR UPDATE;
+    UPDATE t1 SET a=20 WHERE k=1;
+    SET DEBUG_SYNC="now SIGNAL c2_committing";
+    COMMIT;
+
+connection c1;
+--echo # Connection c1
+reap;
+
+# Now verify that binlog order is correct.
+BEGIN;
+UPDATE t1 SET a=10 WHERE k=2;
+# This time wait until c2 is binlogged. This should time out, as we must not
+# allow c2 to finish commit before c1.
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
+send COMMIT;
+
+    connection c2;
+    --echo # Connection c2
+    SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+    BEGIN;
+    SELECT * FROM t1 WHERE k=2 FOR UPDATE;
+    UPDATE t1 SET a=20 WHERE k=2;
+    SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
+    send COMMIT;
+
+connection c1;
+--echo # Connection c1
+--echo # This should warn about DEBUG_SYNC timeout
+reap;
+
+connection c2;
+--echo # Connection c2
+reap;
+
+--replace_regex /xid=[0-9]+/xid=XX/
+SHOW BINLOG EVENTS LIMIT 2,12;
+
+
+connection c1;
+--echo # Connection c1
+# Now the same thing, but using autocommit.
+RESET MASTER;
+# First verify that row locks are released early.
+# Wait until c2 starts COMMIT, to verify that we release our locks in prepare.
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
+send UPDATE t1 SET a=10 WHERE k=3;
+
+    connection c2;
+    --echo # Connection c2
+    SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+    SELECT * FROM t1 WHERE k=3 FOR UPDATE;
+    SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c2_committing";
+    UPDATE t1 SET a=20 WHERE k=3;
+
+connection c1;
+--echo # Connection c1
+reap;
+
+# Now verify that binlog order is correct, this time with autocommit.
+# This time wait until c2 is binlogged. This should time out, as we must not
+# allow c2 to finish commit before c1.
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
+send UPDATE t1 SET a=10 WHERE k=4;
+
+    connection c2;
+    --echo # Connection c2
+    SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+    SELECT * FROM t1 WHERE k=4 FOR UPDATE;
+    SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
+    send UPDATE t1 SET a=20 WHERE k=4;
+
+connection c1;
+--echo # Connection c1
+--echo # This should warn about DEBUG_SYNC timeout
+reap;
+# There is a bug (patch pending) that warnings are not shown automatically
+# by the client; just do it manually. When bug fix is merged, remove the
+# manual SHOW WARNINGS.
+SHOW WARNINGS;
+
+connection c2;
+--echo # Connection c2
+reap;
+
+--replace_regex /xid=[0-9]+/xid=XX/
+SHOW BINLOG EVENTS LIMIT 1,12;
+
+
+SELECT * FROM t1 ORDER BY k;
+
+DROP TABLE t1;
+SET DEBUG_SYNC= 'RESET';

=== modified file 'sql/log.cc'
--- a/sql/log.cc	2010-11-02 07:40:27 +0000
+++ b/sql/log.cc	2010-11-03 16:07:08 +0000
@@ -4915,6 +4915,7 @@ MYSQL_BIN_LOG::write_transaction_to_binl
     DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
   }
   pthread_mutex_unlock(&LOCK_prepare_ordered);
+  DEBUG_SYNC(entry->thd, "commit_after_release_LOCK_prepare_ordered");
 
   /*
     The first in the queue handle group commit for all; the others just wait
@@ -6399,6 +6400,8 @@ TC_LOG_BINLOG::log_and_order(THD *thd, m
   else
     err= binlog_flush_trx_cache(thd, trx_data, NULL, all);
 
+  DEBUG_SYNC(thd, "binlog_after_log_and_order");
+
   DBUG_RETURN(!err);
 }
 

=== modified file 'storage/xtradb/handler/ha_innodb.cc'
--- a/storage/xtradb/handler/ha_innodb.cc	2010-11-01 14:41:09 +0000
+++ b/storage/xtradb/handler/ha_innodb.cc	2010-11-03 16:07:08 +0000
@@ -221,6 +221,7 @@ static const char* innobase_change_buffe
 static INNOBASE_SHARE *get_share(const char *table_name);
 static void free_share(INNOBASE_SHARE *share);
 static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_prepare_ordered(handlerton *hton, THD* thd, bool all);
 static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
 static int innobase_commit(handlerton *hton, THD* thd, bool all);
 static int innobase_rollback(handlerton *hton, THD* thd, bool all);
@@ -2042,6 +2043,10 @@ innobase_init(
         innobase_hton->savepoint_set=innobase_savepoint;
         innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
         innobase_hton->savepoint_release=innobase_release_savepoint;
+        if (innobase_release_locks_early)
+                innobase_hton->prepare_ordered=innobase_prepare_ordered;
+        else
+                innobase_hton->prepare_ordered=NULL;
         innobase_hton->commit_ordered=innobase_commit_ordered;
         innobase_hton->commit=innobase_commit;
         innobase_hton->rollback=innobase_rollback;
@@ -2728,6 +2733,54 @@ innobase_start_trx_and_assign_read_view(
         DBUG_RETURN(0);
 }
 
+/*****************************************************************//**
+Release row locks early during prepare phase.
+
+Only enabled if --innodb-release-locks-early=1. In this case, a prepared
+transaction is treated as committed to memory (but not to disk), and we
+release row locks at the end of the prepare phase.
+
+The consistent commit ordering guarantees of prepare_ordered() calls means
+that transactions will not be binlogged in different order than locks are
+released (which would cause trouble for statement-based replication).
+
+This optimisation is not 100% safe, so is not enabled by default. But some
+applications may decide to enable it to reduce contention on hotspot rows.
+
+The consequences of enabling this are:
+
+ - It is not possible to rollback after successful prepare(). If there is
+   a need to rollback (ie. failure to binlog the transaction), we crash the
+   server (!)
+
+ - If we crash during commit, it is possible that an application/user can have
+   seen another transaction committed that is not recovered by XA crash
+   recovery. Thus durability is partially lost. However, consistency is still
+   guaranteed, we never recover a transaction and not recover another
+   transaction that committed before. */
+static
+void
+innobase_prepare_ordered(
+/*============*/
+        handlerton *hton, /*!< in: Innodb handlerton */
+        THD*    thd,    /*!< in: MySQL thread handle of the user for whom
+                        the transaction should be committed */
+        bool    all)    /*!< in:        TRUE - commit transaction
+                                FALSE - the current SQL statement ended */
+{
+        trx_t*          trx;
+        DBUG_ENTER("innobase_prepare_ordered");
+        DBUG_ASSERT(hton == innodb_hton_ptr);
+
+        trx = check_trx_exists(thd);
+
+        mutex_enter(&kernel_mutex);
+        lock_release_off_kernel(trx);
+        mutex_exit(&kernel_mutex);
+
+        DBUG_VOID_RETURN;
+}
+
 static
 void
 innobase_commit_ordered_2(
@@ -2968,6 +3021,15 @@ innobase_rollback(
 
         row_unlock_table_autoinc_for_mysql(trx);
 
+        /* if transaction has already released locks, it is too late to
+        rollback */
+        if (innobase_release_locks_early && trx->conc_state == TRX_PREPARED
+            && UT_LIST_GET_LEN(trx->trx_locks) == 0) {
+                sql_print_error("Rollback after releasing locks! "
+                                "errno=%d, dberr=%d", errno, trx->error_state);
+                ut_error;
+        }
+
         if (all
                 || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
 
@@ -11672,6 +11734,11 @@ static	MYSQL_SYSVAR_ULINT(pass_corrupt_t
   "except for the deletion.",
   NULL, NULL, 0, 0, 1, 0);
 
+static MYSQL_SYSVAR_BOOL(release_locks_early, innobase_release_locks_early,
+  PLUGIN_VAR_READONLY,
+  "Release row locks in the prepare stage instead of in the commit stage",
+  NULL, NULL, FALSE);
+
 static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(page_size),
   MYSQL_SYSVAR(additional_mem_pool_size),
@@ -11758,6 +11825,7 @@ static struct st_mysql_sys_var* innobase
   MYSQL_SYSVAR(auto_lru_dump),
   MYSQL_SYSVAR(use_purge_thread),
   MYSQL_SYSVAR(pass_corrupt_table),
+  MYSQL_SYSVAR(release_locks_early),
   NULL
 };
 

=== modified file 'storage/xtradb/include/srv0srv.h'
--- a/storage/xtradb/include/srv0srv.h	2010-10-19 12:16:15 +0000
+++ b/storage/xtradb/include/srv0srv.h	2010-11-03 16:07:08 +0000
@@ -345,6 +345,9 @@ extern ulint srv_buf_pool_reads;
 /** Time in seconds between automatic buffer pool dumps */
 extern uint srv_auto_lru_dump;
 
+/** Release row locks already in the prepare phase */
+extern my_bool innobase_release_locks_early;
+
 /** Status variables to be passed to MySQL */
 typedef struct export_var_struct export_struc;
 

=== modified file 'storage/xtradb/include/trx0sys.ic'
--- a/storage/xtradb/include/trx0sys.ic	2010-08-03 10:54:05 +0000
+++ b/storage/xtradb/include/trx0sys.ic	2010-11-03 16:07:08 +0000
@@ -338,6 +338,12 @@ trx_list_get_min_trx_id(void)
 
 /****************************************************************//**
 Checks if a transaction with the given id is active.
+IMPORTANT ASSUMPTION:
+        It is assumed that this function is only used for the purpose of
+        determining if locks need to be created for the input transaction.
+        So if innobase_release_locks_early global option is set, and if
+        the transaction is already in prepared state, this returns FALSE,
+        as locks are no longer needed for the transaction.
 @return TRUE if active */
 UNIV_INLINE
 ibool
@@ -366,7 +372,8 @@ trx_is_active(
 
         trx = trx_get_on_id(trx_id);
         if (trx && (trx->conc_state == TRX_ACTIVE
-                    || trx->conc_state == TRX_PREPARED)) {
+                    || (trx->conc_state == TRX_PREPARED &&
+                        !innobase_release_locks_early))) {
 
                 return(TRUE);
         }

=== modified file 'storage/xtradb/srv/srv0srv.c'
--- a/storage/xtradb/srv/srv0srv.c	2010-10-19 12:16:15 +0000
+++ b/storage/xtradb/srv/srv0srv.c	2010-11-03 16:07:08 +0000
@@ -487,6 +487,9 @@ UNIV_INTERN FILE*	srv_misc_tmpfile;
 UNIV_INTERN ulint       srv_main_thread_process_no      = 0;
 UNIV_INTERN ulint       srv_main_thread_id              = 0;
 
+/* Release row locks already in the prepare phase */
+UNIV_INTERN my_bool     innobase_release_locks_early    = FALSE;
+
 /* The following count work done by srv_master_thread. */
 
 /* Iterations by the 'once per second' loop. */



More information about the commits mailing list