[Commits] Rev 2864: MWL#136: Cross-engine consistency for START TRANSACTION WITH CONSISTENT SNAPSHOT in http://bazaar.launchpad.net/~maria-captains/maria/5.1

knielsen at knielsen-hq.org knielsen at knielsen-hq.org
Sun Nov 7 23:37:44 EET 2010


At http://bazaar.launchpad.net/~maria-captains/maria/5.1

------------------------------------------------------------
revno: 2864
revision-id: knielsen at knielsen-hq.org-20101107213743-3luszsivft2vt7t6
parent: knielsen at knielsen-hq.org-20101103155438-vdou0fngj6hpgpsd
committer: knielsen at knielsen-hq.org
branch nick: work-5.1-mwl136
timestamp: Sun 2010-11-07 22:37:43 +0100
message:
  MWL#136: Cross-engine consistency for START TRANSACTION WITH CONSISTENT SNAPSHOT
  
  Make the binlog handlerton participate in START TRANSACTION WITH CONSISTENT
  SNAPSHOT, recording the binlog position corresponding to the snapshot taken
  in other MVCC storage engines.
  
  Expose this consistent binlog position as the new status variables
  binlog_trx_file and binlog_trx_position. This makes it possible to get a
  fully non-locking snapshot of the database (including binlog position for
  slave provisioning), avoiding the need for FLUSH TABLES WITH READ LOCK.
  
  Modify mysqldump to detect if the server supports this new feature, and
  if so, avoid FLUSH TABLES WITH READ LOCK for --single-transaction
  --master-data snapshot backups.
=== modified file 'client/mysqldump.c'
--- a/client/mysqldump.c	2010-08-11 10:55:54 +0000
+++ b/client/mysqldump.c	2010-11-07 21:37:43 +0000
@@ -77,6 +77,9 @@
 #define IGNORE_DATA 0x01 /* don't dump data for this table */
 #define IGNORE_INSERT_DELAYED 0x02 /* table doesn't support INSERT DELAYED */
 
+/* Chars needed to store LONGLONG, excluding trailing '\0'. */
+#define LONGLONG_LEN 20
+
 static void add_load_option(DYNAMIC_STRING *str, const char *option,
                              const char *option_value);
 static ulong find_set(TYPELIB *lib, const char *x, uint length,
@@ -344,9 +347,9 @@ static struct my_option my_long_options[
    "This causes the binary log position and filename to be appended to the "
    "output. If equal to 1, will print it as a CHANGE MASTER command; if equal"
    " to 2, that command will be prefixed with a comment symbol. "
-   "This option will turn --lock-all-tables on, unless "
-   "--single-transaction is specified too (in which case a "
-   "global read lock is only taken a short time at the beginning of the dump; "
+   "This option will turn --lock-all-tables on, unless --single-transaction "
+   "is specified too (on servers before MariaDB 5.3 this will still take a "
+   "global read lock for a short time at the beginning of the dump; "
    "don't forget to read about --single-transaction below). In all cases, "
    "any action on logs will happen at the exact moment of the dump. "
    "Option automatically turns --lock-tables off.",
@@ -1109,6 +1112,44 @@ static int fetch_db_collation(const char
 }
 
 
+/*
+  Check if server supports non-blocking binlog position using the
+  binlog_trx_file and binlog_trx_position status variables. If it does,
+  also return the position obtained if output pointers are non-NULL.
+  Returns 1 if position available, 0 if not (or if the query fails).
+*/
+static int
+check_consistent_binlog_pos(char *binlog_pos_file, char *binlog_pos_offset)
+{
+  MYSQL_RES *res;
+  MYSQL_ROW row;
+  int found;
+
+  if (mysql_query_with_error_report(mysql, &res,
+                                    "SHOW STATUS LIKE 'binlog_trx_%'"))
+    return 0;
+
+  found= 0;
+  while ((row= mysql_fetch_row(res)))
+  {
+    if (0 == strcmp(row[0], "binlog_trx_file"))
+    {
+      if (binlog_pos_file)
+        strmake(binlog_pos_file, row[1], FN_REFLEN-1);
+      found++;
+    }
+    else if (0 == strcmp(row[0], "binlog_trx_position"))
+    {
+      if (binlog_pos_offset)
+        strmake(binlog_pos_offset, row[1], LONGLONG_LEN);
+      found++;
+    }
+  }
+  mysql_free_result(res);
+
+  return (found == 2);
+}
+
 static char *my_case_str(const char *str,
                          uint str_len,
                          const char *token,
@@ -4351,42 +4392,65 @@ static int dump_selected_tables(char *db
 } /* dump_selected_tables */
 
 
-static int do_show_master_status(MYSQL *mysql_con)
+static int do_show_master_status(MYSQL *mysql_con, int consistent_binlog_pos)
 {
   MYSQL_ROW row;
   MYSQL_RES *master;
+  char binlog_pos_file[FN_REFLEN];
+  char binlog_pos_offset[LONGLONG_LEN+1];
+  char *file, *offset;
   const char *comment_prefix=
     (opt_master_data == MYSQL_OPT_MASTER_DATA_COMMENTED_SQL) ? "-- " : "";
-  if (mysql_query_with_error_report(mysql_con, &master, "SHOW MASTER STATUS"))
+
+  if (consistent_binlog_pos)
   {
-    return 1;
+    if(!check_consistent_binlog_pos(binlog_pos_file, binlog_pos_offset))
+      return 1;
+    file= binlog_pos_file;
+    offset= binlog_pos_offset;
   }
   else
   {
+    if (mysql_query_with_error_report(mysql_con, &master, "SHOW MASTER STATUS"))
+      return 1;
+
     row= mysql_fetch_row(master);
     if (row && row[0] && row[1])
     {
-      /* SHOW MASTER STATUS reports file and position */
-      if (opt_comments)
-        fprintf(md_result_file,
-                "\n--\n-- Position to start replication or point-in-time "
-                "recovery from\n--\n\n");
-      fprintf(md_result_file,
-              "%sCHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%s;\n",
-              comment_prefix, row[0], row[1]);
-      check_io(md_result_file);
+      file= row[0];
+      offset= row[1];
     }
-    else if (!ignore_errors)
+    else
     {
-      /* SHOW MASTER STATUS reports nothing and --force is not enabled */
-      my_printf_error(0, "Error: Binlogging on server not active",
-                      MYF(0));
       mysql_free_result(master);
-      maybe_exit(EX_MYSQLERR);
-      return 1;
+      if (!ignore_errors)
+      {
+        /* SHOW MASTER STATUS reports nothing and --force is not enabled */
+        my_printf_error(0, "Error: Binlogging on server not active",
+                        MYF(0));
+        maybe_exit(EX_MYSQLERR);
+        return 1;
+      }
+      else
+      {
+        return 0;
+      }
     }
-    mysql_free_result(master);
   }
+
+  /* Binlog file and position were obtained above; write them to the dump */
+  if (opt_comments)
+    fprintf(md_result_file,
+            "\n--\n-- Position to start replication or point-in-time "
+            "recovery from\n--\n\n");
+  fprintf(md_result_file,
+          "%sCHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%s;\n",
+          comment_prefix, file, offset);
+  check_io(md_result_file);
+
+  if (!consistent_binlog_pos)
+    mysql_free_result(master);
+
   return 0;
 }
 
@@ -5025,6 +5089,7 @@ int main(int argc, char **argv)
 {
   char bin_log_name[FN_REFLEN];
   int exit_code;
+  int consistent_binlog_pos= 0;
   MY_INIT("mysqldump");
 
   compatible_mode_normal_str[0]= 0;
@@ -5055,7 +5120,13 @@ int main(int argc, char **argv)
   if (!path)
     write_header(md_result_file, *argv);
 
-  if ((opt_lock_all_tables || opt_master_data) &&
+  if (opt_single_transaction && opt_master_data)
+  {
+    /* See if we can avoid FLUSH TABLES WITH READ LOCK (MariaDB 5.3+). */
+    consistent_binlog_pos= check_consistent_binlog_pos(NULL, NULL);
+  }
+
+  if ((opt_lock_all_tables || (opt_master_data && !consistent_binlog_pos)) &&
       do_flush_tables_read_lock(mysql))
     goto err;
   if (opt_single_transaction && start_transaction(mysql))
@@ -5073,7 +5144,7 @@ int main(int argc, char **argv)
       goto err;
     flush_logs= 0; /* not anymore; that would not be sensible */
   }
-  if (opt_master_data && do_show_master_status(mysql))
+  if (opt_master_data && do_show_master_status(mysql, consistent_binlog_pos))
     goto err;
   if (opt_single_transaction && do_unlock_tables(mysql)) /* unlock but no commit! */
     goto err;

=== modified file 'mysql-test/r/mysqldump-max.result'
--- a/mysql-test/r/mysqldump-max.result	2009-02-19 17:22:28 +0000
+++ b/mysql-test/r/mysqldump-max.result	2010-11-07 21:37:43 +0000
@@ -290,3 +290,59 @@ COUNT(*)
 DROP VIEW  v1;
 DROP TABLE t1;
 SET GLOBAL storage_engine=@old_engine;
+# Connection default
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (1,0), (2,0);
+SELECT GET_LOCK("block_queries_1", 120);
+GET_LOCK("block_queries_1", 120)
+1
+# Connection c3
+SELECT GET_LOCK("block_queries_2", 120);
+GET_LOCK("block_queries_2", 120)
+1
+# Connection c1
+SET @c= 0;
+SELECT IF(@c<1, @c:=@c+1, GET_LOCK("block_queries_1", 120)) FROM t1 ORDER BY a;
+# Connection c2
+SET binlog_format="row";
+SET @d= 10;
+UPDATE t2 SET b=IF(@d<=10, @d:=@d+1, GET_LOCK("block_queries_2", 120)) ORDER BY a;
+# Connection default
+# Make sure other queries are running (and waiting).
+SELECT RELEASE_LOCK("block_queries_1");
+RELEASE_LOCK("block_queries_1")
+1
+# Connection c3
+SELECT RELEASE_LOCK("block_queries_2");
+RELEASE_LOCK("block_queries_2")
+1
+# Connection c1
+IF(@c<1, @c:=@c+1, GET_LOCK("block_queries_1", 120))
+1
+1
+# Connection c2
+# Connection default
+SELECT * FROM t2 ORDER BY a;
+a       b
+1       11
+2       1
+DROP TABLE t1;
+DROP TABLE t2;
+SHOW BINLOG EVENTS LIMIT 6,3;
+Log_name        Pos     Event_type      Server_id       End_log_pos     Info
+master-bin.000001       524     Query   1       592     BEGIN
+master-bin.000001       592     Query   1       689     use `test`; INSERT INTO t2 VALUES (1,0), (2,0)
+master-bin.000001       689     Xid     1       716     COMMIT /* XID */
+-- CHANGE MASTER TO MASTER_LOG_FILE='./master-bin.000001', MASTER_LOG_POS=716;
+SELECT * FROM t1 ORDER BY a;
+a
+1
+2
+SELECT * FROM t2 ORDER BY a;
+a       b
+1       0
+2       0
+DROP TABLE t1,t2;

=== added file 'mysql-test/suite/binlog/r/binlog_consistent.result'
--- a/mysql-test/suite/binlog/r/binlog_consistent.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/binlog/r/binlog_consistent.result	2010-11-07 21:37:43 +0000
@@ -0,0 +1,99 @@
+RESET MASTER;
+# Connection default
+CREATE TABLE t1 (a INT, b VARCHAR(100), PRIMARY KEY (a,b)) ENGINE=innodb;
+SHOW MASTER STATUS;
+File    Position        Binlog_Do_DB    Binlog_Ignore_DB
+master-bin.000001       241             
+SHOW STATUS LIKE 'binlog_trx_%';
+Variable_name   Value
+binlog_trx_file ./master-bin.000001
+binlog_trx_position     241
+BEGIN;
+INSERT INTO t1 VALUES (0, "");
+# Connection con1
+BEGIN;
+INSERT INTO t1 VALUES (1, "");
+# Connection con2
+CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=myisam;
+BEGIN;
+INSERT INTO t1 VALUES (2, "first");
+INSERT INTO t2 VALUES (2);
+INSERT INTO t1 VALUES (2, "second");
+# Connection default
+COMMIT;
+SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+# Connection con3
+BEGIN;
+INSERT INTO t1 VALUES (3, "");
+INSERT INTO t2 VALUES (3);
+# Connection con4
+BEGIN;
+INSERT INTO t1 VALUES (4, "");
+COMMIT;
+# Connection default
+SELECT * FROM t1 ORDER BY a,b;
+a       b
+0       
+SHOW STATUS LIKE 'binlog_trx_%';
+Variable_name   Value
+binlog_trx_file ./master-bin.000001
+binlog_trx_position     540
+SHOW MASTER STATUS;
+File    Position        Binlog_Do_DB    Binlog_Ignore_DB
+master-bin.000001       727             
+SELECT * FROM t2 ORDER BY a;
+a
+2
+3
+# Connection con1
+COMMIT;
+# Connection con2
+COMMIT;
+# Connection con3
+COMMIT;
+FLUSH LOGS;
+# Connection default
+SELECT * FROM t1 ORDER BY a,b;
+a       b
+0       
+SHOW STATUS LIKE 'binlog_trx_%';
+Variable_name   Value
+binlog_trx_file ./master-bin.000001
+binlog_trx_position     540
+SHOW MASTER STATUS;
+File    Position        Binlog_Do_DB    Binlog_Ignore_DB
+master-bin.000002       106             
+COMMIT;
+SHOW STATUS LIKE 'binlog_trx_%';
+Variable_name   Value
+binlog_trx_file ./master-bin.000002
+binlog_trx_position     106
+SHOW MASTER STATUS;
+File    Position        Binlog_Do_DB    Binlog_Ignore_DB
+master-bin.000002       106             
+SHOW BINLOG EVENTS;
+Log_name        Pos     Event_type      Server_id       End_log_pos     Info
+master-bin.000001       4       Format_desc     1       106     Server ver: #, Binlog ver: #
+master-bin.000001       106     Query   1       241     use `test`; CREATE TABLE t1 (a INT, b VARCHAR(100), PRIMARY KEY (a,b)) ENGINE=innodb
+master-bin.000001       241     Query   1       353     use `test`; CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=myisam
+master-bin.000001       353     Query   1       421     BEGIN
+master-bin.000001       421     Query   1       513     use `test`; INSERT INTO t1 VALUES (0, "")
+master-bin.000001       513     Xid     1       540     COMMIT /* XID */
+master-bin.000001       540     Query   1       608     BEGIN
+master-bin.000001       608     Query   1       700     use `test`; INSERT INTO t1 VALUES (4, "")
+master-bin.000001       700     Xid     1       727     COMMIT /* XID */
+master-bin.000001       727     Query   1       795     BEGIN
+master-bin.000001       795     Query   1       887     use `test`; INSERT INTO t1 VALUES (1, "")
+master-bin.000001       887     Xid     1       914     COMMIT /* XID */
+master-bin.000001       914     Query   1       982     BEGIN
+master-bin.000001       982     Query   1       1079    use `test`; INSERT INTO t1 VALUES (2, "first")
+master-bin.000001       1079    Query   1       1167    use `test`; INSERT INTO t2 VALUES (2)
+master-bin.000001       1167    Query   1       1265    use `test`; INSERT INTO t1 VALUES (2, "second")
+master-bin.000001       1265    Xid     1       1292    COMMIT /* XID */
+master-bin.000001       1292    Query   1       1360    BEGIN
+master-bin.000001       1360    Query   1       1452    use `test`; INSERT INTO t1 VALUES (3, "")
+master-bin.000001       1452    Query   1       1540    use `test`; INSERT INTO t2 VALUES (3)
+master-bin.000001       1540    Xid     1       1567    COMMIT /* XID */
+master-bin.000001       1567    Rotate  1       1611    master-bin.000002;pos=4
+DROP TABLE t1,t2;

=== added file 'mysql-test/suite/binlog/t/binlog_consistent.test'
--- a/mysql-test/suite/binlog/t/binlog_consistent.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/binlog/t/binlog_consistent.test	2010-11-07 21:37:43 +0000
@@ -0,0 +1,88 @@
+--source include/have_log_bin.inc
+--source include/have_binlog_format_mixed_or_statement.inc
+--source include/have_innodb.inc
+
+RESET MASTER;
+
+# Test that we get the correct binlog position from START TRANSACTION WITH
+# CONSISTENT SNAPSHOT even when other transactions are active.
+
+connect(con1,localhost,root,,);
+connect(con2,localhost,root,,);
+connect(con3,localhost,root,,);
+connect(con4,localhost,root,,);
+
+connection default;
+--echo # Connection default
+
+CREATE TABLE t1 (a INT, b VARCHAR(100), PRIMARY KEY (a,b)) ENGINE=innodb;
+SHOW MASTER STATUS;
+SHOW STATUS LIKE 'binlog_trx_%';
+BEGIN;
+INSERT INTO t1 VALUES (0, "");
+
+connection con1;
+--echo # Connection con1
+BEGIN;
+INSERT INTO t1 VALUES (1, "");
+
+connection con2;
+--echo # Connection con2
+CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=myisam;
+BEGIN;
+INSERT INTO t1 VALUES (2, "first");
+INSERT INTO t2 VALUES (2);
+INSERT INTO t1 VALUES (2, "second");
+
+connection default;
+--echo # Connection default
+COMMIT;
+
+SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+
+connection con3;
+--echo # Connection con3
+BEGIN;
+INSERT INTO t1 VALUES (3, "");
+INSERT INTO t2 VALUES (3);
+
+connection con4;
+--echo # Connection con4
+BEGIN;
+INSERT INTO t1 VALUES (4, "");
+COMMIT;
+
+connection default;
+--echo # Connection default
+SELECT * FROM t1 ORDER BY a,b;
+SHOW STATUS LIKE 'binlog_trx_%';
+SHOW MASTER STATUS;
+SELECT * FROM t2 ORDER BY a;
+
+connection con1;
+--echo # Connection con1
+COMMIT;
+
+connection con2;
+--echo # Connection con2
+COMMIT;
+
+connection con3;
+--echo # Connection con3
+COMMIT;
+FLUSH LOGS;
+
+connection default;
+--echo # Connection default
+SELECT * FROM t1 ORDER BY a,b;
+SHOW STATUS LIKE 'binlog_trx_%';
+SHOW MASTER STATUS;
+COMMIT;
+SHOW STATUS LIKE 'binlog_trx_%';
+SHOW MASTER STATUS;
+
+--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
+SHOW BINLOG EVENTS;
+
+DROP TABLE t1,t2;

=== modified file 'mysql-test/t/mysqldump-max.test'
--- a/mysql-test/t/mysqldump-max.test	2008-09-19 13:24:32 +0000
+++ b/mysql-test/t/mysqldump-max.test	2010-11-07 21:37:43 +0000
@@ -2,6 +2,7 @@
 --source include/not_embedded.inc
 --source include/have_innodb.inc
 --source include/have_archive.inc
+--source include/have_log_bin.inc
 
 --disable_warnings
 drop table if exists t1, t2, t3, t4, t5, t6;
@@ -1124,3 +1125,83 @@ DROP VIEW  v1;
 DROP TABLE t1;
 
 SET GLOBAL storage_engine=@old_engine;
+
+# Test fully non-locking mysqldump with consistent binlog position (MWL#136).
+
+connect(c1,127.0.0.1,root,,test,$MASTER_MYPORT,);
+connect(c2,127.0.0.1,root,,test,$MASTER_MYPORT,);
+connect(c3,127.0.0.1,root,,test,$MASTER_MYPORT,);
+
+connection default;
+--echo # Connection default
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (1,0), (2,0);
+SELECT GET_LOCK("block_queries_1", 120);
+
+connection c3;
+--echo # Connection c3
+SELECT GET_LOCK("block_queries_2", 120);
+
+# Start two queries that will be running on the tables during mysqldump
+connection c1;
+--echo # Connection c1
+SET @c= 0;
+send SELECT IF(@c<1, @c:=@c+1, GET_LOCK("block_queries_1", 120)) FROM t1 ORDER BY a;
+
+connection c2;
+--echo # Connection c2
+SET binlog_format="row";
+SET @d= 10;
+send UPDATE t2 SET b=IF(@d<=10, @d:=@d+1, GET_LOCK("block_queries_2", 120)) ORDER BY a;
+
+connection default;
+--echo # Connection default
+--echo # Make sure other queries are running (and waiting).
+let $wait_condition=
+  SELECT COUNT(*) FROM information_schema.processlist
+   WHERE state = "User lock" AND info LIKE 'SELECT%block_queries_1%';
+--source include/wait_condition.inc
+let $wait_condition=
+  SELECT COUNT(*) FROM information_schema.processlist
+   WHERE state = "User lock" AND info LIKE 'UPDATE%block_queries_2%';
+--source include/wait_condition.inc
+
+--exec $MYSQL_DUMP --master-data=2 --single-transaction test t1 t2 > $MYSQLTEST_VARDIR/tmp/mwl136.sql
+
+SELECT RELEASE_LOCK("block_queries_1");
+
+connection c3;
+--echo # Connection c3
+SELECT RELEASE_LOCK("block_queries_2");
+
+connection c1;
+--echo # Connection c1
+reap;
+
+connection c2;
+--echo # Connection c2
+reap;
+
+connection default;
+--echo # Connection default
+SELECT * FROM t2 ORDER BY a;
+DROP TABLE t1;
+DROP TABLE t2;
+--exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/mwl136.sql
+
+--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
+SHOW BINLOG EVENTS LIMIT 6,3;
+--perl
+my $f= "$ENV{MYSQLTEST_VARDIR}/tmp/mwl136.sql";
+open F, '<', $f or die "Failed to open $f: $!\n";
+while (<F>) {
+   print if /CHANGE MASTER TO/;
+}
+EOF
+SELECT * FROM t1 ORDER BY a;
+SELECT * FROM t2 ORDER BY a;
+
+DROP TABLE t1,t2;

=== modified file 'sql/log.cc'
--- a/sql/log.cc	2010-11-02 07:40:27 +0000
+++ b/sql/log.cc	2010-11-07 21:37:43 +0000
@@ -62,6 +62,7 @@ static int binlog_savepoint_rollback(han
 static int binlog_commit(handlerton *hton, THD *thd, bool all);
 static int binlog_rollback(handlerton *hton, THD *thd, bool all);
 static int binlog_prepare(handlerton *hton, THD *thd, bool all);
+static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
 
 /**
   Silence all errors and warnings reported when performing a write
@@ -155,9 +156,10 @@ class binlog_trx_data {
 public:
   binlog_trx_data()
     : at_least_one_stmt_committed(0), incident(FALSE), m_pending(0),
-    before_stmt_pos(MY_OFF_T_UNDEF), commit_bin_log_file_pos(0), using_xa(0)
+    before_stmt_pos(MY_OFF_T_UNDEF), last_commit_pos_offset(0), using_xa(0)
   {
     trans_log.end_of_file= max_binlog_cache_size;
+    strcpy(last_commit_pos_file, "");
   }
 
   ~binlog_trx_data()
@@ -215,7 +217,8 @@ public:
     incident= FALSE;
     trans_log.end_of_file= max_binlog_cache_size;
     using_xa= FALSE;
-    commit_bin_log_file_pos= 0;
+    strcpy(last_commit_pos_file, "");
+    last_commit_pos_offset= 0;
     DBUG_ASSERT(empty());
   }
 
@@ -261,10 +264,14 @@ public:
   */
   my_off_t before_stmt_pos;
   /*
-    Binlog position after current commit, available to storage engines during
-    commit_ordered() and commit().
+    Binlog position for current transaction.
+    For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
+    position corresponding to the snapshot taken. During (and after) commit,
+    this is set to the binlog position corresponding to just after the
+    commit (so storage engines can store it in their transaction log).
   */
-  ulonglong commit_bin_log_file_pos;
+  char last_commit_pos_file[FN_REFLEN];
+  my_off_t last_commit_pos_offset;
 
   /*
     Flag set true if this transaction is committed with log_xid() as part of
@@ -1392,6 +1399,7 @@ int binlog_init(void *p)
   binlog_hton->commit= binlog_commit;
   binlog_hton->rollback= binlog_rollback;
   binlog_hton->prepare= binlog_prepare;
+  binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
   binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
   return 0;
 }
@@ -2771,6 +2779,11 @@ bool MYSQL_BIN_LOG::open(const char *log
     if (flush_io_cache(&log_file) ||
         my_sync(log_file.file, MYF(MY_WME)))
       goto err;
+    pthread_mutex_lock(&LOCK_commit_ordered);
+    strmake(last_commit_pos_file, log_file_name,
+            sizeof(last_commit_pos_file)-1);
+    last_commit_pos_offset= my_b_tell(&log_file);
+    pthread_mutex_unlock(&LOCK_commit_ordered);
 
     if (write_file_name_to_index_file)
     {
@@ -4199,6 +4212,25 @@ void THD::binlog_set_stmt_begin() {
   trx_data->before_stmt_pos= pos;
 }
 
+static int
+binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
+{
+  int err= 0;
+  binlog_trx_data *trx_data;
+  DBUG_ENTER("binlog_start_consistent_snapshot");
+
+  thd->binlog_setup_trx_data();
+  trx_data= (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+
+  /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
+  strmake(trx_data->last_commit_pos_file, mysql_bin_log.last_commit_pos_file,
+          sizeof(trx_data->last_commit_pos_file)-1);
+  trx_data->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
+
+  trans_register_ha(thd, TRUE, hton);
+
+  DBUG_RETURN(err);
+}
 
 /*
   Write a table map to the binary log.
@@ -4337,6 +4369,9 @@ MYSQL_BIN_LOG::flush_and_set_pending_row
         rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
       }
 
+      pthread_mutex_lock(&LOCK_commit_ordered);
+      last_commit_pos_offset= my_b_tell(&log_file);
+      pthread_mutex_unlock(&LOCK_commit_ordered);
       pthread_mutex_unlock(&LOCK_log);
     }
 
@@ -4526,7 +4561,12 @@ bool MYSQL_BIN_LOG::write(Log_event *eve
 
 err_unlock:
     if (file == &log_file)
+    {
+      pthread_mutex_lock(&LOCK_commit_ordered);
+      last_commit_pos_offset= my_b_tell(&log_file);
+      pthread_mutex_unlock(&LOCK_commit_ordered);
       pthread_mutex_unlock(&LOCK_log);
+    }
 
 err:
     if (error)
@@ -4827,6 +4867,9 @@ bool MYSQL_BIN_LOG::write_incident(THD *
     signal_update();
     rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
   }
+  pthread_mutex_lock(&LOCK_commit_ordered);
+  last_commit_pos_offset= my_b_tell(&log_file);
+  pthread_mutex_unlock(&LOCK_commit_ordered);
   pthread_mutex_unlock(&LOCK_log);
 
   DBUG_RETURN(error);
@@ -4972,9 +5015,11 @@ MYSQL_BIN_LOG::write_transaction_to_binl
 void
 MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
 {
-  DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
   uint xid_count= 0;
   uint write_count= 0;
+  my_off_t commit_offset;
+  DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
+  LINT_INIT(commit_offset);
 
   /*
     Lock the LOCK_log(), and once we get it, collect any additional writes
@@ -5035,8 +5080,11 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
         write_count++;
       }
 
-      trx_data->commit_bin_log_file_pos=
+      strmake(trx_data->last_commit_pos_file, log_file_name,
+              sizeof(trx_data->last_commit_pos_file)-1);
+      commit_offset=
         log_file.pos_in_file + (log_file.write_pos - log_file.write_buffer);
+      trx_data->last_commit_pos_offset= commit_offset;
       if (trx_data->using_xa)
         xid_count++;
     }
@@ -5078,6 +5126,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
 
   DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
   pthread_mutex_lock(&LOCK_commit_ordered);
+  last_commit_pos_offset= commit_offset;
   /*
     We cannot unlock LOCK_log until we have locked LOCK_commit_ordered;
     otherwise scheduling could allow the next group commit to run ahead of us,
@@ -6529,10 +6578,6 @@ ulonglong mysql_bin_log_file_pos(void)
 
   Since it stores the position inside THD, it is safe to call without any
   locking.
-
-  Note that currently the binlog file name is not stored inside THD, but this
-  is still safe as it can only change when the log is rotated, and we never
-  rotate the binlog while commits are pending inside storage engines.
 */
 void
 mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
@@ -6541,8 +6586,8 @@ mysql_bin_log_commit_pos(THD *thd, ulong
     (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
   if (trx_data)
   {
-    *out_pos= trx_data->commit_bin_log_file_pos;
-    *out_file= mysql_bin_log.get_log_fname();
+    *out_file= trx_data->last_commit_pos_file;
+    *out_pos= (ulonglong)(trx_data->last_commit_pos_offset);
   }
   else
   {
@@ -6555,6 +6600,8 @@ mysql_bin_log_commit_pos(THD *thd, ulong
 
 static ulonglong binlog_status_var_num_commits;
 static ulonglong binlog_status_var_num_group_commits;
+static char binlog_trx_file[FN_REFLEN];
+static ulonglong binlog_trx_position;
 
 static SHOW_VAR binlog_status_vars_detail[]=
 {
@@ -6562,12 +6609,16 @@ static SHOW_VAR binlog_status_vars_detai
     (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
   {"group_commits",
     (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
+  {"trx_file",
+    (char *)&binlog_trx_file, SHOW_CHAR},
+  {"trx_position",
+   (char *)&binlog_trx_position, SHOW_LONGLONG},
   {NullS, NullS, SHOW_LONG}
 };
 
 static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
 {
-  mysql_bin_log.set_status_variables();
+  mysql_bin_log.set_status_variables(thd);
   var->type= SHOW_ARRAY;
   var->value= (char *)&binlog_status_vars_detail;
   return 0;
@@ -6606,17 +6657,31 @@ static struct st_mysql_sys_var *binlog_s
   This is called only under LOCK_status, so we can fill in a static array.
 */
 void
-TC_LOG_BINLOG::set_status_variables()
+TC_LOG_BINLOG::set_status_variables(THD *thd)
 {
-  ulonglong num_commits, num_group_commits;
+  binlog_trx_data *trx_data;
+
+  if (thd)
+    trx_data= (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+  else
+    trx_data= NULL;
 
   pthread_mutex_lock(&LOCK_commit_ordered);
-  num_commits= this->num_commits;
-  num_group_commits= this->num_group_commits;
+  binlog_status_var_num_commits= this->num_commits;
+  binlog_status_var_num_group_commits= this->num_group_commits;
+  if (!trx_data || 0 == strcmp(trx_data->last_commit_pos_file, ""))
+  {
+    strmake(binlog_trx_file, last_commit_pos_file, sizeof(binlog_trx_file)-1);
+    binlog_trx_position= last_commit_pos_offset;
+  }
   pthread_mutex_unlock(&LOCK_commit_ordered);
 
-  binlog_status_var_num_commits= num_commits;
-  binlog_status_var_num_group_commits= num_group_commits;
+  if (trx_data && 0 != strcmp(trx_data->last_commit_pos_file, ""))
+  {
+    strmake(binlog_trx_file, trx_data->last_commit_pos_file,
+            sizeof(binlog_trx_file)-1);
+    binlog_trx_position= trx_data->last_commit_pos_offset;
+  }
 }
 
 struct st_mysql_storage_engine binlog_storage_engine=

=== modified file 'sql/log.h'
--- a/sql/log.h	2010-11-01 15:01:25 +0000
+++ b/sql/log.h	2010-11-07 21:37:43 +0000
@@ -406,6 +406,12 @@ public:
   */
   Format_description_log_event *description_event_for_exec,
     *description_event_for_queue;
+  /*
+    Binlog position of last commit (or non-transactional write) to the binlog.
+    Access to this is protected by LOCK_commit_ordered.
+  */
+  char last_commit_pos_file[FN_REFLEN];
+  my_off_t last_commit_pos_offset;
 
   MYSQL_BIN_LOG();
   /*
@@ -521,7 +527,7 @@ public:
   inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
   inline IO_CACHE *get_index_file() { return &index_file;}
   inline uint32 get_open_count() { return open_count; }
-  void set_status_variables();
+  void set_status_variables(THD *thd);
 };
 
 class Log_event_handler



More information about the commits mailing list