[Commits] Rev 3435: MDEV-232: After-review fixes. in http://bazaar.launchpad.net/~maria-captains/maria/10.0

knielsen at knielsen-hq.org knielsen at knielsen-hq.org
Thu Sep 13 14:20:14 EEST 2012


At http://bazaar.launchpad.net/~maria-captains/maria/10.0

------------------------------------------------------------
revno: 3435
revision-id: knielsen at knielsen-hq.org-20120904100634-6timbnsj7q2a1r6w
parent: knielsen at knielsen-hq.org-20120622102029-0ajf9octd54mghie
committer: knielsen at knielsen-hq.org
branch nick: work-5.5-mdev225-181-232-rebased
timestamp: Tue 2012-09-04 12:06:34 +0200
message:
  MDEV-232: After-review fixes.
=== modified file 'mysql-test/suite/binlog/t/binlog_checkpoint.test'
--- a/mysql-test/suite/binlog/t/binlog_checkpoint.test	2012-06-22 10:20:29 +0000
+++ b/mysql-test/suite/binlog/t/binlog_checkpoint.test	2012-09-04 10:06:34 +0000
@@ -87,8 +87,17 @@ connection con1;
 reap;
 
 connection default;
+
 --echo No commit checkpoints are pending, a new binlog checkpoint should have been logged.
 --let $binlog_file= master-bin.000003
+
+# Wait for the master-bin.000003 binlog checkpoint to appear.
+--let $wait_for_all= 0
+--let $show_statement= SHOW BINLOG EVENTS IN "$binlog_file"
+--let $field= Info
+--let $condition= = "master-bin.000003"
+--source include/wait_show_condition.inc
+
 --source include/show_binlog_events.inc
 
 

=== modified file 'mysql-test/suite/binlog/t/binlog_xa_recover.test'
--- a/mysql-test/suite/binlog/t/binlog_xa_recover.test	2012-06-22 10:20:29 +0000
+++ b/mysql-test/suite/binlog/t/binlog_xa_recover.test	2012-09-04 10:06:34 +0000
@@ -162,6 +162,13 @@ connection con11;
 reap;
 
 connection default;
+# Wait for the last (master-bin.000004) binlog checkpoint to appear.
+--let $wait_for_all= 0
+--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000004"
+--let $field= Info
+--let $condition= = "master-bin.000004"
+--source include/wait_show_condition.inc
+
 --echo Checking that master-bin.000004 is the last binlog checkpoint
 --source include/show_binlog_events.inc
 

=== modified file 'sql/handler.cc'
--- a/sql/handler.cc	2012-06-22 10:20:29 +0000
+++ b/sql/handler.cc	2012-09-04 10:06:34 +0000
@@ -1821,7 +1821,7 @@ bool mysql_xa_recover(THD *thd)
   See comments on handlerton method commit_checkpoint_request() for details.
 */
 void
-ha_commit_checkpoint_notify(handlerton *hton, void *cookie)
+commit_checkpoint_notify_ha(handlerton *hton, void *cookie)
 {
   tc_log->commit_checkpoint_notify(cookie);
 }

=== modified file 'sql/handler.h'
--- a/sql/handler.h	2012-06-22 10:20:29 +0000
+++ b/sql/handler.h	2012-09-04 10:06:34 +0000
@@ -989,25 +989,28 @@ struct handlerton
      An engine that does implement commit_checkpoint_request() is also
      expected to implement commit_ordered(), so that ordering of commits is
      consistent between 2pc participants. Such engine is no longer required to
-     durably flush to disk transactions as part of a commit() that has been
-     successfully prepare()d and commit_ordered(), thus potentionally saving
-     one fsync() call.
+     durably flush to disk transactions in commit(), provided that the
+     transaction has been successfully prepare()d and commit_ordered(); thus
+     potentionally saving one fsync() call. (Engine must still durably flush
+     to disk in commit() when no prepare()/commit_ordered() steps took place,
+     at least if durable commits are wanted; this happens eg. if binlog is
+     disabled).
 
      The TC will periodically (eg. once per binlog rotation) call
      commit_checkpoint_request(). When this happens, the engine must arrange
      for all transaction that have completed commit_ordered() to be durably
      flushed to disk (this does not include transactions that might be in the
      middle of executing commit_ordered()). When such flush has completed, the
-     engine must call ha_commit_checkpoint_notify(), passing back the opaque
+     engine must call commit_checkpoint_notify_ha(), passing back the opaque
      "cookie".
 
-     The flush and call of ha_commit_checkpoint_notify() need not happen
+     The flush and call of commit_checkpoint_notify_ha() need not happen
      immediately - it can be scheduled and performed asynchroneously (ie. as
      part of next prepare(), or sync every second, or whatever), but should
      not be postponed indefinitely. It is however also permissible to do it
      immediately, before returning from commit_checkpoint_request().
 
-     When ha_commit_checkpoint_notify() is called, the TC will know that the
+     When commit_checkpoint_notify_ha() is called, the TC will know that the
      transactions are durably committed, and thus no longer require XA
      recovery. It uses that to reduce the work needed for any subsequent XA
      recovery process.
@@ -3095,7 +3098,7 @@ int ha_binlog_end(THD *thd);
 const char *get_canonical_filename(handler *file, const char *path,
                                    char *tmp_path);
 bool mysql_xa_recover(THD *thd);
-void ha_commit_checkpoint_notify(handlerton *hton, void *cookie);
+void commit_checkpoint_notify_ha(handlerton *hton, void *cookie);
 
 inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
 {

=== modified file 'sql/log.cc'
--- a/sql/log.cc	2012-06-22 10:20:29 +0000
+++ b/sql/log.cc	2012-09-04 10:06:34 +0000
@@ -479,7 +479,14 @@ class binlog_cache_mngr {
   */
   bool using_xa;
   my_xid xa_xid;
-  ulong cookie;
+  bool need_unlog;
+  /*
+    Id of binlog that transaction was written to; only needed if need_unlog is
+    true.
+  */
+  ulong binlog_id;
+  /* Set if we get an error during commit that must be returned from unlog(). */
+  bool delayed_error;
 
 private:
 
@@ -1678,8 +1685,7 @@ binlog_flush_cache(THD *thd, binlog_cach
       So there is no work to do. Therefore, we will not increment any XID
       count, so we must not decrement any XID count in unlog().
     */
-    if (cache_mngr->using_xa && cache_mngr->xa_xid)
-      cache_mngr->cookie= BINLOG_COOKIE_DUMMY;
+    cache_mngr->need_unlog= 0;
   }
   cache_mngr->reset(using_stmt, using_trx);
 
@@ -2913,7 +2919,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_
    checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
    relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
    description_event_for_exec(0), description_event_for_queue(0),
-   current_binlog_id(BINLOG_COOKIE_START)
+   current_binlog_id(0)
 {
   /*
     We don't want to initialize locks here as such initialization depends on
@@ -3317,7 +3323,7 @@ bool MYSQL_BIN_LOG::open(const char *log
       the new binlog file in binlog_xid_count_list.
     */
     mysql_mutex_lock(&LOCK_xid_list);
-    current_binlog_id+= BINLOG_COOKIE_ID_INCREMENT;
+    ++current_binlog_id;
     new_xid_list_entry->binlog_id= current_binlog_id;
     /* Remove any initial entries with no pending XIDs.  */
     while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
@@ -5587,7 +5593,7 @@ MYSQL_BIN_LOG::do_checkpoint_request(ulo
     commit_checkpoint_notify() calls are done, and we can log a new binlog
     checkpoint.
   */
-  mark_xid_done(binlog_id);
+  mark_xid_done(binlog_id, true);
 }
 
 
@@ -5662,20 +5668,7 @@ int MYSQL_BIN_LOG::rotate(bool force_rot
         We failed to rotate - so we have to decrement the xid_count back that
         we incremented before attempting the rotate.
       */
-      mysql_mutex_lock(&LOCK_xid_list);
-      I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
-      xid_count_per_binlog *b;
-      while ((b= it++))
-      {
-        if (b->binlog_id == binlog_id)
-        {
-          --b->xid_count;
-          break;
-        }
-      }
-      /* Binlog is always found, as we do not remove until count reaches 0 */
-      DBUG_ASSERT(b);
-      mysql_mutex_unlock(&LOCK_xid_list);
+      mark_xid_done(binlog_id, false);
     }
     else
       *check_purge= true;
@@ -6225,6 +6218,9 @@ MYSQL_BIN_LOG::write_transaction_to_binl
 bool
 MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
 {
+  bool check_purge;
+  ulong binlog_id;
+
   /*
     To facilitate group commit for the binlog, we first queue up ourselves in
     the group commit queue. Then the first thread to enter the queue waits for
@@ -6255,7 +6251,7 @@ MYSQL_BIN_LOG::write_transaction_to_binl
   if (orig_queue != NULL)
     entry->thd->wait_for_wakeup_ready();
   else
-    trx_group_commit_leader(entry);
+    trx_group_commit_leader(entry, &check_purge, &binlog_id);
 
   if (!opt_optimize_thread_scheduling)
   {
@@ -6281,6 +6277,18 @@ MYSQL_BIN_LOG::write_transaction_to_binl
     {
       next->thd->signal_wakeup_ready();
     }
+    else
+    {
+      /*
+        If we rotated the binlog, and if we are using the unoptimized thread
+        scheduling where every thread runs its own commit_ordered(), then we
+        must do the commit checkpoint and log purge here, after all
+        commit_ordered() calls have finished, and locks have been released.
+      */
+      if (check_purge)
+        checkpoint_and_purge(binlog_id);
+    }
+
   }
 
   if (likely(!entry->error))
@@ -6311,8 +6319,9 @@ MYSQL_BIN_LOG::write_transaction_to_binl
     we need to mark it as not needed for recovery (unlog() is not called
     for a transaction if log_xid() fails).
   */
-  if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid)
-    mark_xid_done(entry->cache_mngr->cookie);
+  if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid &&
+      entry->cache_mngr->need_unlog)
+    mark_xid_done(entry->cache_mngr->binlog_id, true);
 
   return 1;
 }
@@ -6328,7 +6337,9 @@ MYSQL_BIN_LOG::write_transaction_to_binl
 
  */
 void
-MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
+MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader,
+                                       bool *check_purge_ptr,
+                                       ulong *binlog_id_ptr)
 {
   uint xid_count= 0;
   my_off_t UNINIT_VAR(commit_offset);
@@ -6404,10 +6415,13 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
         if (current->need_unlog)
         {
           xid_count++;
-          cache_mngr->cookie= binlog_id;
+          cache_mngr->need_unlog= true;
+          cache_mngr->binlog_id= binlog_id;
         }
         else
-          cache_mngr->cookie= BINLOG_COOKIE_DUMMY;
+          cache_mngr->need_unlog= false;
+
+        cache_mngr->delayed_error= false;
       }
     }
 
@@ -6476,7 +6490,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
         Instead set a flag so that we can return error later, from unlog(),
         when the transaction has been safely committed in the engine.
       */
-      leader->cache_mngr->cookie|= BINLOG_COOKIE_ERROR_FLAG;
+      leader->cache_mngr->delayed_error= true;
       my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, errno);
       check_purge= false;
     }
@@ -6510,6 +6524,15 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
       mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
     group_commit_queue_busy= TRUE;
 
+    /*
+      Return these so parent can run checkpoint_and_purge().
+      (When using optimized thread scheduling, we run checkpoint_and_purge()
+      in this function, so parent does not need to and we need not return
+      these values).
+    */
+    *check_purge_ptr= check_purge;
+    *binlog_id_ptr= binlog_id;
+
     /* Note that we return with LOCK_commit_ordered locked! */
     DBUG_VOID_RETURN;
   }
@@ -7623,13 +7646,14 @@ int TC_LOG_MMAP::unlog(ulong cookie, my_
       mysql_mutex_unlock(&LOCK_pending_checkpoint);
       return 1;
     }
-    pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
-    if (pending_checkpoint->count == sizeof(pending_checkpoint->cookies) /
-        sizeof(pending_checkpoint->cookies[0]))
-    {
-      full_buffer= pending_checkpoint;
-      pending_checkpoint= NULL;
-    }
+  }
+
+  pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
+  if (pending_checkpoint->count == sizeof(pending_checkpoint->cookies) /
+      sizeof(pending_checkpoint->cookies[0]))
+  {
+    full_buffer= pending_checkpoint;
+    pending_checkpoint= NULL;
   }
   mysql_mutex_unlock(&LOCK_pending_checkpoint);
 
@@ -7920,9 +7944,6 @@ TC_LOG_BINLOG::log_and_order(THD *thd, m
 
   cache_mngr->using_xa= TRUE;
   cache_mngr->xa_xid= xid;
-#ifndef DBUG_OFF
-  cache_mngr->cookie= 0;
-#endif
   err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
 
   DEBUG_SYNC(thd, "binlog_after_log_and_order");
@@ -7933,10 +7954,11 @@ TC_LOG_BINLOG::log_and_order(THD *thd, m
     If using explicit user XA, we will not have XID. We must still return a
     non-zero cookie (as zero cookie signals error).
   */
-  if (!xid)
-    DBUG_RETURN(BINLOG_COOKIE_DUMMY);
-  DBUG_ASSERT(cache_mngr->cookie != 0);
-  DBUG_RETURN(cache_mngr->cookie);
+  if (!xid || !cache_mngr->need_unlog)
+    DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
+  else
+    DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
+                                   cache_mngr->delayed_error));
 }
 
 /*
@@ -7951,20 +7973,18 @@ TC_LOG_BINLOG::log_and_order(THD *thd, m
   binary log.
 */
 void
-TC_LOG_BINLOG::mark_xids_active(ulong cookie, uint xid_count)
+TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count)
 {
   xid_count_per_binlog *b;
 
   DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
-  DBUG_PRINT("info", ("cookie=%lu xid_count=%u", cookie, xid_count));
-  cookie&= ~(ulong)BINLOG_COOKIE_ERROR_FLAG;
-  DBUG_ASSERT(cookie != 0 && cookie != BINLOG_COOKIE_DUMMY);
+  DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count));
 
   mysql_mutex_lock(&LOCK_xid_list);
   I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
   while ((b= it++))
   {
-    if (b->binlog_id == cookie)
+    if (b->binlog_id == binlog_id)
     {
       b->xid_count += xid_count;
       break;
@@ -7990,16 +8010,13 @@ TC_LOG_BINLOG::mark_xids_active(ulong co
   checkpoint.
 */
 void
-TC_LOG_BINLOG::mark_xid_done(ulong cookie)
+TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
 {
   xid_count_per_binlog *b;
   bool first;
   ulong current;
 
   DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
-  cookie&= ~(ulong)BINLOG_COOKIE_ERROR_FLAG;
-  if (cookie == BINLOG_COOKIE_DUMMY)
-    DBUG_VOID_RETURN;                           /* Nothing to do. */
 
   mysql_mutex_lock(&LOCK_xid_list);
   current= current_binlog_id;
@@ -8007,7 +8024,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong cooki
   first= true;
   while ((b= it++))
   {
-    if (b->binlog_id == cookie)
+    if (b->binlog_id == binlog_id)
     {
       --b->xid_count;
       break;
@@ -8030,7 +8047,8 @@ TC_LOG_BINLOG::mark_xid_done(ulong cooki
     DBUG_VOID_RETURN;
   }
 
-  if (likely(cookie == current) || b->xid_count != 0 || !first)
+  if (likely(binlog_id == current) || b->xid_count != 0 || !first ||
+      !write_checkpoint)
   {
     /* No new binlog checkpoint reached yet. */
     mysql_mutex_unlock(&LOCK_xid_list);
@@ -8090,18 +8108,19 @@ int TC_LOG_BINLOG::unlog(ulong cookie, m
   if (!xid)
     DBUG_RETURN(0);
 
-  mark_xid_done(cookie);
+  if (!BINLOG_COOKIE_IS_DUMMY(cookie))
+    mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true);
   /*
     See comment in trx_group_commit_leader() - if rotate() gave a failure,
     we delay the return of error code to here.
   */
-  DBUG_RETURN(test(cookie & BINLOG_COOKIE_ERROR_FLAG));
+  DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie));
 }
 
 void
 TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
 {
-  mark_xid_done(((xid_count_per_binlog *)cookie)->binlog_id);
+  mark_xid_done(((xid_count_per_binlog *)cookie)->binlog_id, true);
 }
 
 int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,

=== modified file 'sql/log.h'
--- a/sql/log.h	2012-06-22 10:20:29 +0000
+++ b/sql/log.h	2012-09-04 10:06:34 +0000
@@ -370,15 +370,30 @@ class MYSQL_QUERY_LOG: public MYSQL_LOG
 
 /*
   We assign each binlog file an internal ID, used to identify them for unlog().
-  Ids start from BINLOG_COOKIE_START; the value BINLOG_COOKIE_DUMMY is special
-  meaning "no binlog" (we cannot use zero as that is reserved for error return
-  from log_and_order).
-  The low-order bit is reserved for an error flag.
-*/
-#define BINLOG_COOKIE_ERROR_FLAG 1
-#define BINLOG_COOKIE_ID_INCREMENT (BINLOG_COOKIE_ERROR_FLAG*2)
-#define BINLOG_COOKIE_DUMMY (BINLOG_COOKIE_ID_INCREMENT*1)
-#define BINLOG_COOKIE_START (BINLOG_COOKIE_ID_INCREMENT*2)
+  The IDs start from 0 and increment for each new binlog created.
+
+  In unlog() we need to know the ID of the binlog file that the corresponding
+  transaction was written into. We also need a special value for a corner
+  case where there is no corresponding binlog id (since nothing was logged).
+  And we need an error flag to mark that unlog() must return failure.
+
+  We use the following macros to pack all of this information into the single
+  ulong available with log_and_order() / unlog().
+
+  Note that we cannot use the value 0 for cookie, as that is reserved as error
+  return value from log_and_order().
+  */
+#define BINLOG_COOKIE_ERROR_RETURN 0
+#define BINLOG_COOKIE_DUMMY_ID 1
+#define BINLOG_COOKIE_BASE 2
+#define BINLOG_COOKIE_DUMMY(error_flag) \
+  ( (BINLOG_COOKIE_DUMMY_ID<<1) | ((error_flag)&1) )
+#define BINLOG_COOKIE_MAKE(id, error_flag) \
+  ( (((id)+BINLOG_COOKIE_BASE)<<1) | ((error_flag)&1) )
+#define BINLOG_COOKIE_GET_ERROR_FLAG(c) ((c) & 1)
+#define BINLOG_COOKIE_GET_ID(c) ( ((ulong)(c)>>1) - BINLOG_COOKIE_BASE )
+#define BINLOG_COOKIE_IS_DUMMY(c) \
+  ( ((ulong)(c)>>1) == BINLOG_COOKIE_DUMMY_ID )
 
 void binlog_checkpoint_callback(void *cookie);
 
@@ -531,8 +546,9 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
   void purge();
   int write_transaction_or_stmt(group_commit_entry *entry);
   bool write_transaction_to_binlog_events(group_commit_entry *entry);
-  void trx_group_commit_leader(group_commit_entry *leader);
-  void mark_xid_done(ulong cookie);
+  void trx_group_commit_leader(group_commit_entry *leader,
+                               bool *check_purge_ptr, ulong *binlog_id_ptr);
+  void mark_xid_done(ulong cookie, bool write_checkpoint);
   void mark_xids_active(ulong cookie, uint xid_count);
 
 public:

=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc	2012-06-22 10:20:29 +0000
+++ b/storage/innobase/handler/ha_innodb.cc	2012-09-04 10:06:34 +0000
@@ -3018,7 +3018,7 @@ innobase_checkpoint_request(
         void *cookie)
 {
         log_buffer_flush_to_disk();
-        ha_commit_checkpoint_notify(hton, cookie);
+        commit_checkpoint_notify_ha(hton, cookie);
 }
 
 /*****************************************************************//**

=== modified file 'storage/xtradb/handler/ha_innodb.cc'
--- a/storage/xtradb/handler/ha_innodb.cc	2012-06-22 10:20:29 +0000
+++ b/storage/xtradb/handler/ha_innodb.cc	2012-09-04 10:06:34 +0000
@@ -484,12 +484,16 @@ static MYSQL_THDVAR_ULONG(lock_wait_time
   NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
 
 static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
-  "Set to 0 (write and flush once per second),"
-  " 1 (write and flush at each commit)"
-  " or 2 (write at commit, flush once per second)."
-  " Can also be set to 3 to write and flush at each commit, even when this is"
-  " unnecessary because of binlog crash recovery; should not normally be used"
-  " due to performance degration, but provided for completeness.",
+  "Controls the durability/speed trade-off for commits."
+  " Set to 0 (write and flush redo log to disk only once per second),"
+  " 1 (flush to disk at each commit),"
+  " 2 (write to log at commit but flush to disk only once per second)"
+  " or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
+  " 1 and 3 guarantees that after a crash, committed transactions will"
+  " not be lost and will be consistent with the binlog and other transactional"
+  " engines. 2 can get inconsistent and lose transactions if there is a"
+  " power failure or kernel crash but not if mysqld crashes. 0 has no"
+  " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
   NULL, NULL, 1, 0, 3, 0);
 
 static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG,
@@ -3506,7 +3510,7 @@ innobase_checkpoint_request(
         void *cookie)
 {
         log_buffer_flush_to_disk();
-        ha_commit_checkpoint_notify(hton, cookie);
+        commit_checkpoint_notify_ha(hton, cookie);
 }
 
 /*****************************************************************//**



More information about the commits mailing list