[Commits] b4a4e798653: MDEV-13471: Test failure on innodb.log_file_size, 4k

jan jan.lindstrom at mariadb.com
Wed Aug 9 10:06:07 EEST 2017


revision-id: b4a4e79865399a443abc6a54c809eaf60d460fee (mariadb-galera-10.0.31-3-gb4a4e798653)
parent(s): 56b03e308fb4c0feee166ddf6a707d855affa3c3
author: Jan Lindström
committer: Jan Lindström
timestamp: 2017-08-09 09:39:12 +0300
message:

MDEV-13471: Test failure on innodb.log_file_size,4k

Problem was that 4k page size is not really supported in
Galera. For reference see:
	https://github.com/codership/galera/issues/398

Page size 4k is problematic for the WSREP XID info location,
which was set to the constant UNIV_PAGE_SIZE - 3500; this conflicts
with the rseg undo slots location if there are a lot of undo tablespaces.
Undo tablespace identifiers and page numbers require
at least 126*8=1008 bytes starting from offset 56. Therefore,
WSREP XID info starting from offset 596 would overwrite several
space_id,page_no pairs storing undo tablespace ids and page
numbers. This causes an InnoDB startup failure seen as
[ERROR] InnoDB: Unable to open undo tablespace './undo30579'.

Fixed by moving WSREP XID info in 4k page size to
UNIV_PAGE_SIZE - 2500, which is currently empty. When we
read WSREP XID info, we read from both locations. This is safe
because the existence of WSREP XID info is marked by 0x77737265 in
such a way that it can't be the same as uninitialized bytes.

When WSREP XID info is written, it is always written to the new
position on 4k page size. There is no need to fix the location
for page sizes larger than 4k, as there is enough free space in
the page between the rseg undo slots and the WSREP XID info.

trx0sys.h
	Move TRX_SYS_WSREP_XID_INFO to UNIV_PAGE_SIZE-2500
	for 4K page size and for others keep it on original
	position UNIV_PAGE_SIZE-3500. Introduce a new
	constant TRX_SYS_WSREP_XID_INFO_OLD to be able to
	read WSREP XID info also from old location.

trx_sys_read_wsrep_checkpoint()
	Read WSREP XID info from both the new location and
	the old location. If WSREP XID info is not found
	at either location, it is initialized at the correct
	location. If WSREP XID info is found, set the
	offset to the location where it was found.

There is no new test case as test cases
      innodb.log_file_size,4k
      innodb-alter-tempfile,4k

fail if this fix is not there.

Note that if WSREP XID info has overwritten undo log
tablespace id and page_no pairs, server startup fails and
SST is needed.

---
 storage/innobase/include/trx0sys.h |  7 +++-
 storage/innobase/trx/trx0sys.cc    | 82 +++++++++++++++++---------------------
 storage/xtradb/include/trx0sys.h   |  7 +++-
 storage/xtradb/trx/trx0sys.cc      | 82 +++++++++++++++++---------------------
 4 files changed, 84 insertions(+), 94 deletions(-)

diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 8c6b13f9dd4..4107bfdb9f1 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -550,7 +551,11 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
 
 #ifdef WITH_WSREP
 /* The offset to WSREP XID headers */
-#define TRX_SYS_WSREP_XID_INFO (UNIV_PAGE_SIZE - 3500)
+/* In 4K page size rseg slots (max 126) use at least 126*8 bytes.
+Therefore WSREP XID must be on different byte offset. */
+#define TRX_SYS_WSREP_XID_INFO (UNIV_PAGE_SIZE > 4096 ? UNIV_PAGE_SIZE - 3500 : UNIV_PAGE_SIZE - 2500)
+#define TRX_SYS_WSREP_XID_INFO_OLD (UNIV_PAGE_SIZE - 3500)
+
 #define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
 #define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
 
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 7f8bbafe99c..6364800d547 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -321,49 +322,12 @@ trx_sys_print_mysql_binlog_offset(void)
 
 #ifdef WITH_WSREP
 
-#ifdef UNIV_DEBUG
-static long long trx_sys_cur_xid_seqno = -1;
-static unsigned char trx_sys_cur_xid_uuid[16];
-
-long long read_wsrep_xid_seqno(const XID* xid)
-{
-    long long seqno;
-    memcpy(&seqno, xid->data + 24, sizeof(long long));
-    return seqno;
-}
-
-void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
-{
-    memcpy(buf, xid->data + 8, 16);
-}
-
-#endif /* UNIV_DEBUG */
-
 void
 trx_sys_update_wsrep_checkpoint(
         const XID*      xid,        /*!< in: transaction XID */
         trx_sysf_t*     sys_header, /*!< in: sys_header */
         mtr_t*          mtr)        /*!< in: mtr */
 {
-#ifdef UNIV_DEBUG
-        {
-            /* Check that seqno is monotonically increasing */
-            unsigned char xid_uuid[16];
-            long long xid_seqno = read_wsrep_xid_seqno(xid);
-            read_wsrep_xid_uuid(xid, xid_uuid);
-            if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8))
-            {
-                ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
-                trx_sys_cur_xid_seqno = xid_seqno;
-            }
-            else
-            {
-                memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
-            }
-            trx_sys_cur_xid_seqno = xid_seqno;
-        }
-#endif /* UNIV_DEBUG */
-
         ut_ad(xid && mtr);
         ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid));
 
@@ -401,7 +365,8 @@ trx_sys_read_wsrep_checkpoint(XID* xid)
 {
         trx_sysf_t* sys_header;
 	mtr_t	    mtr;
-        ulint       magic;
+	ulint	    magic, magic_old;
+	ulint	    offset;
 
         ut_ad(xid);
 
@@ -409,9 +374,25 @@ trx_sys_read_wsrep_checkpoint(XID* xid)
 
 	sys_header = trx_sysf_get(&mtr);
 
-        if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
-                                      + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
-            != TRX_SYS_WSREP_XID_MAGIC_N) {
+	magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
+		+ TRX_SYS_WSREP_XID_MAGIC_N_FLD);
+
+	/* In 4k page size WSREP XID info was before 5.5.58-galera,
+	10.0.32-galera, 10.1.27, 10.2.8 in incorrect location overlapping
+	rseg slots. Here, we need to read WSREP XID info from
+	both locations in 4k page size case. */
+	if (UNIV_PAGE_SIZE == 4096) {
+		magic_old = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO_OLD
+			+ TRX_SYS_WSREP_XID_MAGIC_N_FLD);
+	} else {
+		magic_old = magic;
+	}
+
+	/* Here we check is there WSREP XID info on both locations. In
+	old releases it is not possible to have exactly these bytes
+	as uninitialized bytes. */
+        if (magic != TRX_SYS_WSREP_XID_MAGIC_N
+	    && magic_old != TRX_SYS_WSREP_XID_MAGIC_N) {
                 memset(xid, 0, sizeof(*xid));
                 xid->formatID = -1;
                 trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
@@ -419,18 +400,27 @@ trx_sys_read_wsrep_checkpoint(XID* xid)
                 return;
         }
 
+	/* Set up the position where to read WSREP XID that was
+	found. Old incorrect offset is unlikely. */
+	if (UNIV_LIKELY(magic == TRX_SYS_WSREP_XID_MAGIC_N)) {
+		offset = TRX_SYS_WSREP_XID_INFO;
+	} else {
+		offset = TRX_SYS_WSREP_XID_INFO_OLD;
+	}
+
         xid->formatID     = (int)mach_read_from_4(
                 sys_header
-                + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
+                + offset + TRX_SYS_WSREP_XID_FORMAT);
         xid->gtrid_length = (int)mach_read_from_4(
                 sys_header
-                + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
+                + offset + TRX_SYS_WSREP_XID_GTRID_LEN);
         xid->bqual_length = (int)mach_read_from_4(
                 sys_header
-                + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
+                + offset + TRX_SYS_WSREP_XID_BQUAL_LEN);
         ut_memcpy(xid->data,
-                  sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
-                  XIDDATASIZE);
+		sys_header
+		+ offset + TRX_SYS_WSREP_XID_DATA,
+		XIDDATASIZE);
 
 	mtr_commit(&mtr);
 }
diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h
index e4956c6a822..eb217ba0dde 100644
--- a/storage/xtradb/include/trx0sys.h
+++ b/storage/xtradb/include/trx0sys.h
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -571,7 +572,11 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
 
 #ifdef WITH_WSREP
 /* The offset to WSREP XID headers */
-#define TRX_SYS_WSREP_XID_INFO (UNIV_PAGE_SIZE - 3500)
+/* In 4K page size rseg slots (max 126) use at least 126*8 bytes.
+Therefore WSREP XID must be on different byte offset. */
+#define TRX_SYS_WSREP_XID_INFO (UNIV_PAGE_SIZE > 4096 ? UNIV_PAGE_SIZE - 3500 : UNIV_PAGE_SIZE - 2500)
+#define TRX_SYS_WSREP_XID_INFO_OLD (UNIV_PAGE_SIZE - 3500)
+
 #define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
 #define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
 
diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc
index 9a0b7180435..e4d879dba59 100644
--- a/storage/xtradb/trx/trx0sys.cc
+++ b/storage/xtradb/trx/trx0sys.cc
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -321,49 +322,12 @@ trx_sys_print_mysql_binlog_offset(void)
 
 #ifdef WITH_WSREP
 
-#ifdef UNIV_DEBUG
-static long long trx_sys_cur_xid_seqno = -1;
-static unsigned char trx_sys_cur_xid_uuid[16];
-
-long long read_wsrep_xid_seqno(const XID* xid)
-{
-    long long seqno;
-    memcpy(&seqno, xid->data + 24, sizeof(long long));
-    return seqno;
-}
-
-void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
-{
-    memcpy(buf, xid->data + 8, 16);
-}
-
-#endif /* UNIV_DEBUG */
-
 void
 trx_sys_update_wsrep_checkpoint(
         const XID*      xid,        /*!< in: transaction XID */
         trx_sysf_t*     sys_header, /*!< in: sys_header */
         mtr_t*          mtr)        /*!< in: mtr */
 {
-#ifdef UNIV_DEBUG
-        {
-            /* Check that seqno is monotonically increasing */
-            unsigned char xid_uuid[16];
-            long long xid_seqno = read_wsrep_xid_seqno(xid);
-            read_wsrep_xid_uuid(xid, xid_uuid);
-            if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8))
-            {
-                ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
-                trx_sys_cur_xid_seqno = xid_seqno;
-            }
-            else
-            {
-                memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
-            }
-            trx_sys_cur_xid_seqno = xid_seqno;
-        }
-#endif /* UNIV_DEBUG */
-
         ut_ad(xid && mtr);
         ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid((const void *)xid));
 
@@ -401,7 +365,8 @@ trx_sys_read_wsrep_checkpoint(XID* xid)
 {
         trx_sysf_t* sys_header;
 	mtr_t	    mtr;
-        ulint       magic;
+	ulint	    magic, magic_old;
+	ulint	    offset;
 
         ut_ad(xid);
 
@@ -409,9 +374,25 @@ trx_sys_read_wsrep_checkpoint(XID* xid)
 
 	sys_header = trx_sysf_get(&mtr);
 
-        if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
-                                      + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
-            != TRX_SYS_WSREP_XID_MAGIC_N) {
+	magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
+		+ TRX_SYS_WSREP_XID_MAGIC_N_FLD);
+
+	/* In 4k page size WSREP XID info was before 5.5.58-galera,
+	10.0.32-galera, 10.1.27, 10.2.8 in incorrect location overlapping
+	rseg slots. Here, we need to read WSREP XID info from
+	both locations in 4k page size case. */
+	if (UNIV_PAGE_SIZE == 4096) {
+		magic_old = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO_OLD
+			+ TRX_SYS_WSREP_XID_MAGIC_N_FLD);
+	} else {
+		magic_old = magic;
+	}
+
+	/* Here we check is there WSREP XID info on both locations. In
+	old releases it is not possible to have exactly these bytes
+	as uninitialized bytes. */
+        if (magic != TRX_SYS_WSREP_XID_MAGIC_N
+	    && magic_old != TRX_SYS_WSREP_XID_MAGIC_N) {
                 memset(xid, 0, sizeof(*xid));
                 xid->formatID = -1;
                 trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
@@ -419,18 +400,27 @@ trx_sys_read_wsrep_checkpoint(XID* xid)
                 return;
         }
 
+	/* Set up the position where to read WSREP XID that was
+	found. Old incorrect offset is unlikely. */
+	if (UNIV_LIKELY(magic == TRX_SYS_WSREP_XID_MAGIC_N)) {
+		offset = TRX_SYS_WSREP_XID_INFO;
+	} else {
+		offset = TRX_SYS_WSREP_XID_INFO_OLD;
+	}
+
         xid->formatID     = (int)mach_read_from_4(
                 sys_header
-                + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
+                + offset + TRX_SYS_WSREP_XID_FORMAT);
         xid->gtrid_length = (int)mach_read_from_4(
                 sys_header
-                + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
+                + offset + TRX_SYS_WSREP_XID_GTRID_LEN);
         xid->bqual_length = (int)mach_read_from_4(
                 sys_header
-                + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
+                + offset + TRX_SYS_WSREP_XID_BQUAL_LEN);
         ut_memcpy(xid->data,
-                  sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
-                  XIDDATASIZE);
+		sys_header
+		+ offset + TRX_SYS_WSREP_XID_DATA,
+		XIDDATASIZE);
 
 	mtr_commit(&mtr);
 }


More information about the commits mailing list