[Commits] 47396ddea91: Merge 5.5 into 10.0

marko.makela at mariadb.com marko.makela at mariadb.com
Wed Mar 8 11:43:18 EET 2017


revision-id: 47396ddea91698f6ea1829697566a4d7c31bc789 (mariadb-10.0.29-63-g47396ddea91)
parent(s): c4f3e64c23fe7f7fd18c0a79f87f9771df15fe9f 6860a4b55664d8fc53636906240c089417e1849d
author: Marko Mäkelä
committer: Marko Mäkelä
timestamp: 2017-03-08 11:40:43 +0200
message:

Merge 5.5 into 10.0

Also, implement MDEV-11027 a little differently from 5.5:

recv_sys_t::report(ib_time_t): Determine whether progress should
be reported.

recv_apply_hashed_log_recs(): Rename the parameter to last_batch.


 mysql-test/r/subselect4.result                 |  34 ++++
 mysql-test/r/subselect_innodb.result           |  15 ++
 mysql-test/suite/innodb/r/log_file_size.result |  12 +-
 mysql-test/suite/innodb/t/log_file_size.test   |  16 +-
 mysql-test/t/subselect4.test                   |  19 ++
 mysql-test/t/subselect_innodb.test             |  59 ++-----
 plugin/server_audit/server_audit.c             |   2 +-
 sql/item_subselect.cc                          |   7 +
 sql/item_subselect.h                           |   9 +
 sql/log_slow.h                                 |  33 ++--
 sql/sql_select.cc                              |   2 +-
 storage/innobase/include/log0recv.h            |  33 ++--
 storage/innobase/log/log0log.cc                |  11 +-
 storage/innobase/log/log0recv.cc               | 171 ++++++------------
 storage/innobase/srv/srv0start.cc              |   6 +-
 storage/maria/ha_maria.cc                      | 230 ++++++++++++-------------
 storage/myisam/ha_myisam.cc                    | 204 +++++++++++-----------
 storage/xtradb/include/log0recv.h              |  33 ++--
 storage/xtradb/log/log0log.cc                  |  11 +-
 storage/xtradb/log/log0recv.cc                 | 169 ++++++------------
 storage/xtradb/srv/srv0start.cc                |   4 +-
 21 files changed, 501 insertions(+), 579 deletions(-)

diff --cc mysql-test/r/subselect_innodb.result
index 07d00e96549,e8a545778aa..fe69dac17a0
--- a/mysql-test/r/subselect_innodb.result
+++ b/mysql-test/r/subselect_innodb.result
@@@ -496,56 -455,17 +496,71 @@@ HAVING SQ2_alias1 . col_int_key >= 
  drop table t1;
  set optimizer_switch=@subselect_innodb_tmp;
  #
+ # MDEV-9635:Server crashes in part_of_refkey  or assertion
+ # `!created && key_to_save < (int)s->keys' failed in
+ # TABLE::use_index(int) or with join_cache_level>2
+ #
+ SET join_cache_level=3;
+ CREATE TABLE t1 (f1 VARCHAR(1024)) ENGINE=InnoDB;
+ CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1;
+ CREATE TABLE t2 (f2 VARCHAR(4)) ENGINE=InnoDB;
+ INSERT INTO t2 VALUES ('foo'),('bar');
+ SELECT * FROM v1, t2 WHERE ( f1, f2 ) IN ( SELECT f1, f1 FROM t1 );
+ f1	f2
+ set join_cache_level = default;
+ drop view v1;
+ drop table t1,t2;
++#
 +# MDEV-6041: ORDER BY+subqueries: subquery_table.key=outer_table.col is not recongized as binding
 +#
 +create table t1(a int) engine=innodb;
 +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
 +create table t2(
 +id int primary key,
 +key1 int,
 +col1 int,
 +key(key1)
 +) engine=innodb;
 +insert into t2 
 +select 
 +A.a + B.a*10 + C.a*100 + D.a* 1000, 
 +A.a + 10*B.a, 
 +123456 
 +from t1 A, t1 B, t1 C, t1 D;
 +# Table tsubq:
 +#   - must use 'ref' (not 'index'), and must not use 'Using filesort'
 +#   - shows a bad estimate for 'rows' (but I'm not sure if one can do better w/o histograms)
 +explain select 
 +(SELECT 
 +concat(id, '-', key1, '-', col1)
 +FROM t2
 +WHERE t2.key1 = t1.a
 +ORDER BY t2.id ASC LIMIT 1)
 +from 
 +t1;
 +id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 +1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
 +2	DEPENDENT SUBQUERY	t2	ref	key1	key1	5	test.t1.a	1	Using where
 +#
 +# MDEV-6081: ORDER BY+ref(const): selectivity is very incorrect (MySQL Bug#14338686)
 +#
 +alter table t2 add key2 int;
 +update t2 set key2=key1;
 +alter table t2 add key(key2);
 +analyze table t2;
 +Table	Op	Msg_type	Msg_text
 +test.t2	analyze	status	OK
 +flush tables;
 +# Table tsubq must use 'ref' + Using filesort (not 'index' w/o filesort)
 +explain select 
 +(SELECT 
 +concat(id, '-', key1, '-', col1)
 +FROM t2
 +WHERE t2.key1 = t1.a
 +ORDER BY t2.key2 ASC LIMIT 1)
 +from 
 +t1;
 +id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 +1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	#	
 +2	DEPENDENT SUBQUERY	t2	ref	key1	key1	5	test.t1.a	#	Using where; Using filesort
 +drop table t1,t2;
diff --cc mysql-test/suite/innodb/r/log_file_size.result
index d0b389379e7,00000000000..67794f293aa
mode 100644,000000..100644
--- a/mysql-test/suite/innodb/r/log_file_size.result
+++ b/mysql-test/suite/innodb/r/log_file_size.result
@@@ -1,73 -1,0 +1,69 @@@
 +CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB;
 +BEGIN;
 +INSERT INTO t1 VALUES (42);
 +# Kill and restart: --innodb-log-file-size=6M
 +SELECT * FROM t1;
 +a
 +INSERT INTO t1 VALUES (42);
 +BEGIN;
 +DELETE FROM t1;
 +# Kill and restart: --innodb-log-files-in-group=3 --innodb-log-file-size=5M
 +SELECT * FROM t1;
 +a
 +42
 +INSERT INTO t1 VALUES (123);
 +BEGIN;
 +DELETE FROM t1;
 +# Kill the server
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /syntax error in innodb_log_group_home_dir/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
- FOUND /InnoDB: Starting an apply batch of log records/ in mysqld.1.err
++FOUND /InnoDB: Starting crash recovery from checkpoint LSN=/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
- FOUND /InnoDB: Starting an apply batch of log records/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: innodb_read_only prevents crash recovery/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
- FOUND /InnoDB: Starting an apply batch of log records/ in mysqld.1.err
- FOUND /InnoDB: Resizing redo log from 3\*[0-9]+ to 2\*[0-9]+ pages/ in mysqld.1.err
++FOUND /redo log from 3\*[0-9]+ to 2\*[0-9]+ pages/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
- FOUND /InnoDB: Starting an apply batch of log records/ in mysqld.1.err
- FOUND /InnoDB: Resizing redo log from 3\*[0-9]+ to 2\*[0-9]+ pages/ in mysqld.1.err
++FOUND /redo log from 3\*[0-9]+ to 2\*[0-9]+ pages/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: innodb_read_only prevents crash recovery/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
- FOUND /InnoDB: Starting an apply batch of log records/ in mysqld.1.err
- FOUND /InnoDB: Resizing redo log from 3\*[0-9]+ to 2\*[0-9]+ pages/ in mysqld.1.err
++FOUND /redo log from 3\*[0-9]+ to 2\*[0-9]+ pages/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: Cannot create log files in read-only mode/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: Setting log file .*ib_logfile[0-9]+ size to/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: Setting log file .*ib_logfile[0-9]+ size to/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: Only one log file found/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: Log file .*ib_logfile0 size 7 is not a multiple of innodb_page_size/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: Log file .*ib_logfile1 is of different size 1048576 bytes than other log files/ in mysqld.1.err
 +SELECT * FROM t1;
 +ERROR 42000: Unknown storage engine 'InnoDB'
 +FOUND /InnoDB: Setting log file .*ib_logfile[0-9]+ size to/ in mysqld.1.err
 +FOUND /InnoDB: Renaming log file .*ib_logfile101 to .*ib_logfile0/ in mysqld.1.err
 +SELECT * FROM t1;
 +a
 +42
 +123
 +DROP TABLE t1;
diff --cc mysql-test/suite/innodb/t/log_file_size.test
index ae63ee6e133,00000000000..bf307123734
mode 100644,000000..100644
--- a/mysql-test/suite/innodb/t/log_file_size.test
+++ b/mysql-test/suite/innodb/t/log_file_size.test
@@@ -1,222 -1,0 +1,214 @@@
 +# Test resizing the InnoDB redo log.
 +
 +--source include/have_innodb.inc
 +
 +# Embedded server does not support crashing
 +--source include/not_embedded.inc
 +# Avoid CrashReporter popup on Mac
 +--source include/not_crashrep.inc
 +# DBUG_EXECUTE_IF is needed
 +--source include/have_debug.inc
 +
 +if (`SELECT @@innodb_log_file_size = 1048576`) {
 +  --skip Test requires innodb_log_file_size>1M.
 +}
 +
 +--disable_query_log
 +call mtr.add_suppression("InnoDB: Resizing redo log");
 +call mtr.add_suppression("InnoDB: Starting to delete and rewrite log files");
 +call mtr.add_suppression("InnoDB: New log files created");
 +# This message is output by 10.0 and 10.1, not by 10.2
 +call mtr.add_suppression("InnoDB: The log sequence number in the ibdata files is higher than the log sequence number in the ib_logfiles");
 +call mtr.add_suppression("InnoDB: The log sequence numbers [0-9]+ and [0-9]+ in ibdata files do not match the log sequence number [0-9]+ in the ib_logfiles");
 +call mtr.add_suppression("syntax error in innodb_log_group_home_dir");
 +call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
 +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
 +call mtr.add_suppression("InnoDB: Plugin initialization aborted");
 +call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery");
 +call mtr.add_suppression("InnoDB:  Are you sure you are using the right ib_logfiles");
 +call mtr.add_suppression("InnoDB: Cannot create log files in read-only mode");
 +call mtr.add_suppression("InnoDB: Only one log file found");
 +call mtr.add_suppression("InnoDB: Log file .*ib_logfile[01].* size");
 +call mtr.add_suppression("InnoDB: Unable to open .*ib_logfile0. to check native AIO read support");
 +# InnoDB shutdown after refused startup is not clean in 10.0 or 10.1!
 +call mtr.add_suppression("mysqld got signal 11");
 +call mtr.add_suppression("Attempting backtrace");
 +FLUSH TABLES;
 +--enable_query_log
 +
 +CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB;
 +BEGIN;
 +INSERT INTO t1 VALUES (42);
 +
 +let $restart_parameters = --innodb-log-file-size=6M;
 +--source include/kill_and_restart_mysqld.inc
 +
 +SELECT * FROM t1;
 +
 +INSERT INTO t1 VALUES (42);
 +BEGIN;
 +DELETE FROM t1;
 +
 +let $restart_parameters = --innodb-log-files-in-group=3 --innodb-log-file-size=5M;
 +--source include/kill_and_restart_mysqld.inc
 +
 +SELECT * FROM t1;
 +
 +INSERT INTO t1 VALUES (123);
 +
 +let MYSQLD_DATADIR= `select @@datadir`;
 +let SEARCH_RANGE= -50000;
 +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
 +
 +BEGIN;
 +DELETE FROM t1;
 +
 +--source include/kill_mysqld.inc
 +
 +--let $restart_parameters= --innodb-log-group-home-dir=foo\;bar
 +--source include/start_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +let SEARCH_PATTERN= syntax error in innodb_log_group_home_dir;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_1
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
- let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
++let SEARCH_PATTERN= InnoDB: Starting crash recovery from checkpoint LSN=;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_3
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
- let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
- --source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --innodb-read-only
 +--source include/restart_mysqld.inc
 +
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +let SEARCH_PATTERN= InnoDB: innodb_read_only prevents crash recovery;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_4
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
- let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
- --source include/search_pattern_in_file.inc
- let SEARCH_PATTERN= InnoDB: Resizing redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
++let SEARCH_PATTERN= redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_5
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
- let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
- --source include/search_pattern_in_file.inc
- let SEARCH_PATTERN= InnoDB: Resizing redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
++let SEARCH_PATTERN= redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --innodb-read-only
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +let SEARCH_PATTERN= InnoDB: innodb_read_only prevents crash recovery;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_6
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +
- let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
- --source include/search_pattern_in_file.inc
- let SEARCH_PATTERN= InnoDB: Resizing redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
++let SEARCH_PATTERN= redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_7
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +
 +# this aborts right after deleting all log files
 +
 +--let $restart_parameters= --innodb-read-only
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +
 +let SEARCH_PATTERN= InnoDB: Cannot create log files in read-only mode;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_8
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +
 +let SEARCH_PATTERN= InnoDB: Setting log file .*ib_logfile[0-9]+ size to;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_9
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +
 +let SEARCH_PATTERN= InnoDB: Setting log file .*ib_logfile[0-9]+ size to;
 +--source include/search_pattern_in_file.inc
 +--source include/shutdown_mysqld.inc
 +
 +# We should have perfectly synced files here.
 +# Rename the log files, and trigger an error in recovery.
 +--move_file $MYSQLD_DATADIR/ib_logfile101 $MYSQLD_DATADIR/ib_logfile0
 +--move_file $MYSQLD_DATADIR/ib_logfile1 $MYSQLD_DATADIR/ib_logfile1_hidden
 +
 +--let $restart_parameters=
 +--source include/start_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +
 +let SEARCH_PATTERN= InnoDB: Only one log file found;
 +--source include/search_pattern_in_file.inc
 +--move_file $MYSQLD_DATADIR/ib_logfile0 $MYSQLD_DATADIR/ib_logfile101
 +
 +perl;
 +die unless open(FILE, ">$ENV{MYSQLD_DATADIR}/ib_logfile0");
 +print FILE "garbage";
 +close(FILE);
 +EOF
 +
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +let SEARCH_PATTERN= InnoDB: Log file .*ib_logfile0 size 7 is not a multiple of innodb_page_size;
 +--source include/search_pattern_in_file.inc
 +--remove_file $MYSQLD_DATADIR/ib_logfile0
 +--move_file $MYSQLD_DATADIR/ib_logfile101 $MYSQLD_DATADIR/ib_logfile0
 +
 +perl;
 +die unless open(FILE, ">$ENV{MYSQLD_DATADIR}/ib_logfile1");
 +print FILE "junkfill" x 131072;
 +close(FILE);
 +EOF
 +
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +
 +let SEARCH_PATTERN= InnoDB: Log file .*ib_logfile1 is of different size 1048576 bytes than other log files;
 +--source include/search_pattern_in_file.inc
 +--remove_file $MYSQLD_DATADIR/ib_logfile1
 +--move_file $MYSQLD_DATADIR/ib_logfile0 $MYSQLD_DATADIR/ib_logfile101
 +--move_file $MYSQLD_DATADIR/ib_logfile1_hidden $MYSQLD_DATADIR/ib_logfile1
 +
 +--let $restart_parameters= --debug=d,innodb_log_abort_10
 +--source include/restart_mysqld.inc
 +--error ER_UNKNOWN_STORAGE_ENGINE
 +SELECT * FROM t1;
 +
 +let SEARCH_PATTERN= InnoDB: Setting log file .*ib_logfile[0-9]+ size to;
 +--source include/search_pattern_in_file.inc
 +let SEARCH_PATTERN= InnoDB: Renaming log file .*ib_logfile101 to .*ib_logfile0;
 +--source include/search_pattern_in_file.inc
 +
 +--let $restart_parameters=
 +--source include/restart_mysqld.inc
 +
 +SELECT * FROM t1;
 +DROP TABLE t1;
diff --cc sql/item_subselect.cc
index 8cff6a1e6c8,78dcfc4215c..e87db62bd98
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@@ -2379,8 -2373,13 +2385,9 @@@ Item_in_subselect::create_row_in_to_exi
                                             (char *)"<no matter>",
                                             (char *)"<list ref>"));
          item= new Item_cond_or(item, item_isnull);
 -        /* 
 -          TODO: why we create the above for cases where the right part
 -                cant be NULL?
 -        */
          if (left_expr->element_index(i)->maybe_null)
          {
+           disable_cond_guard_for_const_null_left_expr(i);
            if (!(item= new Item_func_trig_cond(item, get_cond_guard(i))))
              DBUG_RETURN(true);
            if (!(having_col_item= 
diff --cc sql/item_subselect.h
index 0c62c3f8d02,2f166c83e8f..f457eb92e3e
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@@ -618,9 -596,17 +618,18 @@@ public
    void set_first_execution() { if (first_execution) first_execution= FALSE; }
    bool expr_cache_is_needed(THD *thd);
    inline bool left_expr_has_null();
 +
+   void disable_cond_guard_for_const_null_left_expr(int i)
+   {
+     if (left_expr->const_item() && !left_expr->is_expensive())
+     {
+       if (left_expr->element_index(i)->is_null())
+         set_cond_guard_var(i,FALSE);
+     }
+   }
 -  
++
    int optimize(double *out_rows, double *cost);
 -  /* 
 +  /*
      Return the identifier that we could use to identify the subquery for the
      user.
    */
diff --cc sql/log_slow.h
index 3ae2060cc27,541ef55f9e1..aea5b149263
--- a/sql/log_slow.h
+++ b/sql/log_slow.h
@@@ -16,23 -16,19 +16,22 @@@
  /* Defining what to log to slow log */
  
  #define LOG_SLOW_VERBOSITY_INIT           0
- #define LOG_SLOW_VERBOSITY_INNODB         (1 << 0)
- #define LOG_SLOW_VERBOSITY_QUERY_PLAN     (1 << 1)
- #define LOG_SLOW_VERBOSITY_EXPLAIN        (1 << 2)
+ #define LOG_SLOW_VERBOSITY_INNODB         (1U << 0)
+ #define LOG_SLOW_VERBOSITY_QUERY_PLAN     (1U << 1)
++#define LOG_SLOW_VERBOSITY_EXPLAIN        (1U << 2)
  
  #define QPLAN_INIT            QPLAN_QC_NO
  
- #define QPLAN_ADMIN           (1 << 0)
- #define QPLAN_FILESORT        (1 << 1)
- #define QPLAN_FILESORT_DISK   (1 << 2)
- #define QPLAN_FULL_JOIN       (1 << 3)
- #define QPLAN_FULL_SCAN       (1 << 4)
- #define QPLAN_QC              (1 << 5)
- #define QPLAN_QC_NO           (1 << 6)
- #define QPLAN_TMP_DISK        (1 << 7)
- #define QPLAN_TMP_TABLE       (1 << 8)
- #define QPLAN_FILESORT_PRIORITY_QUEUE       (1 << 9)
- 
+ #define QPLAN_ADMIN           (1U << 0)
+ #define QPLAN_FILESORT        (1U << 1)
+ #define QPLAN_FILESORT_DISK   (1U << 2)
+ #define QPLAN_FULL_JOIN       (1U << 3)
+ #define QPLAN_FULL_SCAN       (1U << 4)
+ #define QPLAN_QC              (1U << 5)
+ #define QPLAN_QC_NO           (1U << 6)
+ #define QPLAN_TMP_DISK        (1U << 7)
+ #define QPLAN_TMP_TABLE       (1U << 8)
++#define QPLAN_FILESORT_PRIORITY_QUEUE       (1U << 9)
++ 
  /* ... */
- #define QPLAN_MAX             (((ulong) 1) << 31) /* reserved as placeholder */
- 
+ #define QPLAN_MAX             (1U << 31) /* reserved as placeholder */
diff --cc storage/innobase/include/log0recv.h
index f4c7b4ed882,aa5f79aa198..bf34b6a3e29
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@@ -1,6 -1,7 +1,7 @@@
  /*****************************************************************************
  
 -Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
 +Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+ Copyright (c) 2017, MariaDB Corporation.
  
  This program is free software; you can redistribute it and/or modify it under
  the terms of the GNU General Public License as published by the Free Software
@@@ -271,20 -273,20 +272,12 @@@ voi
  recv_sys_var_init(void);
  /*===================*/
  #endif /* !UNIV_HOTBACKUP */
--/*******************************************************************//**
--Empties the hash table of stored log records, applying them to appropriate
--pages. */
++/** Apply the hash table of stored log records to persistent data pages.
++@param[in]	last_batch	whether the change buffer merge will be
++				performed as part of the operation */
  UNIV_INTERN
  void
--recv_apply_hashed_log_recs(
--/*=======================*/
--	ibool	allow_ibuf);	/*!< in: if TRUE, also ibuf operations are
--				allowed during the application; if FALSE,
--				no ibuf operations are allowed, and after
--				the application all file pages are flushed to
--				disk and invalidated in buffer pool: this
--				alternative means that no new log records
--				can be generated during the application */
++recv_apply_hashed_log_recs(bool last_batch);
  #ifdef UNIV_HOTBACKUP
  /*******************************************************************//**
  Applies log records in the hash table to a backup. */
@@@ -444,8 -441,6 +439,22 @@@ struct recv_sys_t
  	hash_table_t*	addr_hash;/*!< hash table of file addresses of pages */
  	ulint		n_addrs;/*!< number of not processed hashed file
  				addresses in the hash table */
 +
 +	recv_dblwr_t	dblwr;
++
++	/** Determine whether redo log recovery progress should be reported.
++	@param[in]	time	the current time
++	@return	whether progress should be reported
++		(the last report was at least 15 seconds ago) */
++	bool report(ib_time_t time)
++	{
++		if (time - progress_time < 15) {
++			return false;
++		}
++
++		progress_time = time;
++		return true;
++	}
  };
  
  /** The recovery system */
diff --cc storage/innobase/log/log0log.cc
index 57516d7c8f4,00000000000..b94a6d30c7d
mode 100644,000000..100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@@ -1,3731 -1,0 +1,3736 @@@
 +/*****************************************************************************
 +
 +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 +Copyright (c) 2009, Google Inc.
- Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
++Copyright (c) 2017, MariaDB Corporation.
 +
 +Portions of this file contain modifications contributed and copyrighted by
 +Google, Inc. Those modifications are gratefully acknowledged and are described
 +briefly in the InnoDB documentation. The contributions by Google are
 +incorporated with their permission, and subject to the conditions contained in
 +the file COPYING.Google.
 +
 +This program is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free Software
 +Foundation; version 2 of the License.
 +
 +This program is distributed in the hope that it will be useful, but WITHOUT
 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License along with
 +this program; if not, write to the Free Software Foundation, Inc.,
 +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
 +
 +*****************************************************************************/
 +
 +/**************************************************//**
 +@file log/log0log.cc
 +Database log
 +
 +Created 12/9/1995 Heikki Tuuri
 +*******************************************************/
 +
 +#include "log0log.h"
 +
 +#ifdef UNIV_NONINL
 +#include "log0log.ic"
 +#endif
 +
 +#ifndef UNIV_HOTBACKUP
 +#include "mem0mem.h"
 +#include "buf0buf.h"
 +#include "buf0flu.h"
 +#include "srv0srv.h"
 +#include "log0recv.h"
 +#include "fil0fil.h"
 +#include "dict0boot.h"
 +#include "srv0srv.h"
 +#include "srv0start.h"
 +#include "trx0sys.h"
 +#include "trx0trx.h"
 +#include "trx0roll.h"
 +#include "srv0mon.h"
 +
 +/*
 +General philosophy of InnoDB redo-logs:
 +
 +1) Every change to a contents of a data page must be done
 +through mtr, which in mtr_commit() writes log records
 +to the InnoDB redo log.
 +
 +2) Normally these changes are performed using a mlog_write_ulint()
 +or similar function.
 +
 +3) In some page level operations only a code number of a
 +c-function and its parameters are written to the log to
 +reduce the size of the log.
 +
 +  3a) You should not add parameters to these kind of functions
 +  (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
 +
 +  3b) You should not add such functionality which either change
 +  working when compared with the old or are dependent on data
 +  outside of the page. These kind of functions should implement
 +  self-contained page transformation and it should be unchanged
 +  if you don't have very essential reasons to change log
 +  semantics or format.
 +
 +*/
 +
 +/* Global log system variable */
 +UNIV_INTERN log_t*	log_sys	= NULL;
 +
 +#ifdef UNIV_PFS_RWLOCK
 +UNIV_INTERN mysql_pfs_key_t	checkpoint_lock_key;
 +# ifdef UNIV_LOG_ARCHIVE
 +UNIV_INTERN mysql_pfs_key_t	archive_lock_key;
 +# endif
 +#endif /* UNIV_PFS_RWLOCK */
 +
 +#ifdef UNIV_PFS_MUTEX
 +UNIV_INTERN mysql_pfs_key_t	log_sys_mutex_key;
 +UNIV_INTERN mysql_pfs_key_t	log_flush_order_mutex_key;
 +#endif /* UNIV_PFS_MUTEX */
 +
 +#ifdef UNIV_DEBUG
 +UNIV_INTERN ibool	log_do_write = TRUE;
 +#endif /* UNIV_DEBUG */
 +
 +/* These control how often we print warnings if the last checkpoint is too
 +old */
 +UNIV_INTERN ibool	log_has_printed_chkp_warning = FALSE;
 +UNIV_INTERN time_t	log_last_warning_time;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +/* Pointer to this variable is used as the i/o-message when we do i/o to an
 +archive */
 +UNIV_INTERN byte	log_archive_io;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +/* A margin for free space in the log buffer before a log entry is catenated */
 +#define LOG_BUF_WRITE_MARGIN	(4 * OS_FILE_LOG_BLOCK_SIZE)
 +
 +/* Margins for free space in the log buffer after a log entry is catenated */
 +#define LOG_BUF_FLUSH_RATIO	2
 +#define LOG_BUF_FLUSH_MARGIN	(LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
 +
 +/* Margin for the free space in the smallest log group, before a new query
 +step which modifies the database, is started */
 +
 +#define LOG_CHECKPOINT_FREE_PER_THREAD	(4 * UNIV_PAGE_SIZE)
 +#define LOG_CHECKPOINT_EXTRA_FREE	(8 * UNIV_PAGE_SIZE)
 +
 +/* This parameter controls asynchronous making of a new checkpoint; the value
 +should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
 +
 +#define LOG_POOL_CHECKPOINT_RATIO_ASYNC	32
 +
 +/* This parameter controls synchronous preflushing of modified buffer pages */
 +#define LOG_POOL_PREFLUSH_RATIO_SYNC	16
 +
 +/* The same ratio for asynchronous preflushing; this value should be less than
 +the previous */
 +#define LOG_POOL_PREFLUSH_RATIO_ASYNC	8
 +
 +/* Extra margin, in addition to one log file, used in archiving */
 +#define LOG_ARCHIVE_EXTRA_MARGIN	(4 * UNIV_PAGE_SIZE)
 +
 +/* This parameter controls asynchronous writing to the archive */
 +#define LOG_ARCHIVE_RATIO_ASYNC		16
 +
 +/* Codes used in unlocking flush latches */
 +#define LOG_UNLOCK_NONE_FLUSHED_LOCK	1
 +#define LOG_UNLOCK_FLUSH_LOCK		2
 +
 +/* States of an archiving operation */
 +#define	LOG_ARCHIVE_READ	1
 +#define	LOG_ARCHIVE_WRITE	2
 +
 +/******************************************************//**
 +Completes a checkpoint write i/o to a log file. */
 +static
 +void
 +log_io_complete_checkpoint(void);
 +/*============================*/
 +#ifdef UNIV_LOG_ARCHIVE
 +/******************************************************//**
 +Completes an archiving i/o. */
 +static
 +void
 +log_io_complete_archive(void);
 +/*=========================*/
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +/****************************************************************//**
 +Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
 +exists.
 +@return	LSN of oldest modification */
 +static
 +lsn_t
 +log_buf_pool_get_oldest_modification(void)
 +/*======================================*/
 +{
 +	lsn_t	lsn;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	lsn = buf_pool_get_oldest_modification();
 +
 +	if (!lsn) {
 +
 +		lsn = log_sys->lsn;
 +	}
 +
 +	return(lsn);
 +}
 +
 +/** Extends the log buffer.
 +@param[in] len	requested minimum size in bytes */
 +static
 +void
 +log_buffer_extend(
 +	ulint	len)
 +{
 +	ulint	move_start;
 +	ulint	move_end;
 +	byte	tmp_buf[OS_FILE_LOG_BLOCK_SIZE];
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	while (log_sys->is_extending) {
 +		/* Another thread is trying to extend already.
 +		Needs to wait for. */
 +		mutex_exit(&(log_sys->mutex));
 +
 +		log_buffer_flush_to_disk();
 +
 +		mutex_enter(&(log_sys->mutex));
 +
 +		if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) {
 +			/* Already extended enough by the others */
 +			mutex_exit(&(log_sys->mutex));
 +			return;
 +		}
 +	}
 +
 +	log_sys->is_extending = true;
 +
 +	while (log_sys->n_pending_writes != 0
 +	       || ut_calc_align_down(log_sys->buf_free,
 +				     OS_FILE_LOG_BLOCK_SIZE)
 +		  != ut_calc_align_down(log_sys->buf_next_to_write,
 +					OS_FILE_LOG_BLOCK_SIZE)) {
 +		/* Buffer might have >1 blocks to write still. */
 +		mutex_exit(&(log_sys->mutex));
 +
 +		log_buffer_flush_to_disk();
 +
 +		mutex_enter(&(log_sys->mutex));
 +	}
 +
 +	move_start = ut_calc_align_down(
 +		log_sys->buf_free,
 +		OS_FILE_LOG_BLOCK_SIZE);
 +	move_end = log_sys->buf_free;
 +
 +	/* store the last log block in buffer */
 +	ut_memcpy(tmp_buf, log_sys->buf + move_start,
 +		  move_end - move_start);
 +
 +	log_sys->buf_free -= move_start;
 +	log_sys->buf_next_to_write -= move_start;
 +
 +	/* reallocate log buffer */
 +	srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
 +	mem_free(log_sys->buf_ptr);
 +	log_sys->buf_ptr = static_cast<byte*>(
 +		mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +	log_sys->buf = static_cast<byte*>(
 +		ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
 +	log_sys->buf_size = LOG_BUFFER_SIZE;
 +	log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
 +		- LOG_BUF_FLUSH_MARGIN;
 +
 +	/* restore the last log block */
 +	ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start);
 +
 +	ut_ad(log_sys->is_extending);
 +	log_sys->is_extending = false;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"innodb_log_buffer_size was extended to %lu.",
 +		LOG_BUFFER_SIZE);
 +}
 +
 +/************************************************************//**
 +Opens the log for log_write_low. The log must be closed with log_close and
 +released with log_release.
 +@return	start lsn of the log record */
 +UNIV_INTERN
 +lsn_t
 +log_reserve_and_open(
 +/*=================*/
 +	ulint	len)	/*!< in: length of data to be catenated */
 +{
 +	log_t*	log			= log_sys;
 +	ulint	len_upper_limit;
 +#ifdef UNIV_LOG_ARCHIVE
 +	ulint	archived_lsn_age;
 +	ulint	dummy;
 +#endif /* UNIV_LOG_ARCHIVE */
 +#ifdef UNIV_DEBUG
 +	ulint	count			= 0;
 +#endif /* UNIV_DEBUG */
 +
 +	if (len >= log->buf_size / 2) {
 +		DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
 +				DBUG_SUICIDE(););
 +
 +		/* log_buffer is too small. try to extend instead of crash. */
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"The transaction log size is too large"
 +			" for innodb_log_buffer_size (%lu >= %lu / 2). "
 +			"Trying to extend it.",
 +			len, LOG_BUFFER_SIZE);
 +
 +		log_buffer_extend((len + 1) * 2);
 +	}
 +loop:
 +	mutex_enter(&(log->mutex));
 +	ut_ad(!recv_no_log_write);
 +
 +	if (log->is_extending) {
 +
 +		mutex_exit(&(log->mutex));
 +
 +		/* Log buffer size is extending. Writing up to the next block
 +		should wait for the extending finished. */
 +
 +		os_thread_sleep(100000);
 +
 +		ut_ad(++count < 50);
 +
 +		goto loop;
 +	}
 +
 +	/* Calculate an upper limit for the space the string may take in the
 +	log buffer */
 +
 +	len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
 +
 +	if (log->buf_free + len_upper_limit > log->buf_size) {
 +
 +		mutex_exit(&(log->mutex));
 +
 +		/* Not enough free space, do a syncronous flush of the log
 +		buffer */
 +
 +		log_buffer_flush_to_disk();
 +
 +		srv_stats.log_waits.inc();
 +
 +		ut_ad(++count < 50);
 +
 +		goto loop;
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (log->archiving_state != LOG_ARCH_OFF) {
 +
 +		archived_lsn_age = log->lsn - log->archived_lsn;
 +		if (archived_lsn_age + len_upper_limit
 +		    > log->max_archived_lsn_age) {
 +			/* Not enough free archived space in log groups: do a
 +			synchronous archive write batch: */
 +
 +			mutex_exit(&(log->mutex));
 +
 +			ut_ad(len_upper_limit <= log->max_archived_lsn_age);
 +
 +			log_archive_do(TRUE, &dummy);
 +
 +			ut_ad(++count < 50);
 +
 +			goto loop;
 +		}
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +#ifdef UNIV_LOG_DEBUG
 +	log->old_buf_free = log->buf_free;
 +	log->old_lsn = log->lsn;
 +#endif
 +	return(log->lsn);
 +}
 +
 +/************************************************************//**
 +Writes to the log the string given. It is assumed that the caller holds the
 +log mutex.
 +The string is copied in parts: each pass copies as much as fits before the
 +trailer of the current log block. When a block becomes full, its data length
 +and checkpoint number are stamped, the lsn and buf_free are advanced past
 +the trailer of that block and the header of the next one, and the next
 +block header is initialized. */
 +UNIV_INTERN
 +void
 +log_write_low(
 +/*==========*/
 +	byte*	str,		/*!< in: string */
 +	ulint	str_len)	/*!< in: string length */
 +{
 +	log_t*	log	= log_sys;
 +	ulint	len;
 +	ulint	data_len;
 +	byte*	log_block;
 +
 +	ut_ad(mutex_own(&(log->mutex)));
 +part_loop:
 +	ut_ad(!recv_no_log_write);
 +	/* Calculate a part length: data_len is the fill level the current
 +	block would reach if all of the remaining string were appended */
 +
 +	data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
 +
 +	if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 +
 +		/* The string fits within the current log block */
 +
 +		len = str_len;
 +	} else {
 +		data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
 +
 +		len = OS_FILE_LOG_BLOCK_SIZE
 +			- (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
 +			- LOG_BLOCK_TRL_SIZE;
 +	}
 +
 +	ut_memcpy(log->buf + log->buf_free, str, len);
 +
 +	str_len -= len;
 +	str = str + len;
 +
 +	log_block = static_cast<byte*>(
 +		ut_align_down(
 +			log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_block_set_data_len(log_block, data_len);
 +
 +	if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 +		/* This block became full: len is enlarged below so that
 +		lsn and buf_free also skip the trailer of this block and
 +		the header of the next one */
 +		log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
 +		log_block_set_checkpoint_no(log_block,
 +					    log_sys->next_checkpoint_no);
 +		len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
 +
 +		log->lsn += len;
 +
 +		/* Initialize the next block header */
 +		log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
 +	} else {
 +		log->lsn += len;
 +	}
 +
 +	log->buf_free += len;
 +
 +	ut_ad(log->buf_free <= log->buf_size);
 +
 +	if (str_len > 0) {
 +		goto part_loop;
 +	}
 +
 +	srv_stats.log_write_requests.inc();
 +}
 +
 +/************************************************************//**
 +Closes the log. The caller must hold the log mutex (checked by a debug
 +assertion); this function does not release it.
 +If the current log block has no first record group offset yet, it is set
 +to the block's current data length. The check_flush_or_checkpoint flag is
 +raised when the buffer is filled beyond max_buf_free, or when the
 +checkpoint age or the age of the oldest modification exceeds the configured
 +limits. If the checkpoint age reaches the log group capacity, an error is
 +printed to stderr, the first time and thereafter only when more than
 +15 seconds have passed since the previous warning.
 +@return	lsn */
 +UNIV_INTERN
 +lsn_t
 +log_close(void)
 +/*===========*/
 +{
 +	byte*		log_block;
 +	ulint		first_rec_group;
 +	lsn_t		oldest_lsn;
 +	lsn_t		lsn;
 +	log_t*		log	= log_sys;
 +	lsn_t		checkpoint_age;
 +
 +	ut_ad(mutex_own(&(log->mutex)));
 +	ut_ad(!recv_no_log_write);
 +
 +	lsn = log->lsn;
 +
 +	log_block = static_cast<byte*>(
 +		ut_align_down(
 +			log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	first_rec_group = log_block_get_first_rec_group(log_block);
 +
 +	if (first_rec_group == 0) {
 +		/* We initialized a new log block which was not written
 +		full by the current mtr: the next mtr log record group
 +		will start within this block at the offset data_len */
 +
 +		log_block_set_first_rec_group(
 +			log_block, log_block_get_data_len(log_block));
 +	}
 +
 +	if (log->buf_free > log->max_buf_free) {
 +
 +		log->check_flush_or_checkpoint = TRUE;
 +	}
 +
 +	checkpoint_age = lsn - log->last_checkpoint_lsn;
 +
 +	if (checkpoint_age >= log->log_group_capacity) {
 +		/* TODO: split btr_store_big_rec_extern_fields() into small
 +		steps so that we can release all latches in the middle, and
 +		call log_free_check() to ensure we never write over log written
 +		after the latest checkpoint. In principle, we should split all
 +		big_rec operations, but other operations are smaller. */
 +
 +		if (!log_has_printed_chkp_warning
 +		    || difftime(time(NULL), log_last_warning_time) > 15) {
 +
 +			log_has_printed_chkp_warning = TRUE;
 +			log_last_warning_time = time(NULL);
 +
 +			ut_print_timestamp(stderr);
 +			fprintf(stderr,
 +				" InnoDB: ERROR: the age of the last"
 +				" checkpoint is " LSN_PF ",\n"
 +				"InnoDB: which exceeds the log group"
 +				" capacity " LSN_PF ".\n"
 +				"InnoDB: If you are using big"
 +				" BLOB or TEXT rows, you must set the\n"
 +				"InnoDB: combined size of log files"
 +				" at least 10 times bigger than the\n"
 +				"InnoDB: largest such row.\n",
 +				checkpoint_age,
 +				log->log_group_capacity);
 +		}
 +	}
 +
 +	if (checkpoint_age <= log->max_modified_age_sync) {
 +
 +		goto function_exit;
 +	}
 +
 +	oldest_lsn = buf_pool_get_oldest_modification();
 +
 +	if (!oldest_lsn
 +	    || lsn - oldest_lsn > log->max_modified_age_sync
 +	    || checkpoint_age > log->max_checkpoint_age_async) {
 +
 +		log->check_flush_or_checkpoint = TRUE;
 +	}
 +function_exit:
 +
 +#ifdef UNIV_LOG_DEBUG
 +	log_check_log_recs(log->buf + log->old_buf_free,
 +			   log->buf_free - log->old_buf_free, log->old_lsn);
 +#endif
 +
 +	return(lsn);
 +}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +/******************************************************//**
 +Pads the current log block full with dummy log records. Used in producing
 +consistent archived log files.
 +One MLOG_DUMMY_RECORD byte is written per log_write_low() call until the
 +space before the trailer of the current block is used up. Afterwards the
 +function asserts that the resulting lsn lies exactly LOG_BLOCK_HDR_SIZE
 +bytes into a log block. */
 +static
 +void
 +log_pad_current_log_block(void)
 +/*===========================*/
 +{
 +	byte		b		= MLOG_DUMMY_RECORD;
 +	ulint		pad_length;
 +	ulint		i;
 +	ib_uint64_t	lsn;
 +
 +	/* We retrieve lsn only because otherwise gcc crashed on HP-UX;
 +	the value is overwritten below before it is used */
 +	lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
 +
 +	pad_length = OS_FILE_LOG_BLOCK_SIZE
 +		- (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
 +		- LOG_BLOCK_TRL_SIZE;
 +
 +	for (i = 0; i < pad_length; i++) {
 +		log_write_low(&b, 1);
 +	}
 +
 +	lsn = log_sys->lsn;
 +
 +	log_close();
 +	log_release();
 +
 +	ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
 +}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +/******************************************************//**
 +Calculates the data capacity of a log group, when the log file headers are not
 +included: (file_size - LOG_FILE_HDR_SIZE) * n_files. The caller must hold
 +the log mutex (checked by a debug assertion).
 +@return	capacity in bytes */
 +UNIV_INTERN
 +lsn_t
 +log_group_get_capacity(
 +/*===================*/
 +	const log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
 +}
 +
 +/******************************************************//**
 +Calculates the offset within a log group, when the log file headers are not
 +included. One LOG_FILE_HDR_SIZE is subtracted for the file that contains
 +the offset and for each file before it (1 + offset / file_size headers).
 +The caller must hold the log mutex (checked by a debug assertion).
 +@return	size offset (<= offset) */
 +UNIV_INLINE
 +lsn_t
 +log_group_calc_size_offset(
 +/*=======================*/
 +	lsn_t			offset,	/*!< in: real offset within the
 +					log group */
 +	const log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
 +}
 +
 +/******************************************************//**
 +Calculates the offset within a log group, when the log file headers are
 +included. One LOG_FILE_HDR_SIZE is added for the file that contains the
 +offset and for each file before it, where each file holds
 +file_size - LOG_FILE_HDR_SIZE bytes of data. The caller must hold the log
 +mutex (checked by a debug assertion).
 +@return	real offset (>= offset) */
 +UNIV_INLINE
 +lsn_t
 +log_group_calc_real_offset(
 +/*=======================*/
 +	lsn_t			offset,	/*!< in: size offset within the
 +					log group */
 +	const log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	return(offset + LOG_FILE_HDR_SIZE
 +	       * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
 +}
 +
 +/******************************************************//**
 +Calculates the offset of an lsn within a log group.
 +The group stores a reference pair (group->lsn, group->lsn_offset). The
 +distance of the requested lsn from that reference lsn is added to the
 +header-free reference offset, modulo the group capacity; an lsn below the
 +reference is mapped to the equivalent forward distance. The result is
 +converted back to an offset that includes the file headers. The caller must
 +hold the log mutex (checked by a debug assertion).
 +@return	offset within the log group */
 +static
 +lsn_t
 +log_group_calc_lsn_offset(
 +/*======================*/
 +	lsn_t			lsn,	/*!< in: lsn */
 +	const log_group_t*	group)	/*!< in: log group */
 +{
 +	lsn_t	gr_lsn;
 +	lsn_t	gr_lsn_size_offset;
 +	lsn_t	difference;
 +	lsn_t	group_size;
 +	lsn_t	offset;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	gr_lsn = group->lsn;
 +
 +	gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset, group);
 +
 +	group_size = log_group_get_capacity(group);
 +
 +	if (lsn >= gr_lsn) {
 +
 +		difference = lsn - gr_lsn;
 +	} else {
 +		difference = gr_lsn - lsn;
 +
 +		difference = difference % group_size;
 +
 +		difference = group_size - difference;
 +	}
 +
 +	offset = (gr_lsn_size_offset + difference) % group_size;
 +
 +	/* fprintf(stderr,
 +	"Offset is " LSN_PF " gr_lsn_offset is " LSN_PF
 +	" difference is " LSN_PF "\n",
 +	offset, gr_lsn_size_offset, difference);
 +	*/
 +
 +	return(log_group_calc_real_offset(offset, group));
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +#ifdef UNIV_DEBUG
 +UNIV_INTERN ibool	log_debug_writes = FALSE;
 +#endif /* UNIV_DEBUG */
 +
 +/*******************************************************************//**
 +Calculates where in log files we find a specified lsn.
 +Each file contributes log_file_size - LOG_FILE_HDR_SIZE bytes of capacity.
 +If lsn is below first_header_lsn, it is first advanced by a whole multiple
 +of the combined capacity of all files so that it is no longer below
 +first_header_lsn. The file number is then the number of whole capacities
 +between first_header_lsn and lsn, modulo n_log_files, and the returned
 +offset is the remainder plus LOG_FILE_HDR_SIZE.
 +@return	log file number */
 +UNIV_INTERN
 +ulint
 +log_calc_where_lsn_is(
 +/*==================*/
 +	ib_int64_t*	log_file_offset,	/*!< out: offset in that file
 +						(including the header) */
 +	ib_uint64_t	first_header_lsn,	/*!< in: first log file start
 +						lsn */
 +	ib_uint64_t	lsn,			/*!< in: lsn whose position to
 +						determine */
 +	ulint		n_log_files,		/*!< in: total number of log
 +						files */
 +	ib_int64_t	log_file_size)		/*!< in: log file size
 +						(including the header) */
 +{
 +	ib_int64_t	capacity	= log_file_size - LOG_FILE_HDR_SIZE;
 +	ulint		file_no;
 +	ib_int64_t	add_this_many;
 +
 +	if (lsn < first_header_lsn) {
 +		add_this_many = 1 + (first_header_lsn - lsn)
 +			/ (capacity * (ib_int64_t) n_log_files);
 +		lsn += add_this_many
 +			* capacity * (ib_int64_t) n_log_files;
 +	}
 +
 +	ut_a(lsn >= first_header_lsn);
 +
 +	file_no = ((ulint)((lsn - first_header_lsn) / capacity))
 +		% n_log_files;
 +	*log_file_offset = (lsn - first_header_lsn) % capacity;
 +
 +	*log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
 +
 +	return(file_no);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/********************************************************//**
 +Re-bases the (lsn, lsn_offset) reference pair of a log group on a given lsn.
 +The new offset is derived from the pair currently stored in the group, so
 +the group must already hold values that correctly correspond to some lsn,
 +for instance a checkpoint lsn. */
 +UNIV_INTERN
 +void
 +log_group_set_fields(
 +/*=================*/
 +	log_group_t*	group,	/*!< in/out: group */
 +	lsn_t		lsn)	/*!< in: lsn for which the values should be
 +				set */
 +{
 +	/* The offset must be computed before group->lsn is overwritten,
 +	because the computation reads the old reference pair */
 +	const lsn_t	new_lsn_offset = log_group_calc_lsn_offset(lsn, group);
 +
 +	group->lsn_offset = new_lsn_offset;
 +	group->lsn = lsn;
 +}
 +
 +/*****************************************************************//**
 +Calculates the recommended highest values for lsn - last_checkpoint_lsn,
 +lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age.
 +The limits are derived from the capacity of the smallest log group, reduced
 +by safety margins and by a per-thread reservation that depends on
 +srv_thread_concurrency. The log mutex is acquired and released inside this
 +function.
 +NOTE: when the smallest log group is too small, an error is printed to
 +stderr and exit(1) is called, so FALSE is not actually returned.
 +@return error value FALSE if the smallest log group is too small to
 +accommodate the number of OS threads in the database server */
 +static
 +ibool
 +log_calc_max_ages(void)
 +/*===================*/
 +{
 +	log_group_t*	group;
 +	lsn_t		margin;
 +	ulint		free;
 +	ibool		success		= TRUE;
 +	lsn_t		smallest_capacity;
 +	lsn_t		archive_margin;
 +	lsn_t		smallest_archive_margin;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	ut_ad(group);
 +
 +	smallest_capacity = LSN_MAX;
 +	smallest_archive_margin = LSN_MAX;
 +
 +	while (group) {
 +		if (log_group_get_capacity(group) < smallest_capacity) {
 +
 +			smallest_capacity = log_group_get_capacity(group);
 +		}
 +
 +		archive_margin = log_group_get_capacity(group)
 +			- (group->file_size - LOG_FILE_HDR_SIZE)
 +			- LOG_ARCHIVE_EXTRA_MARGIN;
 +
 +		if (archive_margin < smallest_archive_margin) {
 +
 +			smallest_archive_margin = archive_margin;
 +		}
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	/* Add extra safety */
 +	smallest_capacity = smallest_capacity - smallest_capacity / 10;
 +
 +	/* For each OS thread we must reserve so much free space in the
 +	smallest log group that it can accommodate the log entries produced
 +	by single query steps: running out of free log space is a serious
 +	system error which requires rebooting the database. */
 +
 +	free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
 +		+ LOG_CHECKPOINT_EXTRA_FREE;
 +	if (free >= smallest_capacity / 2) {
 +		success = FALSE;
 +
 +		goto failure;
 +	} else {
 +		margin = smallest_capacity - free;
 +	}
 +
 +	margin = margin - margin / 10;	/* Add still some extra safety */
 +
 +	log_sys->log_group_capacity = smallest_capacity;
 +
 +	log_sys->max_modified_age_async = margin
 +		- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
 +	log_sys->max_modified_age_sync = margin
 +		- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
 +
 +	log_sys->max_checkpoint_age_async = margin - margin
 +		/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
 +	log_sys->max_checkpoint_age = margin;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_sys->max_archived_lsn_age = smallest_archive_margin;
 +
 +	log_sys->max_archived_lsn_age_async = smallest_archive_margin
 +		- smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
 +#endif /* UNIV_LOG_ARCHIVE */
 +failure:
 +	mutex_exit(&(log_sys->mutex));
 +
 +	if (!success) {
 +		fprintf(stderr,
 +			"InnoDB: Error: ib_logfiles are too small"
 +			" for innodb_thread_concurrency %lu.\n"
 +			"InnoDB: The combined size of ib_logfiles"
 +			" should be bigger than\n"
 +			"InnoDB: 200 kB * innodb_thread_concurrency.\n"
 +			"InnoDB: To get mysqld to start up, set"
 +			" innodb_thread_concurrency in my.cnf\n"
 +			"InnoDB: to a lower value, for example, to 8."
 +			" After an ERROR-FREE shutdown\n"
 +			"InnoDB: of mysqld you can adjust the size of"
 +			" ib_logfiles, as explained in\n"
 +			"InnoDB: " REFMAN "adding-and-removing.html\n"
 +			"InnoDB: Cannot continue operation."
 +			" Calling exit(1).\n",
 +			(ulong) srv_thread_concurrency);
 +
 +		exit(1);
 +	}
 +
 +	return(success);
 +}
 +
 +/******************************************************************//**
 +Initializes the log.
 +Allocates the global log_sys object, creates its mutexes, events and the
 +checkpoint lock, allocates the log buffer and the checkpoint buffer (both
 +aligned to OS_FILE_LOG_BLOCK_SIZE), and initializes the first log block
 +header. On return the lsn is LOG_START_LSN + LOG_BLOCK_HDR_SIZE and
 +buf_free is LOG_BLOCK_HDR_SIZE. When UNIV_LOG_DEBUG is defined, the recovery
 +system is created and initialized as well. */
 +UNIV_INTERN
 +void
 +log_init(void)
 +/*==========*/
 +{
 +	log_sys = static_cast<log_t*>(mem_alloc(sizeof(log_t)));
 +
 +	mutex_create(log_sys_mutex_key, &log_sys->mutex, SYNC_LOG);
 +
 +	mutex_create(log_flush_order_mutex_key,
 +		     &log_sys->log_flush_order_mutex,
 +		     SYNC_LOG_FLUSH_ORDER);
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	/* Start the lsn from one log block from zero: this way every
 +	log record has a start lsn != zero, a fact which we will use */
 +
 +	log_sys->lsn = LOG_START_LSN;
 +
 +	ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
 +	ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
 +
 +	log_sys->buf_ptr = static_cast<byte*>(
 +		mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_sys->buf = static_cast<byte*>(
 +		ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_sys->buf_size = LOG_BUFFER_SIZE;
 +	log_sys->is_extending = false;
 +
 +	log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
 +		- LOG_BUF_FLUSH_MARGIN;
 +	log_sys->check_flush_or_checkpoint = TRUE;
 +	UT_LIST_INIT(log_sys->log_groups);
 +
 +	log_sys->n_log_ios = 0;
 +
 +	log_sys->n_log_ios_old = log_sys->n_log_ios;
 +	log_sys->last_printout_time = time(NULL);
 +	/*----------------------------*/
 +
 +	log_sys->buf_next_to_write = 0;
 +
 +	log_sys->write_lsn = 0;
 +	log_sys->current_flush_lsn = 0;
 +	log_sys->flushed_to_disk_lsn = 0;
 +
 +	log_sys->written_to_some_lsn = log_sys->lsn;
 +	log_sys->written_to_all_lsn = log_sys->lsn;
 +
 +	log_sys->n_pending_writes = 0;
 +
 +	log_sys->no_flush_event = os_event_create();
 +
 +	os_event_set(log_sys->no_flush_event);
 +
 +	log_sys->one_flushed_event = os_event_create();
 +
 +	os_event_set(log_sys->one_flushed_event);
 +
 +	/*----------------------------*/
 +
 +	log_sys->next_checkpoint_no = 0;
 +	log_sys->last_checkpoint_lsn = log_sys->lsn;
 +	log_sys->n_pending_checkpoint_writes = 0;
 +
 +
 +	rw_lock_create(checkpoint_lock_key, &log_sys->checkpoint_lock,
 +		       SYNC_NO_ORDER_CHECK);
 +
 +	log_sys->checkpoint_buf_ptr = static_cast<byte*>(
 +		mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_sys->checkpoint_buf = static_cast<byte*>(
 +		ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	/*----------------------------*/
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	/* Under MySQL, log archiving is always off */
 +	log_sys->archiving_state = LOG_ARCH_OFF;
 +	log_sys->archived_lsn = log_sys->lsn;
 +	log_sys->next_archived_lsn = 0;
 +
 +	log_sys->n_pending_archive_ios = 0;
 +
 +	rw_lock_create(archive_lock_key, &log_sys->archive_lock,
 +		       SYNC_NO_ORDER_CHECK);
 +
 +	log_sys->archive_buf = NULL;
 +
 +	/* ut_align(
 +	ut_malloc(LOG_ARCHIVE_BUF_SIZE
 +	+ OS_FILE_LOG_BLOCK_SIZE),
 +	OS_FILE_LOG_BLOCK_SIZE); */
 +	log_sys->archive_buf_size = 0;
 +
 +	/* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
 +
 +	log_sys->archiving_on = os_event_create();
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	/*----------------------------*/
 +
 +	log_block_init(log_sys->buf, log_sys->lsn);
 +	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 +
 +	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
 +	log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
 +
 +	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 +		    log_sys->lsn - log_sys->last_checkpoint_lsn);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +#ifdef UNIV_LOG_DEBUG
 +	recv_sys_create();
 +	recv_sys_init(buf_pool_get_curr_size());
 +
 +	recv_sys->parse_start_lsn = log_sys->lsn;
 +	recv_sys->scanned_lsn = log_sys->lsn;
 +	recv_sys->scanned_checkpoint_no = 0;
 +	recv_sys->recovered_lsn = log_sys->lsn;
 +	recv_sys->limit_lsn = LSN_MAX;
 +#endif
 +}
 +
 +/******************************************************************//**
 +Inits a log group to the log system.
 +Allocates a log_group_t, sets its reference lsn to LOG_START_LSN at offset
 +LOG_FILE_HDR_SIZE, allocates one block-aligned header buffer per log file
 +and a block-aligned checkpoint buffer, appends the group to
 +log_sys->log_groups and recalculates the maximum ages with
 +log_calc_max_ages(). */
 +UNIV_INTERN
 +void
 +log_group_init(
 +/*===========*/
 +	ulint	id,			/*!< in: group id */
 +	ulint	n_files,		/*!< in: number of log files */
 +	lsn_t	file_size,		/*!< in: log file size in bytes */
 +	ulint	space_id,		/*!< in: space id of the file space
 +					which contains the log files of this
 +					group */
 +	ulint	archive_space_id MY_ATTRIBUTE((unused)))
 +					/*!< in: space id of the file space
 +					which contains some archived log
 +					files for this group; currently, only
 +					for the first log group this is
 +					used */
 +{
 +	ulint	i;
 +
 +	log_group_t*	group;
 +
 +	group = static_cast<log_group_t*>(mem_alloc(sizeof(log_group_t)));
 +
 +	group->id = id;
 +	group->n_files = n_files;
 +	group->file_size = file_size;
 +	group->space_id = space_id;
 +	group->state = LOG_GROUP_OK;
 +	group->lsn = LOG_START_LSN;
 +	group->lsn_offset = LOG_FILE_HDR_SIZE;
 +	group->n_pending_writes = 0;
 +
 +	/* Arrays of n_files buffer pointers: the elements are byte*, so
 +	the arrays themselves are byte** */
 +	group->file_header_bufs_ptr = static_cast<byte**>(
 +		mem_zalloc(sizeof(byte*) * n_files));
 +
 +	group->file_header_bufs = static_cast<byte**>(
 +		mem_zalloc(sizeof(byte*) * n_files));
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	/* These are indexed per file below and store byte* elements, so
 +	they must be cast to byte** as well */
 +	group->archive_file_header_bufs_ptr = static_cast<byte**>(
 +		mem_zalloc(sizeof(byte*) * n_files));
 +
 +	group->archive_file_header_bufs = static_cast<byte**>(
 +		mem_zalloc(sizeof(byte*) * n_files));
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	for (i = 0; i < n_files; i++) {
 +		/* One extra block is allocated so that the aligned pointer
 +		still has LOG_FILE_HDR_SIZE bytes available */
 +		group->file_header_bufs_ptr[i] = static_cast<byte*>(
 +			mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +
 +		group->file_header_bufs[i] = static_cast<byte*>(
 +			ut_align(group->file_header_bufs_ptr[i],
 +				 OS_FILE_LOG_BLOCK_SIZE));
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +		group->archive_file_header_bufs_ptr[i] = static_cast<byte*>(
 +			mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +
 +		group->archive_file_header_bufs[i] = static_cast<byte*>(
 +			ut_align(group->archive_file_header_bufs_ptr[i],
 +				 OS_FILE_LOG_BLOCK_SIZE));
 +#endif /* UNIV_LOG_ARCHIVE */
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	group->archive_space_id = archive_space_id;
 +
 +	group->archived_file_no = 0;
 +	group->archived_offset = 0;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	group->checkpoint_buf_ptr = static_cast<byte*>(
 +		mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE));
 +
 +	group->checkpoint_buf = static_cast<byte*>(
 +		ut_align(group->checkpoint_buf_ptr,OS_FILE_LOG_BLOCK_SIZE));
 +
 +	UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
 +
 +	ut_a(log_calc_max_ages());
 +}
 +
 +/******************************************************************//**
 +Signals the flush events selected by the given unlock code once a flush
 +i/o has completed. */
 +UNIV_INLINE
 +void
 +log_flush_do_unlocks(
 +/*=================*/
 +	ulint	code)	/*!< in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
 +			and LOG_UNLOCK_NONE_FLUSHED_LOCK */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	/* NOTE: the log mutex must be owned while the events are set.
 +	Transactions wait for these events, and at the moment an event is
 +	set the log flush they were waiting for must have ended. Without the
 +	mutex, the i/o-thread calling this function could be preempted for a
 +	while; by the time it resumed, a new flush might have been started,
 +	and this function would erroneously signal the NEW flush as
 +	completed. The event state therefore changes atomically together
 +	with log_sys->n_pending_writes etc. */
 +
 +	const bool	signal_one_flushed
 +		= (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) != 0;
 +	const bool	signal_no_flush
 +		= (code & LOG_UNLOCK_FLUSH_LOCK) != 0;
 +
 +	if (signal_one_flushed) {
 +		os_event_set(log_sys->one_flushed_event);
 +	}
 +
 +	if (signal_no_flush) {
 +		os_event_set(log_sys->no_flush_event);
 +	}
 +}
 +
 +/******************************************************************//**
 +Checks whether the given log group has no pending writes left and, if it
 +is the first group to get there in the current flush, records that the log
 +has been written to some group up to log_sys->write_lsn.
 +@return	LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
 +UNIV_INLINE
 +ulint
 +log_group_check_flush_completion(
 +/*=============================*/
 +	log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (group->n_pending_writes != 0) {
 +		/* Writes to this group are still outstanding */
 +		return(0);
 +	}
 +
 +	if (log_sys->one_flushed) {
 +		/* Some group was already reported as flushed */
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +
 +			fprintf(stderr, "Log flushed to group %lu\n",
 +				(ulong) group->id);
 +		}
 +#endif /* UNIV_DEBUG */
 +		return(0);
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Log flushed first to group %lu\n",
 +			(ulong) group->id);
 +	}
 +#endif /* UNIV_DEBUG */
 +	log_sys->one_flushed = TRUE;
 +	log_sys->written_to_some_lsn = log_sys->write_lsn;
 +
 +	return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
 +}
 +
 +/******************************************************//**
 +Checks whether all pending log writes have completed and, if so, records
 +the written lsn and, when the written part is large enough, moves the
 +unwritten tail of the log buffer to the start of the buffer.
 +@return	LOG_UNLOCK_FLUSH_LOCK or 0 */
 +static
 +ulint
 +log_sys_check_flush_completion(void)
 +/*================================*/
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (log_sys->n_pending_writes != 0) {
 +		/* The flush is still in progress */
 +		return(0);
 +	}
 +
 +	log_sys->written_to_all_lsn = log_sys->write_lsn;
 +	log_sys->buf_next_to_write = log_sys->write_end_offset;
 +
 +	if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
 +		/* Shift the buffer content that starts at the block
 +		containing write_end_offset down to offset 0 */
 +
 +		const ulint	shift = ut_calc_align_down(
 +			log_sys->write_end_offset,
 +			OS_FILE_LOG_BLOCK_SIZE);
 +		const ulint	tail_end = ut_calc_align(
 +			log_sys->buf_free,
 +			OS_FILE_LOG_BLOCK_SIZE);
 +
 +		ut_memmove(log_sys->buf, log_sys->buf + shift,
 +			   tail_end - shift);
 +
 +		log_sys->buf_free -= shift;
 +		log_sys->buf_next_to_write -= shift;
 +	}
 +
 +	return(LOG_UNLOCK_FLUSH_LOCK);
 +}
 +
 +/******************************************************//**
 +Completes an i/o to a log file.
 +The group argument is a tagged pointer: under UNIV_LOG_ARCHIVE the address
 +of log_archive_io marks an archive write, and a pointer with its lowest bit
 +set marks a checkpoint write to the group at that address minus one. Any
 +other value hits ut_error, because log writes are currently synchronous;
 +the code that follows ut_error is kept for the asynchronous case. */
 +UNIV_INTERN
 +void
 +log_io_complete(
 +/*============*/
 +	log_group_t*	group)	/*!< in: log group or a dummy pointer */
 +{
 +	ulint	unlock;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if ((byte*) group == &log_archive_io) {
 +		/* It was an archive write */
 +
 +		log_io_complete_archive();
 +
 +		return;
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if ((ulint) group & 0x1UL) {
 +		/* It was a checkpoint write */
 +		group = (log_group_t*)((ulint) group - 1);
 +
 +		if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
 +		    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
 +
 +			fil_flush(group->space_id);
 +		}
 +
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +			/* group->id is cast to ulong to match %lu, as in
 +			the other debug printouts of this file */
 +			fprintf(stderr,
 +				"Checkpoint info written to group %lu\n",
 +				(ulong) group->id);
 +		}
 +#endif /* UNIV_DEBUG */
 +		log_io_complete_checkpoint();
 +
 +		return;
 +	}
 +
 +	ut_error;	/*!< We currently use synchronous writing of the
 +			logs and cannot end up here! */
 +
 +	if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
 +	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
 +	    && srv_flush_log_at_trx_commit != 2) {
 +
 +		fil_flush(group->space_id);
 +	}
 +
 +	mutex_enter(&(log_sys->mutex));
 +	ut_ad(!recv_no_log_write);
 +
 +	ut_a(group->n_pending_writes > 0);
 +	ut_a(log_sys->n_pending_writes > 0);
 +
 +	group->n_pending_writes--;
 +	log_sys->n_pending_writes--;
 +	MONITOR_DEC(MONITOR_PENDING_LOG_WRITE);
 +
 +	unlock = log_group_check_flush_completion(group);
 +	unlock = unlock | log_sys_check_flush_completion();
 +
 +	log_flush_do_unlocks(unlock);
 +
 +	mutex_exit(&(log_sys->mutex));
 +}
 +
 +/******************************************************//**
 +Writes a log file header to a log file space.
 +The group id and the start lsn are stored into the in-memory header buffer
 +of the nth file, and the LOG_FILE_WAS_CREATED_BY_HOT_BACKUP field is
 +overwritten with four spaces. If log_do_write is set, one block of
 +OS_FILE_LOG_BLOCK_SIZE bytes is then written at byte offset
 +nth_file * file_size of the group's file space. The caller must hold the
 +log mutex (checked by a debug assertion). */
 +static
 +void
 +log_group_file_header_flush(
 +/*========================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	ulint		nth_file,	/*!< in: header to the nth file in the
 +					log file space */
 +	lsn_t		start_lsn)	/*!< in: log file data starts at this
 +					lsn */
 +{
 +	byte*	buf;
 +	lsn_t	dest_offset;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_ad(!recv_no_log_write);
 +	ut_a(nth_file < group->n_files);
 +
 +	buf = *(group->file_header_bufs + nth_file);
 +
 +	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
 +	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
 +
 +	/* Wipe over possible label of mysqlbackup --restore */
 +	memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "    ", 4);
 +
 +	dest_offset = nth_file * group->file_size;
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Writing log file header to group %lu file %lu\n",
 +			(ulong) group->id, (ulong) nth_file);
 +	}
 +#endif /* UNIV_DEBUG */
 +	if (log_do_write) {
 +		log_sys->n_log_ios++;
 +
 +		MONITOR_INC(MONITOR_LOG_IO);
 +
 +		srv_stats.os_log_pending_writes.inc();
 +
 +		fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
 +		       (ulint) (dest_offset / UNIV_PAGE_SIZE),
 +		       (ulint) (dest_offset % UNIV_PAGE_SIZE),
 +		       OS_FILE_LOG_BLOCK_SIZE,
 +		       buf, group);
 +
 +		srv_stats.os_log_pending_writes.dec();
 +	}
 +}
 +
 +/******************************************************//**
 +Stores a 4-byte checksum to the trailer checksum field of a log block
 +before writing it to a log file. This checksum is used in recovery to
 +check the consistency of a log block. The value stored is the result of
 +log_block_calc_checksum() on the same block. */
 +static
 +void
 +log_block_store_checksum(
 +/*=====================*/
 +	byte*	block)	/*!< in/out: pointer to a log block */
 +{
 +	log_block_set_checksum(block, log_block_calc_checksum(block));
 +}
 +
 +/******************************************************//**
 +Writes a buffer to a log file group.
 +The buffer is written in segments, none of which crosses a log file
 +boundary: a segment that would extend past the end of the current file is
 +cut at the file end and the rest is written in the next pass. When a
 +segment starts immediately after a file header, and either new_data_offset
 +is 0 or a previous segment was cut at a file end, the file header is
 +flushed first. The checksum of every block in a segment is stored before
 +the segment is written. Actual i/o is done only if log_do_write is set.
 +The caller must hold the log mutex (checked by a debug assertion). */
 +UNIV_INTERN
 +void
 +log_group_write_buf(
 +/*================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	byte*		buf,		/*!< in: buffer */
 +	ulint		len,		/*!< in: buffer len; must be divisible
 +					by OS_FILE_LOG_BLOCK_SIZE */
 +	lsn_t		start_lsn,	/*!< in: start lsn of the buffer; must
 +					be divisible by
 +					OS_FILE_LOG_BLOCK_SIZE */
 +	ulint		new_data_offset)/*!< in: start offset of new data in
 +					buf: this parameter is used to decide
 +					if we have to write a new log file
 +					header */
 +{
 +	ulint		write_len;
 +	ibool		write_header;
 +	lsn_t		next_offset;
 +	ulint		i;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_ad(!recv_no_log_write);
 +	ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 +	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 +
 +	if (new_data_offset == 0) {
 +		write_header = TRUE;
 +	} else {
 +		write_header = FALSE;
 +	}
 +loop:
 +	if (len == 0) {
 +
 +		return;
 +	}
 +
 +	next_offset = log_group_calc_lsn_offset(start_lsn, group);
 +
 +	if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
 +	    && write_header) {
 +		/* We start to write a new log file instance in the group */
 +
 +		ut_a(next_offset / group->file_size <= ULINT_MAX);
 +
 +		log_group_file_header_flush(group, (ulint)
 +					    (next_offset / group->file_size),
 +					    start_lsn);
 +		srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
 +
 +		srv_stats.log_writes.inc();
 +	}
 +
 +	if ((next_offset % group->file_size) + len > group->file_size) {
 +
 +		/* if the above condition holds, then the below expression
 +		is < len which is ulint, so the typecast is ok */
 +		write_len = (ulint)
 +			(group->file_size - (next_offset % group->file_size));
 +	} else {
 +		write_len = len;
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +
 +		fprintf(stderr,
 +			"Writing log file segment to group %lu"
 +			" offset " LSN_PF " len %lu\n"
 +			"start lsn " LSN_PF "\n"
 +			"First block n:o %lu last block n:o %lu\n",
 +			(ulong) group->id, next_offset,
 +			write_len,
 +			start_lsn,
 +			(ulong) log_block_get_hdr_no(buf),
 +			(ulong) log_block_get_hdr_no(
 +				buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
 +		ut_a(log_block_get_hdr_no(buf)
 +		     == log_block_convert_lsn_to_no(start_lsn));
 +
 +		for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
 +
 +			ut_a(log_block_get_hdr_no(buf) + i
 +			     == log_block_get_hdr_no(
 +				     buf + i * OS_FILE_LOG_BLOCK_SIZE));
 +		}
 +	}
 +#endif /* UNIV_DEBUG */
 +	/* Calculate the checksums for each log block and write them to
 +	the trailer fields of the log blocks */
 +
 +	for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
 +		log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
 +	}
 +
 +	if (log_do_write) {
 +		log_sys->n_log_ios++;
 +
 +		MONITOR_INC(MONITOR_LOG_IO);
 +
 +		srv_stats.os_log_pending_writes.inc();
 +
 +		ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
 +
 +		fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
 +		       (ulint) (next_offset / UNIV_PAGE_SIZE),
 +		       (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
 +		       group);
 +
 +		srv_stats.os_log_pending_writes.dec();
 +
 +		srv_stats.os_log_written.add(write_len);
 +		srv_stats.log_writes.inc();
 +	}
 +
 +	if (write_len < len) {
 +		start_lsn += write_len;
 +		len -= write_len;
 +		buf += write_len;
 +
 +		write_header = TRUE;
 +
 +		goto loop;
 +	}
 +}
 +
 +/******************************************************//**
 +This function is called, e.g., when a transaction wants to commit. It checks
 +that the log has been written to the log file up to the last log entry written
 +by the transaction. If there is a flush running, it waits and checks if the
 +flush flushed enough. If not, starts a new flush. */
 +UNIV_INTERN
 +void
 +log_write_up_to(
 +/*============*/
 +	lsn_t	lsn,	/*!< in: log sequence number up to which
 +			the log should be written,
 +			LSN_MAX if not specified */
 +	ulint	wait,	/*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
 +			or LOG_WAIT_ALL_GROUPS */
 +	ibool	flush_to_disk)
 +			/*!< in: TRUE if we want the written log
 +			also to be flushed to disk */
 +{
 +	log_group_t*	group;
 +	ulint		start_offset;
 +	ulint		end_offset;
 +	ulint		area_start;
 +	ulint		area_end;
 +#ifdef UNIV_DEBUG
 +	ulint		loop_count	= 0;
 +#endif /* UNIV_DEBUG */
 +	ulint		unlock;
 +	ib_uint64_t	write_lsn;
 +	ib_uint64_t	flush_lsn;
 +
 +	ut_ad(!srv_read_only_mode);
 +
 +	if (recv_no_ibuf_operations) {
 +		/* Recovery is running and no operations on the log files are
 +		allowed yet (the variable name .._no_ibuf_.. is misleading) */
 +
 +		return;
 +	}
 +
 +loop:
 +	ut_ad(++loop_count < 100);
 +
 +	mutex_enter(&(log_sys->mutex));
 +	ut_ad(!recv_no_log_write);
 +
 +	if (flush_to_disk
 +	    && log_sys->flushed_to_disk_lsn >= lsn) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return;
 +	}
 +
 +	if (!flush_to_disk
 +	    && (log_sys->written_to_all_lsn >= lsn
 +		|| (log_sys->written_to_some_lsn >= lsn
 +		    && wait != LOG_WAIT_ALL_GROUPS))) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return;
 +	}
 +
 +	if (log_sys->n_pending_writes > 0) {
 +		/* A write (+ possibly flush to disk) is running */
 +
 +		if (flush_to_disk
 +		    && log_sys->current_flush_lsn >= lsn) {
 +			/* The write + flush will write enough: wait for it to
 +			complete */
 +
 +			goto do_waits;
 +		}
 +
 +		if (!flush_to_disk
 +		    && log_sys->write_lsn >= lsn) {
 +			/* The write will write enough: wait for it to
 +			complete */
 +
 +			goto do_waits;
 +		}
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		/* Wait for the write to complete and try to start a new
 +		write */
 +
 +		os_event_wait(log_sys->no_flush_event);
 +
 +		goto loop;
 +	}
 +
 +	if (!flush_to_disk
 +	    && log_sys->buf_free == log_sys->buf_next_to_write) {
 +		/* Nothing to write and no flush to disk requested */
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return;
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Writing log from " LSN_PF " up to lsn " LSN_PF "\n",
 +			log_sys->written_to_all_lsn,
 +			log_sys->lsn);
 +	}
 +#endif /* UNIV_DEBUG */
 +	log_sys->n_pending_writes++;
 +	MONITOR_INC(MONITOR_PENDING_LOG_WRITE);
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +	group->n_pending_writes++;	/*!< We assume here that we have only
 +					one log group! */
 +
 +	os_event_reset(log_sys->no_flush_event);
 +	os_event_reset(log_sys->one_flushed_event);
 +
 +	start_offset = log_sys->buf_next_to_write;
 +	end_offset = log_sys->buf_free;
 +
 +	area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
 +	area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	ut_ad(area_end - area_start > 0);
 +
 +	log_sys->write_lsn = log_sys->lsn;
 +
 +	if (flush_to_disk) {
 +		log_sys->current_flush_lsn = log_sys->lsn;
 +	}
 +
 +	log_sys->one_flushed = FALSE;
 +
 +	log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
 +	log_block_set_checkpoint_no(
 +		log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
 +		log_sys->next_checkpoint_no);
 +
 +	/* Copy the last, incompletely written, log block a log block length
 +	up, so that when the flush operation writes from the log buffer, the
 +	segment to write will not be changed by writers to the log */
 +
 +	ut_memcpy(log_sys->buf + area_end,
 +		  log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
 +		  OS_FILE_LOG_BLOCK_SIZE);
 +
 +	log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
 +	log_sys->write_end_offset = log_sys->buf_free;
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	/* Do the write to the log files */
 +
 +	while (group) {
 +		log_group_write_buf(
 +			group, log_sys->buf + area_start,
 +			area_end - area_start,
 +			ut_uint64_align_down(log_sys->written_to_all_lsn,
 +					     OS_FILE_LOG_BLOCK_SIZE),
 +			start_offset - area_start);
 +
 +		log_group_set_fields(group, log_sys->write_lsn);
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
 +		/* O_DSYNC means the OS did not buffer the log file at all:
 +		so we have also flushed to disk what we have written */
 +
 +		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
 +
 +	} else if (flush_to_disk) {
 +
 +		group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +		fil_flush(group->space_id);
 +		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
 +	}
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	ut_a(group->n_pending_writes == 1);
 +	ut_a(log_sys->n_pending_writes == 1);
 +
 +	group->n_pending_writes--;
 +	log_sys->n_pending_writes--;
 +	MONITOR_DEC(MONITOR_PENDING_LOG_WRITE);
 +
 +	unlock = log_group_check_flush_completion(group);
 +	unlock = unlock | log_sys_check_flush_completion();
 +
 +	log_flush_do_unlocks(unlock);
 +
 +	write_lsn = log_sys->write_lsn;
 +	flush_lsn = log_sys->flushed_to_disk_lsn;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	innobase_mysql_log_notify(write_lsn, flush_lsn);
 +
 +	return;
 +
 +do_waits:
 +	mutex_exit(&(log_sys->mutex));
 +
 +	switch (wait) {
 +	case LOG_WAIT_ONE_GROUP:
 +		os_event_wait(log_sys->one_flushed_event);
 +		break;
 +	case LOG_WAIT_ALL_GROUPS:
 +		os_event_wait(log_sys->no_flush_event);
 +		break;
 +#ifdef UNIV_DEBUG
 +	case LOG_NO_WAIT:
 +		break;
 +	default:
 +		ut_error;
 +#endif /* UNIV_DEBUG */
 +	}
 +}
 +
 +/****************************************************************//**
 +Does a synchronous flush of the log buffer to disk. */
 +UNIV_INTERN
 +void
 +log_buffer_flush_to_disk(void)
 +/*==========================*/
 +{
 +	lsn_t	lsn;
 +
 +	ut_ad(!srv_read_only_mode);
 +	mutex_enter(&(log_sys->mutex));
 +
 +	lsn = log_sys->lsn;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 +}
 +
 +/****************************************************************//**
 +This function writes the log buffer to the log file and if 'flush'
 +is set it forces a flush of the log file as well. This is meant to be
 +called from background master thread only as it does not wait for
 +the write (+ possible flush) to finish. */
 +UNIV_INTERN
 +void
 +log_buffer_sync_in_background(
 +/*==========================*/
 +	ibool	flush)	/*!< in: flush the logs to disk */
 +{
 +	lsn_t	lsn;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	lsn = log_sys->lsn;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	log_write_up_to(lsn, LOG_NO_WAIT, flush);
 +}
 +
 +/********************************************************************
 +
 +Tries to establish a big enough margin of free space in the log buffer, such
 +that a new log entry can be catenated without an immediate need for a flush. */
 +static
 +void
 +log_flush_margin(void)
 +/*==================*/
 +{
 +	log_t*	log	= log_sys;
 +	lsn_t	lsn	= 0;
 +
 +	mutex_enter(&(log->mutex));
 +
 +	if (log->buf_free > log->max_buf_free) {
 +
 +		if (log->n_pending_writes > 0) {
 +			/* A flush is running: hope that it will provide enough
 +			free space */
 +		} else {
 +			lsn = log->lsn;
 +		}
 +	}
 +
 +	mutex_exit(&(log->mutex));
 +
 +	if (lsn) {
 +		log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
 +	}
 +}
 +
 +/****************************************************************//**
 +Advances the smallest lsn for which there are unflushed dirty blocks in the
 +buffer pool. NOTE: this function may only be called if the calling thread owns
 +no synchronization objects!
 +@return false if there was a flush batch of the same type running,
 +which means that we could not start this flush batch */
 +static
 +bool
 +log_preflush_pool_modified_pages(
 +/*=============================*/
 +	lsn_t	new_oldest)	/*!< in: try to advance oldest_modified_lsn
 +				at least to this lsn */
 +{
 +	bool	success;
 +	ulint	n_pages;
 +
 +	if (recv_recovery_on) {
 +		/* If the recovery is running, we must first apply all
 +		log records to their respective file pages to get the
 +		right modify lsn values to these pages: otherwise, there
 +		might be pages on disk which are not yet recovered to the
 +		current lsn, and even after calling this function, we could
 +		not know how up-to-date the disk version of the database is,
 +		and we could not make a new checkpoint on the basis of the
 +		info on the buffer pool only. */
 +
- 		recv_apply_hashed_log_recs(TRUE);
++		recv_apply_hashed_log_recs(true);
 +	}
 +
 +	success = buf_flush_list(ULINT_MAX, new_oldest, &n_pages);
 +
 +	buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +	if (!success) {
 +		MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
 +	}
 +
 +	MONITOR_INC_VALUE_CUMULATIVE(
 +		MONITOR_FLUSH_SYNC_TOTAL_PAGE,
 +		MONITOR_FLUSH_SYNC_COUNT,
 +		MONITOR_FLUSH_SYNC_PAGES,
 +		n_pages);
 +
 +	return(success);
 +}
 +
 +/******************************************************//**
 +Completes a checkpoint. */
 +static
 +void
 +log_complete_checkpoint(void)
 +/*=========================*/
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_ad(log_sys->n_pending_checkpoint_writes == 0);
 +
 +	log_sys->next_checkpoint_no++;
 +
 +	log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
 +	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 +		    log_sys->lsn - log_sys->last_checkpoint_lsn);
 +
 +	rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
 +}
 +
 +/******************************************************//**
 +Completes an asynchronous checkpoint info write i/o to a log file. */
 +static
 +void
 +log_io_complete_checkpoint(void)
 +/*============================*/
 +{
 +	mutex_enter(&(log_sys->mutex));
 +
 +	ut_ad(log_sys->n_pending_checkpoint_writes > 0);
 +
 +	log_sys->n_pending_checkpoint_writes--;
 +	MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
 +
 +	if (log_sys->n_pending_checkpoint_writes == 0) {
 +		log_complete_checkpoint();
 +	}
 +
 +	mutex_exit(&(log_sys->mutex));
 +}
 +
 +/*******************************************************************//**
 +Writes info to a checkpoint about a log group. */
 +static
 +void
 +log_checkpoint_set_nth_group_info(
 +/*==============================*/
 +	byte*	buf,	/*!< in: buffer for checkpoint info */
 +	ulint	n,	/*!< in: nth slot */
 +	ulint	file_no,/*!< in: archived file number */
 +	ulint	offset)	/*!< in: archived file offset */
 +{
 +	ut_ad(n < LOG_MAX_N_GROUPS);
 +
 +	mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
 +			+ 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
 +			+ 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
 +}
 +
 +/*******************************************************************//**
 +Gets info from a checkpoint about a log group. */
 +UNIV_INTERN
 +void
 +log_checkpoint_get_nth_group_info(
 +/*==============================*/
 +	const byte*	buf,	/*!< in: buffer containing checkpoint info */
 +	ulint		n,	/*!< in: nth slot */
 +	ulint*		file_no,/*!< out: archived file number */
 +	ulint*		offset)	/*!< out: archived file offset */
 +{
 +	ut_ad(n < LOG_MAX_N_GROUPS);
 +
 +	*file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
 +				    + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
 +	*offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
 +				   + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
 +}
 +
 +/******************************************************//**
 +Writes the checkpoint info to a log group header. */
 +static
 +void
 +log_group_checkpoint(
 +/*=================*/
 +	log_group_t*	group)	/*!< in: log group */
 +{
 +	log_group_t*	group2;
 +#ifdef UNIV_LOG_ARCHIVE
 +	ib_uint64_t	archived_lsn;
 +	ib_uint64_t	next_archived_lsn;
 +#endif /* UNIV_LOG_ARCHIVE */
 +	lsn_t		lsn_offset;
 +	ulint		write_offset;
 +	ulint		fold;
 +	byte*		buf;
 +	ulint		i;
 +
 +	ut_ad(!srv_read_only_mode);
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
 +# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
 +#endif
 +
 +	buf = group->checkpoint_buf;
 +
 +	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
 +	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
 +
 +	lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
 +					       group);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
 +			lsn_offset & 0xFFFFFFFFUL);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32,
 +			lsn_offset >> 32);
 +
 +	mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +		archived_lsn = LSN_MAX;
 +	} else {
 +		archived_lsn = log_sys->archived_lsn;
 +
 +		if (archived_lsn != log_sys->next_archived_lsn) {
 +			next_archived_lsn = log_sys->next_archived_lsn;
 +			/* For debugging only */
 +		}
 +	}
 +
 +	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
 +#else /* UNIV_LOG_ARCHIVE */
 +	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
 +		log_checkpoint_set_nth_group_info(buf, i, 0, 0);
 +	}
 +
 +	group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	while (group2) {
 +		log_checkpoint_set_nth_group_info(buf, group2->id,
 +#ifdef UNIV_LOG_ARCHIVE
 +						  group2->archived_file_no,
 +						  group2->archived_offset
 +#else /* UNIV_LOG_ARCHIVE */
 +						  0, 0
 +#endif /* UNIV_LOG_ARCHIVE */
 +						  );
 +
 +		group2 = UT_LIST_GET_NEXT(log_groups, group2);
 +	}
 +
 +	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
 +
 +	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 +			      LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
 +
 +	/* We alternate the physical place of the checkpoint info in the first
 +	log file */
 +
 +	if ((log_sys->next_checkpoint_no & 1) == 0) {
 +		write_offset = LOG_CHECKPOINT_1;
 +	} else {
 +		write_offset = LOG_CHECKPOINT_2;
 +	}
 +
 +	if (log_do_write) {
 +		if (log_sys->n_pending_checkpoint_writes == 0) {
 +
 +			rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
 +					   LOG_CHECKPOINT);
 +		}
 +
 +		log_sys->n_pending_checkpoint_writes++;
 +		MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);
 +
 +		log_sys->n_log_ios++;
 +
 +		MONITOR_INC(MONITOR_LOG_IO);
 +
 +		/* We send as the last parameter the group machine address
 +		added with 1, as we want to distinguish between a normal log
 +		file write and a checkpoint field write */
 +
 +		fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->space_id, 0,
 +		       write_offset / UNIV_PAGE_SIZE,
 +		       write_offset % UNIV_PAGE_SIZE,
 +		       OS_FILE_LOG_BLOCK_SIZE,
 +		       buf, ((byte*) group + 1));
 +
 +		ut_ad(((ulint) group & 0x1UL) == 0);
 +	}
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +#ifdef UNIV_HOTBACKUP
 +/******************************************************//**
 +Writes info to a buffer of a log group when log files are created in
 +backup restoration. */
 +UNIV_INTERN
 +void
 +log_reset_first_header_and_checkpoint(
 +/*==================================*/
 +	byte*		hdr_buf,/*!< in: buffer which will be written to the
 +				start of the first log file */
 +	ib_uint64_t	start)	/*!< in: lsn of the start of the first log file;
 +				we pretend that there is a checkpoint at
 +				start + LOG_BLOCK_HDR_SIZE */
 +{
 +	ulint		fold;
 +	byte*		buf;
 +	ib_uint64_t	lsn;
 +
 +	mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
 +	mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
 +
 +	lsn = start + LOG_BLOCK_HDR_SIZE;
 +
 +	/* Write the label of mysqlbackup --restore */
 +	strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 +	       "ibbackup ");
 +	ut_sprintf_timestamp((char*) hdr_buf
 +			     + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
 +				+ (sizeof "ibbackup ") - 1));
 +	buf = hdr_buf + LOG_CHECKPOINT_1;
 +
 +	mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0);
 +	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
 +
 +	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
 +			LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, 0);
 +
 +	mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
 +
 +	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
 +
 +	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
 +
 +	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 +			      LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
 +
 +	/* Starting from InnoDB-3.23.50, we should also write info on
 +	allocated size in the tablespace, but unfortunately we do not
 +	know it here */
 +}
 +#endif /* UNIV_HOTBACKUP */
 +
 +#ifndef UNIV_HOTBACKUP
 +/******************************************************//**
 +Reads checkpoint info from a log group header to log_sys->checkpoint_buf. */
 +UNIV_INTERN
 +void
 +log_group_read_checkpoint_info(
 +/*===========================*/
 +	log_group_t*	group,	/*!< in: log group */
 +	ulint		field)	/*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
 +	       field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
 +	       OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
 +}
 +
 +/******************************************************//**
 +Writes checkpoint info to groups. */
 +UNIV_INTERN
 +void
 +log_groups_write_checkpoint_info(void)
 +/*==================================*/
 +{
 +	log_group_t*	group;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (!srv_read_only_mode) {
 +		for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +		     group;
 +		     group = UT_LIST_GET_NEXT(log_groups, group)) {
 +
 +			log_group_checkpoint(group);
 +		}
 +	}
 +}
 +
 +/******************************************************//**
 +Makes a checkpoint. Note that this function does not flush dirty
 +blocks from the buffer pool: it only checks what is lsn of the oldest
 +modification in the pool, and writes information about the lsn in
 +log files. Use log_make_checkpoint_at to flush also the pool.
 +@return	TRUE if success, FALSE if a checkpoint write was already running */
 +UNIV_INTERN
 +ibool
 +log_checkpoint(
 +/*===========*/
 +	ibool	sync,		/*!< in: TRUE if synchronous operation is
 +				desired */
 +	ibool	write_always)	/*!< in: the function normally checks if the
 +				new checkpoint would have a greater
 +				lsn than the previous one: if not, then no
 +				physical write is done; by setting this
 +				parameter TRUE, a physical write will always be
 +				made to log files */
 +{
 +	lsn_t	oldest_lsn;
 +
 +	ut_ad(!srv_read_only_mode);
 +
 +	if (recv_recovery_is_on()) {
- 		recv_apply_hashed_log_recs(TRUE);
++		recv_apply_hashed_log_recs(true);
 +	}
 +
 +	if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
 +		fil_flush_file_spaces(FIL_TABLESPACE);
 +	}
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	ut_ad(!recv_no_log_write);
 +	oldest_lsn = log_buf_pool_get_oldest_modification();
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Because log also contains headers and dummy log records,
 +	if the buffer pool contains no dirty buffers, oldest_lsn
 +	gets the value log_sys->lsn from the previous function,
 +	and we must make sure that the log is flushed up to that
 +	lsn. If there are dirty buffers in the buffer pool, then our
 +	write-ahead-logging algorithm ensures that the log has been flushed
 +	up to oldest_lsn. */
 +
 +	log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	if (!write_always
 +	    && log_sys->last_checkpoint_lsn >= oldest_lsn) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return(TRUE);
 +	}
 +
 +	ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
 +
 +	if (log_sys->n_pending_checkpoint_writes > 0) {
 +		/* A checkpoint write is running */
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		if (sync) {
 +			/* Wait for the checkpoint write to complete */
 +			rw_lock_s_lock(&(log_sys->checkpoint_lock));
 +			rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 +		}
 +
 +		return(FALSE);
 +	}
 +
 +	log_sys->next_checkpoint_lsn = oldest_lsn;
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr, "Making checkpoint no "
 +			LSN_PF " at lsn " LSN_PF "\n",
 +			log_sys->next_checkpoint_no,
 +			oldest_lsn);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	log_groups_write_checkpoint_info();
 +
 +	MONITOR_INC(MONITOR_NUM_CHECKPOINT);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	if (sync) {
 +		/* Wait for the checkpoint write to complete */
 +		rw_lock_s_lock(&(log_sys->checkpoint_lock));
 +		rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 +	}
 +
 +	return(TRUE);
 +}
 +
 +/****************************************************************//**
 +Makes a checkpoint at a given lsn or later. */
 +UNIV_INTERN
 +void
 +log_make_checkpoint_at(
 +/*===================*/
 +	lsn_t	lsn,		/*!< in: make a checkpoint at this or a
 +				later lsn, if LSN_MAX, makes
 +				a checkpoint at the latest lsn */
 +	ibool	write_always)	/*!< in: the function normally checks if
 +				the new checkpoint would have a
 +				greater lsn than the previous one: if
 +				not, then no physical write is done;
 +				by setting this parameter TRUE, a
 +				physical write will always be made to
 +				log files */
 +{
 +	/* Preflush pages synchronously */
 +
 +	while (!log_preflush_pool_modified_pages(lsn)) {
 +		/* Flush as much as we can */
 +	}
 +
 +	while (!log_checkpoint(TRUE, write_always)) {
 +		/* Force a checkpoint */
 +	}
 +}
 +
 +/****************************************************************//**
 +Tries to establish a big enough margin of free space in the log groups, such
 +that a new log entry can be catenated without an immediate need for a
 +checkpoint. NOTE: this function may only be called if the calling thread
 +owns no synchronization objects! */
 +static
 +void
 +log_checkpoint_margin(void)
 +/*=======================*/
 +{
 +	log_t*		log		= log_sys;
 +	lsn_t		age;
 +	lsn_t		checkpoint_age;
 +	ib_uint64_t	advance;
 +	lsn_t		oldest_lsn;
 +	ibool		checkpoint_sync;
 +	ibool		do_checkpoint;
 +	bool		success;
 +loop:
 +	checkpoint_sync = FALSE;
 +	do_checkpoint = FALSE;
 +	advance = 0;
 +
 +	mutex_enter(&(log->mutex));
 +	ut_ad(!recv_no_log_write);
 +
 +	if (log->check_flush_or_checkpoint == FALSE) {
 +		mutex_exit(&(log->mutex));
 +
 +		return;
 +	}
 +
 +	oldest_lsn = log_buf_pool_get_oldest_modification();
 +
 +	age = log->lsn - oldest_lsn;
 +
 +	if (age > log->max_modified_age_sync) {
 +
 +		/* A flush is urgent: we have to do a synchronous preflush */
 +		advance = 2 * (age - log->max_modified_age_sync);
 +	}
 +
 +	checkpoint_age = log->lsn - log->last_checkpoint_lsn;
 +
 +	if (checkpoint_age > log->max_checkpoint_age) {
 +		/* A checkpoint is urgent: we do it synchronously */
 +
 +		checkpoint_sync = TRUE;
 +
 +		do_checkpoint = TRUE;
 +
 +	} else if (checkpoint_age > log->max_checkpoint_age_async) {
 +		/* A checkpoint is not urgent: do it asynchronously */
 +
 +		do_checkpoint = TRUE;
 +
 +		log->check_flush_or_checkpoint = FALSE;
 +	} else {
 +		log->check_flush_or_checkpoint = FALSE;
 +	}
 +
 +	mutex_exit(&(log->mutex));
 +
 +	if (advance) {
 +		lsn_t	new_oldest = oldest_lsn + advance;
 +
 +		success = log_preflush_pool_modified_pages(new_oldest);
 +
 +		/* If the flush succeeded, this thread has done its part
 +		and can proceed. If it did not succeed, there was another
 +		thread doing a flush at the same time. */
 +		if (!success) {
 +			mutex_enter(&(log->mutex));
 +
 +			log->check_flush_or_checkpoint = TRUE;
 +
 +			mutex_exit(&(log->mutex));
 +			goto loop;
 +		}
 +	}
 +
 +	if (do_checkpoint) {
 +		log_checkpoint(checkpoint_sync, FALSE);
 +
 +		if (checkpoint_sync) {
 +
 +			goto loop;
 +		}
 +	}
 +}
 +
 +/******************************************************//**
 +Reads a specified log segment to a buffer. */
 +UNIV_INTERN
 +void
 +log_group_read_log_seg(
 +/*===================*/
 +	ulint		type,		/*!< in: LOG_ARCHIVE or LOG_RECOVER */
 +	byte*		buf,		/*!< in: buffer where to read */
 +	log_group_t*	group,		/*!< in: log group */
 +	lsn_t		start_lsn,	/*!< in: read area start */
 +	lsn_t		end_lsn)	/*!< in: read area end */
 +{
 +	ulint	len;
 +	lsn_t	source_offset;
 +	bool	sync;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	sync = (type == LOG_RECOVER);
 +loop:
 +	source_offset = log_group_calc_lsn_offset(start_lsn, group);
 +
 +	ut_a(end_lsn - start_lsn <= ULINT_MAX);
 +	len = (ulint) (end_lsn - start_lsn);
 +
 +	ut_ad(len != 0);
 +
 +	if ((source_offset % group->file_size) + len > group->file_size) {
 +
 +		/* If the above condition is true then len (which is ulint)
 +		is > the expression below, so the typecast is ok */
 +		len = (ulint) (group->file_size -
 +			(source_offset % group->file_size));
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (type == LOG_ARCHIVE) {
 +
 +		log_sys->n_pending_archive_ios++;
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
 +
 +	fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
 +	       (ulint) (source_offset / UNIV_PAGE_SIZE),
 +	       (ulint) (source_offset % UNIV_PAGE_SIZE),
 +	       len, buf, NULL);
 +
 +	start_lsn += len;
 +	buf += len;
 +
++	if (recv_sys->report(ut_time())) {
++		ib_logf(IB_LOG_LEVEL_INFO, "Read redo log up to LSN=" LSN_PF,
++			start_lsn);
++	}
++
 +	if (start_lsn != end_lsn) {
 +
 +		goto loop;
 +	}
 +}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +/******************************************************//**
 +Generates an archived log file name. */
 +UNIV_INTERN
 +void
 +log_archived_file_name_gen(
 +/*=======================*/
 +	char*	buf,	/*!< in: buffer where to write */
 +	ulint	id MY_ATTRIBUTE((unused)),
 +			/*!< in: group id;
 +			currently we only archive the first group */
 +	ulint	file_no)/*!< in: file number */
 +{
 +	sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
 +}
 +
 +/******************************************************//**
 +Writes a log file header to a log file space. */
 +static
 +void
 +log_group_archive_file_header_write(
 +/*================================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	ulint		nth_file,	/*!< in: header to the nth file in the
 +					archive log file space */
 +	ulint		file_no,	/*!< in: archived file number */
 +	ib_uint64_t	start_lsn)	/*!< in: log file data starts at this
 +					lsn */
 +{
 +	byte*	buf;
 +	ulint	dest_offset;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	ut_a(nth_file < group->n_files);
 +
 +	buf = *(group->archive_file_header_bufs + nth_file);
 +
 +	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
 +	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
 +	mach_write_to_4(buf + LOG_FILE_NO, file_no);
 +
 +	mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
 +
 +	dest_offset = nth_file * group->file_size;
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
 +	       dest_offset / UNIV_PAGE_SIZE,
 +	       dest_offset % UNIV_PAGE_SIZE,
 +	       2 * OS_FILE_LOG_BLOCK_SIZE,
 +	       buf, &log_archive_io);
 +}
 +
 +/******************************************************//**
 +Writes a log file header to a completed archived log file. */
 +static
 +void
 +log_group_archive_completed_header_write(
 +/*=====================================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	ulint		nth_file,	/*!< in: header to the nth file in the
 +					archive log file space */
 +	ib_uint64_t	end_lsn)	/*!< in: end lsn of the file */
 +{
 +	byte*	buf;
 +	ulint	dest_offset;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_a(nth_file < group->n_files);
 +
 +	buf = *(group->archive_file_header_bufs + nth_file);
 +
 +	mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
 +	mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
 +
 +	dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
 +	       dest_offset / UNIV_PAGE_SIZE,
 +	       dest_offset % UNIV_PAGE_SIZE,
 +	       OS_FILE_LOG_BLOCK_SIZE,
 +	       buf + LOG_FILE_ARCH_COMPLETED,
 +	       &log_archive_io);
 +}
 +
 +/******************************************************//**
 +Does the archive writes for a single log group. */
 +static
 +void
 +log_group_archive(
 +/*==============*/
 +	log_group_t*	group)	/*!< in: log group */
 +{
 +	os_file_t	file_handle;
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +	char		name[1024];
 +	byte*		buf;
 +	ulint		len;
 +	ibool		ret;
 +	lsn_t		next_offset;
 +	ulint		n_files;
 +	ulint		open_mode;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	start_lsn = log_sys->archived_lsn;
 +
 +	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 +
 +	end_lsn = log_sys->next_archived_lsn;
 +
 +	ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 +
 +	buf = log_sys->archive_buf;
 +
 +	n_files = 0;
 +
 +	next_offset = group->archived_offset;
 +loop:
 +	if ((next_offset % group->file_size == 0)
 +	    || (fil_space_get_size(group->archive_space_id) == 0)) {
 +
 +		/* Add the file to the archive file space; create or open the
 +		file */
 +
 +		if (next_offset % group->file_size == 0) {
 +			open_mode = OS_FILE_CREATE;
 +		} else {
 +			open_mode = OS_FILE_OPEN;
 +		}
 +
 +		log_archived_file_name_gen(name, group->id,
 +					   group->archived_file_no + n_files);
 +
 +		file_handle = os_file_create(innodb_file_log_key,
 +					     name, open_mode,
 +					     OS_FILE_AIO,
 +					     OS_DATA_FILE, &ret);
 +
 +		if (!ret && (open_mode == OS_FILE_CREATE)) {
 +			file_handle = os_file_create(
 +				innodb_file_log_key, name, OS_FILE_OPEN,
 +				OS_FILE_AIO, OS_DATA_FILE, &ret);
 +		}
 +
 +		if (!ret) {
 +			fprintf(stderr,
 +				"InnoDB: Cannot create or open"
 +				" archive log file %s.\n"
 +				"InnoDB: Cannot continue operation.\n"
 +				"InnoDB: Check that the log archive"
 +				" directory exists,\n"
 +				"InnoDB: you have access rights to it, and\n"
 +				"InnoDB: there is space available.\n", name);
 +			exit(1);
 +		}
 +
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +			fprintf(stderr, "Created archive file %s\n", name);
 +		}
 +#endif /* UNIV_DEBUG */
 +
 +		ret = os_file_close(file_handle);
 +
 +		ut_a(ret);
 +
 +		/* Add the archive file as a node to the space */
 +
 +		fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
 +				group->archive_space_id, FALSE);
 +
 +		if (next_offset % group->file_size == 0) {
 +			log_group_archive_file_header_write(
 +				group, n_files,
 +				group->archived_file_no + n_files,
 +				start_lsn);
 +
 +			next_offset += LOG_FILE_HDR_SIZE;
 +		}
 +	}
 +
 +	len = end_lsn - start_lsn;
 +
 +	if (group->file_size < (next_offset % group->file_size) + len) {
 +
 +		len = group->file_size - (next_offset % group->file_size);
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Archiving starting at lsn " LSN_PF ", len %lu"
 +			" to group %lu\n",
 +			start_lsn,
 +			(ulong) len, (ulong) group->id);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	log_sys->n_pending_archive_ios++;
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id,
 +	       (ulint) (next_offset / UNIV_PAGE_SIZE),
 +	       (ulint) (next_offset % UNIV_PAGE_SIZE),
 +	       ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
 +	       &log_archive_io);
 +
 +	start_lsn += len;
 +	next_offset += len;
 +	buf += len;
 +
 +	if (next_offset % group->file_size == 0) {
 +		n_files++;
 +	}
 +
 +	if (end_lsn != start_lsn) {
 +
 +		goto loop;
 +	}
 +
 +	group->next_archived_file_no = group->archived_file_no + n_files;
 +	group->next_archived_offset = next_offset % group->file_size;
 +
 +	ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
 +}
 +
 +/*****************************************************//**
 +Starts the archive write for the log groups. Despite the plural name,
 +only the first group of log_sys->log_groups is archived. The caller
 +must hold log_sys->mutex. */
 +static
 +void
 +log_archive_groups(void)
 +/*====================*/
 +{
 +	ut_ad(mutex_own(&log_sys->mutex));
 +
 +	log_group_archive(UT_LIST_GET_FIRST(log_sys->log_groups));
 +}
 +
 +/*****************************************************//**
 +Completes the archiving write phase for (each log group), currently,
 +the first log group. Publishes the group's next_archived_file_no and
 +next_archived_offset as its archived position, writes a completion
 +notice to the header of every archive file that is about to be truncated,
 +and truncates those files from the start of the archive file space.
 +The caller must hold log_sys->mutex. */
 +static
 +void
 +log_archive_write_complete_groups(void)
 +/*===================================*/
 +{
 +	log_group_t*	group;
 +	ulint		end_offset;
 +	ulint		trunc_files;
 +	ulint		n_files;
 +	ib_uint64_t	start_lsn;
 +	ib_uint64_t	end_lsn;
 +	ulint		i;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	group->archived_file_no = group->next_archived_file_no;
 +	group->archived_offset = group->next_archived_offset;
 +
 +	/* Truncate from the archive file space all but the last
 +	file, or if it has been written full, all files. n_files is the
 +	number of archive files currently in the space: the space size
 +	(in pages, converted to bytes) divided by the group file size. */
 +
 +	n_files = (UNIV_PAGE_SIZE
 +		   * fil_space_get_size(group->archive_space_id))
 +		/ group->file_size;
 +	ut_ad(n_files > 0);
 +
 +	end_offset = group->archived_offset;
 +
 +	if (end_offset % group->file_size == 0) {
 +
 +		trunc_files = n_files;
 +	} else {
 +		trunc_files = n_files - 1;
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes && trunc_files) {
 +		fprintf(stderr,
 +			"Complete file(s) archived to group %lu\n",
 +			(ulong) group->id);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	/* Calculate the archive file space start lsn: step back from
 +	next_archived_lsn by the bytes accounted for by end_offset (less
 +	one file header) and by trunc_files files (each less its header) */
 +	start_lsn = log_sys->next_archived_lsn
 +		- (end_offset - LOG_FILE_HDR_SIZE + trunc_files
 +		   * (group->file_size - LOG_FILE_HDR_SIZE));
 +	end_lsn = start_lsn;
 +
 +	for (i = 0; i < trunc_files; i++) {
 +
 +		end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
 +
 +		/* Write a notice to the headers of archived log
 +		files that the file write has been completed; end_lsn
 +		is the lsn at which file i ends */
 +
 +		log_group_archive_completed_header_write(group, i, end_lsn);
 +	}
 +
 +	fil_space_truncate_start(group->archive_space_id,
 +				 trunc_files * group->file_size);
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fputs("Archiving writes completed\n", stderr);
 +	}
 +#endif /* UNIV_DEBUG */
 +}
 +
 +/******************************************************//**
 +Checks whether the current archiving phase has finished and, if so,
 +advances it: a finished read phase starts the archive writes, and a
 +finished write phase publishes next_archived_lsn as archived_lsn and
 +releases the x-latch on log_sys->archive_lock. Does nothing while
 +archive i/os are still pending. The caller must hold log_sys->mutex. */
 +static
 +void
 +log_archive_check_completion_low(void)
 +/*==================================*/
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (log_sys->n_pending_archive_ios == 0
 +	    && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
 +
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +			fputs("Archiving read completed\n", stderr);
 +		}
 +#endif /* UNIV_DEBUG */
 +
 +		/* Archive buffer has now been read in: start archive writes.
 +		The write phase is tested right below in this same call, so
 +		if no archive i/o is pending once log_archive_groups()
 +		returns, the write phase is completed immediately. */
 +
 +		log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
 +
 +		log_archive_groups();
 +	}
 +
 +	if (log_sys->n_pending_archive_ios == 0
 +	    && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
 +
 +		log_archive_write_complete_groups();
 +
 +		log_sys->archived_lsn = log_sys->next_archived_lsn;
 +
 +		rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
 +	}
 +}
 +
 +/******************************************************//**
 +Handles the completion of one archive i/o: flushes the archive file space
 +of the first log group, decrements the count of pending archive i/os and
 +checks whether the current archiving phase is thereby complete. */
 +static
 +void
 +log_io_complete_archive(void)
 +/*=========================*/
 +{
 +	log_group_t*	first_group;
 +
 +	/* Only the list lookup is done under the mutex; the flush itself
 +	runs without it */
 +	mutex_enter(&log_sys->mutex);
 +	first_group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +	mutex_exit(&log_sys->mutex);
 +
 +	fil_flush(first_group->archive_space_id);
 +
 +	mutex_enter(&log_sys->mutex);
 +
 +	ut_ad(log_sys->n_pending_archive_ios > 0);
 +	--log_sys->n_pending_archive_ios;
 +
 +	log_archive_check_completion_low();
 +
 +	mutex_exit(&log_sys->mutex);
 +}
 +
 +/********************************************************************//**
 +Starts an archiving operation. Archives from log_sys->archived_lsn up to
 +a limit that is at most archive_buf_size bytes ahead and is capped at the
 +current lsn rounded down to a log block boundary. If the log has not yet
 +been written to all groups up to that limit, it is written first and the
 +attempt is retried with the same limit. While the archiving state is
 +LOG_ARCH_STOPPED or LOG_ARCH_STOPPING2, waits for the archiving_on event.
 +@return	TRUE if succeed (also when archiving is off or there is nothing
 +to archive), FALSE if an archiving operation was already running */
 +UNIV_INTERN
 +ibool
 +log_archive_do(
 +/*===========*/
 +	ibool	sync,	/*!< in: TRUE if synchronous operation is desired;
 +			the function then waits by taking and releasing an
 +			s-latch on log_sys->archive_lock */
 +	ulint*	n_bytes)/*!< out: archive log buffer size, 0 if nothing to
 +			archive */
 +{
 +	ibool		calc_new_limit;
 +	ib_uint64_t	start_lsn;
 +	ib_uint64_t	limit_lsn;
 +
 +	calc_new_limit = TRUE;
 +loop:
 +	mutex_enter(&(log_sys->mutex));
 +
 +	switch (log_sys->archiving_state) {
 +	case LOG_ARCH_OFF:
 +arch_none:
 +		mutex_exit(&(log_sys->mutex));
 +
 +		*n_bytes = 0;
 +
 +		return(TRUE);
 +	case LOG_ARCH_STOPPED:
 +	case LOG_ARCH_STOPPING2:
 +		mutex_exit(&(log_sys->mutex));
 +
 +		os_event_wait(log_sys->archiving_on);
 +
 +		goto loop;
 +	}
 +
 +	start_lsn = log_sys->archived_lsn;
 +
 +	if (calc_new_limit) {
 +		ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
 +		limit_lsn = start_lsn + log_sys->archive_buf_size;
 +
 +		*n_bytes = log_sys->archive_buf_size;
 +
 +		if (limit_lsn >= log_sys->lsn) {
 +
 +			limit_lsn = ut_uint64_align_down(
 +				log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
 +		}
 +	}
 +
 +	if (log_sys->archived_lsn >= limit_lsn) {
 +
 +		goto arch_none;
 +	}
 +
 +	if (log_sys->written_to_all_lsn < limit_lsn) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 +
 +		calc_new_limit = FALSE;
 +
 +		goto loop;
 +	}
 +
 +	if (log_sys->n_pending_archive_ios > 0) {
 +		/* An archiving operation is running: do not start another
 +		one, only wait for the running one if sync was requested */
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		if (sync) {
 +			rw_lock_s_lock(&(log_sys->archive_lock));
 +			rw_lock_s_unlock(&(log_sys->archive_lock));
 +		}
 +
 +		*n_bytes = log_sys->archive_buf_size;
 +
 +		return(FALSE);
 +	}
 +
 +	rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
 +
 +	log_sys->archiving_phase = LOG_ARCHIVE_READ;
 +
 +	log_sys->next_archived_lsn = limit_lsn;
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Archiving from lsn " LSN_PF " to lsn " LSN_PF "\n",
 +			log_sys->archived_lsn, limit_lsn);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	/* Read the log segment [start_lsn, limit_lsn) of the first log
 +	group to the archive buffer */
 +
 +	log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
 +			       UT_LIST_GET_FIRST(log_sys->log_groups),
 +			       start_lsn, limit_lsn);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	if (sync) {
 +		rw_lock_s_lock(&(log_sys->archive_lock));
 +		rw_lock_s_unlock(&(log_sys->archive_lock));
 +	}
 +
 +	*n_bytes = log_sys->archive_buf_size;
 +
 +	return(TRUE);
 +}
 +
 +/****************************************************************//**
 +Archives the log at least up to the lsn that was current when this
 +function was called. Returns immediately if archiving is off. */
 +static
 +void
 +log_archive_all(void)
 +/*=================*/
 +{
 +	ib_uint64_t	target_lsn;
 +	ibool		archiving_off;
 +	ulint		n_bytes_unused;
 +
 +	/* Sample the state and the current lsn in one critical section */
 +	mutex_enter(&log_sys->mutex);
 +	archiving_off = (log_sys->archiving_state == LOG_ARCH_OFF);
 +	target_lsn = log_sys->lsn;
 +	mutex_exit(&log_sys->mutex);
 +
 +	if (archiving_off) {
 +		return;
 +	}
 +
 +	log_pad_current_log_block();
 +
 +	/* Run synchronous archiving rounds until the archived lsn has
 +	reached the sampled target */
 +	for (;;) {
 +		ibool	reached;
 +
 +		mutex_enter(&log_sys->mutex);
 +		reached = (target_lsn <= log_sys->archived_lsn);
 +		mutex_exit(&log_sys->mutex);
 +
 +		if (reached) {
 +			return;
 +		}
 +
 +		log_archive_do(TRUE, &n_bytes_unused);
 +	}
 +}
 +
 +/*****************************************************//**
 +Closes the possibly open archive log file of the first log group (only the
 +first group is archived). If the archive file space is non-empty, writes a
 +completion notice to the file header and truncates the file from the space;
 +if desired, also resets the archived offset to 0 and increments the archived
 +file number by 2. Does nothing if archiving is off. The caller must hold
 +log_sys->mutex. */
 +static
 +void
 +log_archive_close_groups(
 +/*=====================*/
 +	ibool	increment_file_count)	/*!< in: TRUE if we want to increment
 +					the file count */
 +{
 +	log_group_t*	group;
 +	ulint		trunc_len;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +
 +		return;
 +	}
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	/* Size of the archive file space in bytes; 0 means that no
 +	archive file is currently open */
 +	trunc_len = UNIV_PAGE_SIZE
 +		* fil_space_get_size(group->archive_space_id);
 +	if (trunc_len > 0) {
 +		ut_a(trunc_len == group->file_size);
 +
 +		/* Write a notice to the headers of archived log
 +		files that the file write has been completed */
 +
 +		log_group_archive_completed_header_write(
 +			group, 0, log_sys->archived_lsn);
 +
 +		fil_space_truncate_start(group->archive_space_id,
 +					 trunc_len);
 +		if (increment_file_count) {
 +			group->archived_offset = 0;
 +			group->archived_file_no += 2;
 +
 +#ifdef UNIV_DEBUG
 +			/* Report the number actually stored, and only
 +			when it was in fact incremented */
 +			if (log_debug_writes) {
 +				fprintf(stderr,
 +					"Incrementing arch file no to %lu"
 +					" in log group %lu\n",
 +					(ulong) group->archived_file_no,
 +					(ulong) group->id);
 +			}
 +#endif /* UNIV_DEBUG */
 +		}
 +	}
 +}
 +
 +/****************************************************************//**
 +Archives the log up to the lsn that is current at the time of the call and
 +then stops archiving. The archived log files are closed with a request to
 +increment the file count, so that a later restart of archiving will not
 +write again to the archived log files that exist when this function returns.
 +@return	DB_SUCCESS, or DB_ERROR if archiving was not in state LOG_ARCH_ON */
 +UNIV_INTERN
 +ulint
 +log_archive_stop(void)
 +/*==================*/
 +{
 +	mutex_enter(&log_sys->mutex);
 +
 +	if (log_sys->archiving_state != LOG_ARCH_ON) {
 +		mutex_exit(&log_sys->mutex);
 +
 +		return(DB_ERROR);
 +	}
 +
 +	log_sys->archiving_state = LOG_ARCH_STOPPING;
 +	mutex_exit(&log_sys->mutex);
 +
 +	/* Archive everything that has been logged so far */
 +	log_archive_all();
 +
 +	mutex_enter(&log_sys->mutex);
 +	log_sys->archiving_state = LOG_ARCH_STOPPING2;
 +	os_event_reset(log_sys->archiving_on);
 +	mutex_exit(&log_sys->mutex);
 +
 +	/* A possibly running archiving operation holds the x-latch:
 +	passing through the s-latch waits for it to end */
 +	rw_lock_s_lock(&log_sys->archive_lock);
 +	rw_lock_s_unlock(&log_sys->archive_lock);
 +
 +	/* Close all archived log files, incrementing the file count by 2,
 +	if appropriate */
 +	mutex_enter(&log_sys->mutex);
 +	log_archive_close_groups(TRUE);
 +	mutex_exit(&log_sys->mutex);
 +
 +	/* Keep trying until a checkpoint is made, so that if recovery is
 +	needed, the file numbers of new archived log files will start from
 +	the right value */
 +	while (!log_checkpoint(TRUE, TRUE)) {
 +		/* retry */
 +	}
 +
 +	mutex_enter(&log_sys->mutex);
 +	log_sys->archiving_state = LOG_ARCH_STOPPED;
 +	mutex_exit(&log_sys->mutex);
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/****************************************************************//**
 +Restarts archiving that has been stopped: switches the state from
 +LOG_ARCH_STOPPED to LOG_ARCH_ON and sets the archiving_on event.
 +@return	DB_SUCCESS, or DB_ERROR if archiving was not in the stopped state */
 +UNIV_INTERN
 +ulint
 +log_archive_start(void)
 +/*===================*/
 +{
 +	ulint	err = DB_ERROR;
 +
 +	mutex_enter(&log_sys->mutex);
 +
 +	if (log_sys->archiving_state == LOG_ARCH_STOPPED) {
 +		log_sys->archiving_state = LOG_ARCH_ON;
 +
 +		os_event_set(log_sys->archiving_on);
 +
 +		err = DB_SUCCESS;
 +	}
 +
 +	mutex_exit(&log_sys->mutex);
 +
 +	return(err);
 +}
 +
 +/****************************************************************//**
 +Turns log archiving off, so that a gap may occur in the archived log files.
 +If archiving is neither stopped nor off, it is stopped first and the state
 +is checked again after a sleep, until the switch to LOG_ARCH_OFF succeeds.
 +@return	DB_SUCCESS */
 +UNIV_INTERN
 +ulint
 +log_archive_noarchivelog(void)
 +/*==========================*/
 +{
 +	for (;;) {
 +		mutex_enter(&log_sys->mutex);
 +
 +		if (log_sys->archiving_state == LOG_ARCH_STOPPED
 +		    || log_sys->archiving_state == LOG_ARCH_OFF) {
 +
 +			log_sys->archiving_state = LOG_ARCH_OFF;
 +
 +			os_event_set(log_sys->archiving_on);
 +
 +			mutex_exit(&log_sys->mutex);
 +
 +			return(DB_SUCCESS);
 +		}
 +
 +		mutex_exit(&log_sys->mutex);
 +
 +		/* Not stopped yet: request a stop and check again later */
 +		log_archive_stop();
 +
 +		os_thread_sleep(500000);
 +	}
 +}
 +
 +/****************************************************************//**
 +Turns log archiving on from the LOG_ARCH_OFF state. Archiving starts from
 +the current lsn rounded down to a log block boundary, so a gap may occur
 +in the archived log files.
 +@return	DB_SUCCESS, or DB_ERROR if archiving was not off */
 +UNIV_INTERN
 +ulint
 +log_archive_archivelog(void)
 +/*========================*/
 +{
 +	ulint	err = DB_ERROR;
 +
 +	mutex_enter(&log_sys->mutex);
 +
 +	if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +		log_sys->archiving_state = LOG_ARCH_ON;
 +
 +		log_sys->archived_lsn = ut_uint64_align_down(
 +			log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
 +
 +		err = DB_SUCCESS;
 +	}
 +
 +	mutex_exit(&log_sys->mutex);
 +
 +	return(err);
 +}
 +
 +/****************************************************************//**
 +Tries to keep the not yet archived part of the log small enough that a new
 +log entry can be catenated without an immediate need for archiving. Above
 +max_archived_lsn_age the archiving is done synchronously and the age is
 +checked again; above max_archived_lsn_age_async one asynchronous archiving
 +is started; otherwise nothing is done. */
 +static
 +void
 +log_archive_margin(void)
 +/*====================*/
 +{
 +	log_t*	log		= log_sys;
 +	ulint	n_bytes_unused;
 +
 +	for (;;) {
 +		ulint	age;
 +		ibool	urgent;
 +
 +		mutex_enter(&log->mutex);
 +
 +		if (log->archiving_state == LOG_ARCH_OFF) {
 +			mutex_exit(&log->mutex);
 +
 +			return;
 +		}
 +
 +		age = log->lsn - log->archived_lsn;
 +
 +		if (age <= log->max_archived_lsn_age
 +		    && age <= log->max_archived_lsn_age_async) {
 +			/* No archiving required yet */
 +
 +			mutex_exit(&log->mutex);
 +
 +			return;
 +		}
 +
 +		/* Past the hard limit the archiving is urgent and we have
 +		to do synchronous i/o; otherwise asynchronous i/o will do */
 +		urgent = (age > log->max_archived_lsn_age) ? TRUE : FALSE;
 +
 +		mutex_exit(&log->mutex);
 +
 +		log_archive_do(urgent, &n_bytes_unused);
 +
 +		if (!urgent) {
 +			return;
 +		}
 +
 +		/* Check again that enough was written to the archive */
 +	}
 +}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +/********************************************************************//**
 +Checks that there is enough free space in the log to start a new query step.
 +Runs the flush, checkpoint and (if compiled in) archive margin checks, and
 +repeats them for as long as log_sys->check_flush_or_checkpoint is set.
 +NOTE: this function may only be called if the calling thread owns no
 +synchronization objects! */
 +UNIV_INTERN
 +void
 +log_check_margins(void)
 +/*===================*/
 +{
 +	bool	check_again;
 +
 +	do {
 +		log_flush_margin();
 +
 +		log_checkpoint_margin();
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +		log_archive_margin();
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +		mutex_enter(&log_sys->mutex);
 +		ut_ad(!recv_no_log_write);
 +		check_again = log_sys->check_flush_or_checkpoint != 0;
 +		mutex_exit(&log_sys->mutex);
 +	} while (check_again);
 +}
 +
 +/****************************************************************//**
 +Makes a checkpoint at the latest lsn and writes it to first page of each
 +data file in the database, so that we know that the file spaces contain
 +all modifications up to that lsn. This can only be called at database
 +shutdown. This function also writes all log in log files to the log archive.
 +
 +The function polls, sleeping between rounds, until no background thread,
 +active transaction or pending log or buffer pool i/o remains, and restarts
 +the checks from the top whenever one of them fails. With
 +srv_fast_shutdown == 2 the log is only flushed to disk and the files are
 +closed; no lsn stamps are written to the data files. */
 +UNIV_INTERN
 +void
 +logs_empty_and_mark_files_at_shutdown(void)
 +/*=======================================*/
 +{
 +	lsn_t			lsn;
 +	ulint			arch_log_no;
 +	ulint			count = 0;
 +	ulint			pending_io;
 +	enum srv_thread_type	active_thd;
 +	const char*		thread_name;
 +	ibool			server_busy;
 +
 +	ib_logf(IB_LOG_LEVEL_INFO, "Starting shutdown...");
 +
 +	while (srv_fast_shutdown == 0 && trx_rollback_or_clean_is_active) {
 +		/* we should wait until rollback after recovery end
 +		for slow shutdown */
 +		os_thread_sleep(100000);
 +	}
 +
 +	/* Wait until the master thread and all other operations are idle: our
 +	algorithm only works if the server is idle at shutdown */
 +
 +	srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
 +loop:
 +	os_thread_sleep(100000);
 +
 +	count++;
 +
 +	/* We need the monitor threads to stop before we proceed with
 +	a shutdown. */
 +
 +	thread_name = srv_any_background_threads_are_active();
 +
 +	if (thread_name != NULL) {
 +		/* Print a message every 60 seconds if we are waiting
 +		for the monitor thread to exit. Master and worker
 +		threads check will be done later. */
 +
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for %s to exit", thread_name);
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +	/* Check that there are no longer transactions, except for
 +	PREPARED ones. We need this wait even for the 'very fast'
 +	shutdown, because the InnoDB layer may have committed or
 +	prepared transactions and we don't want to lose them. */
 +
 +	if (ulint total_trx = srv_was_started && !srv_read_only_mode
 +	    && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
 +	    ? trx_sys_any_active_transactions() : 0) {
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for %lu active transactions to finish",
 +				(ulong) total_trx);
 +
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +	/* Check that the background threads are suspended */
 +
 +	active_thd = srv_get_active_thread_type();
 +
 +	if (active_thd != SRV_NONE) {
 +
 +		if (active_thd == SRV_PURGE) {
 +			srv_purge_wakeup();
 +		}
 +
 +		/* The srv_lock_timeout_thread, srv_error_monitor_thread
 +		and srv_monitor_thread should already exit by now. The
 +		only threads to be suspended are the master threads
 +		and worker threads (purge threads). Print the thread
 +		type if any of such threads not in suspended mode */
 +		if (srv_print_verbose_log && count > 600) {
 +			const char*	thread_type = "<null>";
 +
 +			switch (active_thd) {
 +			case SRV_NONE:
 +				/* This shouldn't happen because we've
 +				already checked for this case before
 +				entering the if(). We handle it here
 +				to avoid a compiler warning. */
 +				ut_error;
 +			case SRV_WORKER:
 +				thread_type = "worker threads";
 +				break;
 +			case SRV_MASTER:
 +				thread_type = "master thread";
 +				break;
 +			case SRV_PURGE:
 +				thread_type = "purge thread";
 +				break;
 +			}
 +
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for %s to be suspended",
 +				thread_type);
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +	/* At this point only page_cleaner should be active. We wait
 +	here to let it complete the flushing of the buffer pools
 +	before proceeding further. */
 +	srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE;
 +	count = 0;
 +	while (buf_page_cleaner_is_active) {
 +		++count;
 +		os_thread_sleep(100000);
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for page_cleaner to "
 +				"finish flushing of buffer pool");
 +			count = 0;
 +		}
 +	}
 +
 +	mutex_enter(&log_sys->mutex);
 +	server_busy = log_sys->n_pending_checkpoint_writes
 +#ifdef UNIV_LOG_ARCHIVE
 +		|| log_sys->n_pending_archive_ios
 +#endif /* UNIV_LOG_ARCHIVE */
 +		|| log_sys->n_pending_writes;
 +	mutex_exit(&log_sys->mutex);
 +
 +	if (server_busy) {
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Pending checkpoint_writes: %lu. "
 +				"Pending log flush writes: %lu",
 +				(ulong) log_sys->n_pending_checkpoint_writes,
 +				(ulong) log_sys->n_pending_writes);
 +			count = 0;
 +		}
 +		goto loop;
 +	}
 +
 +	pending_io = buf_pool_check_no_pending_io();
 +
 +	if (pending_io) {
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for %lu buffer page I/Os to complete",
 +				(ulong) pending_io);
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_archive_all();
 +#endif /* UNIV_LOG_ARCHIVE */
 +	if (srv_fast_shutdown == 2) {
 +		if (!srv_read_only_mode) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"MySQL has requested a very fast shutdown "
 +				"without flushing the InnoDB buffer pool to "
 +				"data files. At the next mysqld startup "
 +				"InnoDB will do a crash recovery!");
 +
 +			/* In this fastest shutdown we do not flush the
 +			buffer pool:
 +
 +			it is essentially a 'crash' of the InnoDB server.
 +			Make sure that the log is all flushed to disk, so
 +			that we can recover all committed transactions in
 +			a crash recovery. We must not write the lsn stamps
 +			to the data files, since at a startup InnoDB deduces
 +			from the stamps if the previous shutdown was clean. */
 +
 +			log_buffer_flush_to_disk();
 +
 +			/* Check that the background threads stay suspended */
 +			thread_name = srv_any_background_threads_are_active();
 +
 +			if (thread_name != NULL) {
 +				ib_logf(IB_LOG_LEVEL_WARN,
 +					"Background thread %s woke up "
 +					"during shutdown", thread_name);
 +				goto loop;
 +			}
 +		}
 +
 +		srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
 +
 +		fil_close_all_files();
 +
 +		thread_name = srv_any_background_threads_are_active();
 +
 +		ut_a(!thread_name);
 +
 +		return;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		log_make_checkpoint_at(LSN_MAX, TRUE);
 +	}
 +
 +	mutex_enter(&log_sys->mutex);
 +
 +	lsn = log_sys->lsn;
 +
 +	if (lsn != log_sys->last_checkpoint_lsn
 +#ifdef UNIV_LOG_ARCHIVE
 +	    || (srv_log_archive_on
 +		&& lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
 +#endif /* UNIV_LOG_ARCHIVE */
 +	    ) {
 +
 +		mutex_exit(&log_sys->mutex);
 +
 +		goto loop;
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	/* Remember the archived log file number of the first log group:
 +	it is written to the data files below together with the lsn */
 +	arch_log_no
 +		= UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
 +
 +	if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
 +
 +		arch_log_no--;
 +	}
 +
 +	log_archive_close_groups(TRUE);
 +#else /* UNIV_LOG_ARCHIVE */
 +	arch_log_no = 0;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	mutex_exit(&log_sys->mutex);
 +
 +	/* Check that the background threads stay suspended */
 +	thread_name = srv_any_background_threads_are_active();
 +	if (thread_name != NULL) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"Background thread %s woke up during shutdown",
 +			thread_name);
 +
 +		goto loop;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		fil_flush_file_spaces(FIL_TABLESPACE);
 +		fil_flush_file_spaces(FIL_LOG);
 +	}
 +
 +	/* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
 +	pool: therefore it is essential that the buffer pool has been
 +	completely flushed to disk! (We do not call fil_write... if the
 +	'very fast' shutdown is enabled.) */
 +
 +	if (!buf_all_freed()) {
 +
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for dirty buffer pages to be flushed");
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +	srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
 +
 +	/* Make some checks that the server really is quiet */
 +	srv_thread_type	type = srv_get_active_thread_type();
 +	ut_a(type == SRV_NONE);
 +
 +	bool	freed = buf_all_freed();
 +	ut_a(freed);
 +
 +	ut_a(lsn == log_sys->lsn);
 +
 +	if (lsn < srv_start_lsn) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Log sequence number at shutdown " LSN_PF " "
 +			"is lower than at startup " LSN_PF "!",
 +			lsn, srv_start_lsn);
 +	}
 +
 +	srv_shutdown_lsn = lsn;
 +
 +	if (!srv_read_only_mode) {
 +		fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
 +
 +		fil_flush_file_spaces(FIL_TABLESPACE);
 +	}
 +
 +	fil_close_all_files();
 +
 +	/* Make some checks that the server really is quiet */
 +	type = srv_get_active_thread_type();
 +	ut_a(type == SRV_NONE);
 +
 +	freed = buf_all_freed();
 +	ut_a(freed);
 +
 +	ut_a(lsn == log_sys->lsn);
 +}
 +
 +#ifdef UNIV_LOG_DEBUG
 +/******************************************************//**
 +Checks by parsing that the catenated log segment for a single mtr is
 +consistent. The log blocks that cover the segment are copied into a
 +temporary block-aligned buffer and scanned with recv_scan_log_recs();
 +the function asserts that the scan ended exactly at buf_start_lsn + len
 +and that recv_sys->recovered_lsn agrees. The caller must hold
 +log_sys->mutex.
 +@return	TRUE (a failed check trips an assertion instead) */
 +UNIV_INTERN
 +ibool
 +log_check_log_recs(
 +/*===============*/
 +	const byte*	buf,		/*!< in: pointer to the start of
 +					the log segment in the
 +					log_sys->buf log buffer */
 +	ulint		len,		/*!< in: segment length in bytes;
 +					0 returns TRUE without checking */
 +	ib_uint64_t	buf_start_lsn)	/*!< in: buffer start lsn */
 +{
 +	ib_uint64_t	contiguous_lsn;
 +	ib_uint64_t	scanned_lsn;
 +	const byte*	start;
 +	const byte*	end;
 +	byte*		buf1;
 +	byte*		scan_buf;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (len == 0) {
 +
 +		return(TRUE);
 +	}
 +
 +	start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
 +	end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
 +	scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	ut_memcpy(scan_buf, start, end - start);
 +
 +	recv_scan_log_recs((buf_pool_get_n_pages()
 +			   - (recv_n_pool_free_frames * srv_buf_pool_instances))
 +			   * UNIV_PAGE_SIZE, FALSE, scan_buf, end - start,
 +			   ut_uint64_align_down(buf_start_lsn,
 +						OS_FILE_LOG_BLOCK_SIZE),
 +			   &contiguous_lsn, &scanned_lsn);
 +
 +	ut_a(scanned_lsn == buf_start_lsn + len);
 +	ut_a(recv_sys->recovered_lsn == scanned_lsn);
 +
 +	mem_free(buf1);
 +
 +	return(TRUE);
 +}
 +#endif /* UNIV_LOG_DEBUG */
 +
 +/******************************************************//**
 +Peeks the current lsn without waiting for the log system mutex.
 +@return	TRUE if success, FALSE if could not get the log system mutex */
 +UNIV_INTERN
 +ibool
 +log_peek_lsn(
 +/*=========*/
 +	lsn_t*	lsn)	/*!< out: if returns TRUE, current lsn is here */
 +{
 +	if (mutex_enter_nowait(&log_sys->mutex) != 0) {
 +		/* The mutex is busy: give up and leave *lsn untouched */
 +		return(FALSE);
 +	}
 +
 +	*lsn = log_sys->lsn;
 +
 +	mutex_exit(&log_sys->mutex);
 +
 +	return(TRUE);
 +}
 +
 +/******************************************************//**
 +Prints info of the log: the current, flushed, oldest-modification and
 +checkpoint lsn values, the pending write counts and the log i/o rate
 +since the previous printout. Also resets the baseline that is used for
 +computing the rate. */
 +UNIV_INTERN
 +void
 +log_print(
 +/*======*/
 +	FILE*	file)	/*!< in: file where to print */
 +{
 +	mutex_enter(&log_sys->mutex);
 +
 +	fprintf(file,
 +		"Log sequence number " LSN_PF "\n"
 +		"Log flushed up to   " LSN_PF "\n"
 +		"Pages flushed up to " LSN_PF "\n"
 +		"Last checkpoint at  " LSN_PF "\n",
 +		log_sys->lsn,
 +		log_sys->flushed_to_disk_lsn,
 +		log_buf_pool_get_oldest_modification(),
 +		log_sys->last_checkpoint_lsn);
 +
 +	const time_t	now = time(NULL);
 +	double		secs = difftime(now, log_sys->last_printout_time);
 +
 +	/* Guard the division below against a zero or negative interval */
 +	if (secs <= 0) {
 +		secs = 1;
 +	}
 +
 +	const double	ios_per_sec
 +		= (double)(log_sys->n_log_ios - log_sys->n_log_ios_old)
 +		/ secs;
 +
 +	fprintf(file,
 +		"%lu pending log writes, %lu pending chkp writes\n"
 +		"%lu log i/o's done, %.2f log i/o's/second\n",
 +		(ulong) log_sys->n_pending_writes,
 +		(ulong) log_sys->n_pending_checkpoint_writes,
 +		(ulong) log_sys->n_log_ios,
 +		ios_per_sec);
 +
 +	log_sys->n_log_ios_old = log_sys->n_log_ios;
 +	log_sys->last_printout_time = now;
 +
 +	mutex_exit(&log_sys->mutex);
 +}
 +
 +/**********************************************************************//**
 +Resets the baseline of the per-second averages: stores the current time
 +as the last printout time and the current log i/o count as the old
 +count. This function does not acquire log_sys->mutex itself. */
 +UNIV_INTERN
 +void
 +log_refresh_stats(void)
 +/*===================*/
 +{
 +	log_sys->last_printout_time = time(NULL);
 +	log_sys->n_log_ios_old = log_sys->n_log_ios;
 +}
 +
 +/********************************************************//**
 +Closes a log group: frees the header buffer of every file, the arrays
 +that hold those buffers, the checkpoint buffer and finally the group
 +object itself. */
 +static
 +void
 +log_group_close(
 +/*===========*/
 +	log_group_t*	group)		/* in,own: log group to close */
 +{
 +	/* Per-file header buffers */
 +	for (ulint file_no = 0; file_no < group->n_files; ++file_no) {
 +		mem_free(group->file_header_bufs_ptr[file_no]);
 +#ifdef UNIV_LOG_ARCHIVE
 +		mem_free(group->archive_file_header_bufs_ptr[file_no]);
 +#endif /* UNIV_LOG_ARCHIVE */
 +	}
 +
 +	/* The arrays that held them */
 +	mem_free(group->file_header_bufs_ptr);
 +	mem_free(group->file_header_bufs);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	mem_free(group->archive_file_header_bufs_ptr);
 +	mem_free(group->archive_file_header_bufs);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	mem_free(group->checkpoint_buf_ptr);
 +
 +	mem_free(group);
 +}
 +
 +/********************************************************//**
 +Closes all log groups: detaches each group from log_sys->log_groups
 +and closes it, until the list is empty. */
 +UNIV_INTERN
 +void
 +log_group_close_all(void)
 +/*=====================*/
 +{
 +	while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
 +		/* Always take the current head of the list */
 +		log_group_t*	head = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +		UT_LIST_REMOVE(log_groups, log_sys->log_groups, head);
 +
 +		log_group_close(head);
 +	}
 +}
 +
 +/********************************************************//**
 +Shutdown the log system but do not release all the memory. Closes all
 +log groups and frees the log buffer, the checkpoint buffer, the events,
 +the latches and the mutex of log_sys, then closes the recovery system.
 +The log_sys object itself is not freed here; log_mem_free() does that. */
 +UNIV_INTERN
 +void
 +log_shutdown(void)
 +/*==============*/
 +{
 +	log_group_close_all();
 +
 +	mem_free(log_sys->buf_ptr);
 +	log_sys->buf_ptr = NULL;
 +	log_sys->buf = NULL;
 +	mem_free(log_sys->checkpoint_buf_ptr);
 +	log_sys->checkpoint_buf_ptr = NULL;
 +	log_sys->checkpoint_buf = NULL;
 +
 +	os_event_free(log_sys->no_flush_event);
 +	os_event_free(log_sys->one_flushed_event);
 +
 +	rw_lock_free(&log_sys->checkpoint_lock);
 +
 +	mutex_free(&log_sys->mutex);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	rw_lock_free(&log_sys->archive_lock);
 +	/* Release the archiving event; creating a new event here would
 +	leak both that event and archiving_on */
 +	os_event_free(log_sys->archiving_on);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +#ifdef UNIV_LOG_DEBUG
 +	recv_sys_debug_free();
 +#endif
 +
 +	recv_sys_close();
 +}
 +
 +/********************************************************//**
 +Frees the log system data structures: the recovery system memory and
 +the log_sys object, whose pointer is then reset to NULL. Does nothing
 +if log_sys is already NULL. */
 +UNIV_INTERN
 +void
 +log_mem_free(void)
 +/*==============*/
 +{
 +	if (log_sys == NULL) {
 +		return;
 +	}
 +
 +	recv_sys_mem_free();
 +	mem_free(log_sys);
 +
 +	log_sys = NULL;
 +}
 +#endif /* !UNIV_HOTBACKUP */
diff --cc storage/innobase/log/log0recv.cc
index 6137c84b21d,00000000000..a90041aaa90
mode 100644,000000..100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@@ -1,4017 -1,0 +1,3946 @@@
 +/*****************************************************************************
 +
 +Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
 +Copyright (c) 2012, Facebook Inc.
- Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
++Copyright (c) 2017, MariaDB Corporation.
 +
 +This program is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free Software
 +Foundation; version 2 of the License.
 +
 +This program is distributed in the hope that it will be useful, but WITHOUT
 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License along with
 +this program; if not, write to the Free Software Foundation, Inc.,
 +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
 +
 +*****************************************************************************/
 +
 +/**************************************************//**
 +@file log/log0recv.cc
 +Recovery
 +
 +Created 9/20/1997 Heikki Tuuri
 +*******************************************************/
 +
 +// First include (the generated) my_config.h, to get correct platform defines.
 +#include "my_config.h"
 +#include <stdio.h>                              // Solaris/x86 header file bug
 +
 +#include <vector>
 +#include "log0recv.h"
 +
 +#ifdef UNIV_NONINL
 +#include "log0recv.ic"
 +#endif
 +
 +#include "mem0mem.h"
 +#include "buf0buf.h"
 +#include "buf0flu.h"
 +#include "mtr0mtr.h"
 +#include "mtr0log.h"
 +#include "page0cur.h"
 +#include "page0zip.h"
 +#include "btr0btr.h"
 +#include "btr0cur.h"
 +#include "ibuf0ibuf.h"
 +#include "trx0undo.h"
 +#include "trx0rec.h"
 +#include "fil0fil.h"
 +#ifndef UNIV_HOTBACKUP
 +# include "buf0rea.h"
 +# include "srv0srv.h"
 +# include "srv0start.h"
 +# include "trx0roll.h"
 +# include "row0merge.h"
 +# include "sync0sync.h"
 +#else /* !UNIV_HOTBACKUP */
 +
 +
 +/** This is set to FALSE if the backup was originally taken with the
 +mysqlbackup --include regexp option: then we do not want to create tables in
 +directories which were not included */
 +UNIV_INTERN ibool	recv_replay_file_ops	= TRUE;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/** Log records are stored in the hash table in chunks at most of this size;
 +this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
 +#define RECV_DATA_BLOCK_SIZE	(MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
 +
 +/** Read-ahead area in applying log records to file pages */
 +#define RECV_READ_AHEAD_AREA	32
 +
 +/** The recovery system */
- UNIV_INTERN recv_sys_t*	recv_sys = NULL;
++UNIV_INTERN recv_sys_t*	recv_sys;
 +/** TRUE when applying redo log records during crash recovery; FALSE
 +otherwise.  Note that this is FALSE while a background thread is
 +rolling back incomplete transactions. */
 +UNIV_INTERN ibool	recv_recovery_on;
 +#ifdef UNIV_LOG_ARCHIVE
 +/** TRUE when applying redo log records from an archived log file */
 +UNIV_INTERN ibool	recv_recovery_from_backup_on;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +#ifndef UNIV_HOTBACKUP
 +/** TRUE when recv_init_crash_recovery() has been called. */
 +UNIV_INTERN ibool	recv_needed_recovery;
 +# ifdef UNIV_DEBUG
 +/** TRUE if writing to the redo log (mtr_commit) is forbidden.
 +Protected by log_sys->mutex. */
 +UNIV_INTERN ibool	recv_no_log_write = FALSE;
 +# endif /* UNIV_DEBUG */
 +
 +/** TRUE if buf_page_is_corrupted() should check if the log sequence
 +number (FIL_PAGE_LSN) is in the future.  Initially FALSE, and set by
 +recv_recovery_from_checkpoint_start_func(). */
 +UNIV_INTERN ibool	recv_lsn_checks_on;
 +
 +/** There are two conditions under which we scan the logs, the first
 +is normal startup and the second is when we do a recovery from an
 +archive.
 +This flag is set if we are doing a scan from the last checkpoint during
 +startup. If we find log entries that were written after the last checkpoint
 +we know that the server was not cleanly shutdown. We must then initialize
 +the crash recovery environment before attempting to store these entries in
 +the log hash table. */
 +static ibool		recv_log_scan_is_startup_type;
 +
 +/** If the following is TRUE, the buffer pool file pages must be invalidated
 +after recovery and no ibuf operations are allowed; this becomes TRUE if
 +the log record hash table becomes too full, and log records must be merged
 +to file pages already before the recovery is finished: in this case no
 +ibuf operations are allowed, as they could modify the pages read in the
 +buffer pool before the pages have been recovered to the up-to-date state.
 +
 +TRUE means that recovery is running and no operations on the log files
 +are allowed yet: the variable name is misleading. */
 +UNIV_INTERN ibool	recv_no_ibuf_operations;
 +/** TRUE when the redo log is being backed up */
 +# define recv_is_making_a_backup		FALSE
 +/** TRUE when recovering from a backed up redo log file */
 +# define recv_is_from_backup			FALSE
 +#else /* !UNIV_HOTBACKUP */
 +# define recv_needed_recovery			FALSE
 +/** TRUE when the redo log is being backed up */
 +UNIV_INTERN ibool	recv_is_making_a_backup	= FALSE;
 +/** TRUE when recovering from a backed up redo log file */
 +UNIV_INTERN ibool	recv_is_from_backup	= FALSE;
 +# define buf_pool_get_curr_size() (5 * 1024 * 1024)
 +#endif /* !UNIV_HOTBACKUP */
- /** The following counter is used to decide when to print info on
- log scan */
- static ulint	recv_scan_print_counter;
 +
 +/** The type of the previous parsed redo log record */
 +static ulint	recv_previous_parsed_rec_type;
 +/** The offset of the previous parsed redo log record */
 +static ulint	recv_previous_parsed_rec_offset;
 +/** The 'multi' flag of the previous parsed redo log record */
 +static ulint	recv_previous_parsed_rec_is_multi;
 +
 +/** Maximum page number encountered in the redo log */
 +UNIV_INTERN ulint	recv_max_parsed_page_no;
 +
 +/** This many frames must be left free in the buffer pool when we scan
 +the log and store the scanned log records in the buffer pool: we will
 +use these free frames to read in pages when we start applying the
 +log records to the database.
 +This is the default value. If the actual size of the buffer pool is
 +larger than 10 MB we'll set this value to 512. */
 +UNIV_INTERN ulint	recv_n_pool_free_frames;
 +
 +/** The maximum lsn we see for a page during the recovery process. If this
 +is bigger than the lsn we are able to scan up to, that is an indication that
 +the recovery failed and the database may be corrupt. */
 +UNIV_INTERN lsn_t	recv_max_page_lsn;
 +
 +#ifdef UNIV_PFS_THREAD
 +UNIV_INTERN mysql_pfs_key_t	trx_rollback_clean_thread_key;
 +#endif /* UNIV_PFS_THREAD */
 +
 +#ifdef UNIV_PFS_MUTEX
 +UNIV_INTERN mysql_pfs_key_t	recv_sys_mutex_key;
 +#endif /* UNIV_PFS_MUTEX */
 +
 +#ifndef UNIV_HOTBACKUP
 +# ifdef UNIV_PFS_THREAD
 +UNIV_INTERN mysql_pfs_key_t	recv_writer_thread_key;
 +# endif /* UNIV_PFS_THREAD */
 +
 +# ifdef UNIV_PFS_MUTEX
 +UNIV_INTERN mysql_pfs_key_t	recv_writer_mutex_key;
 +# endif /* UNIV_PFS_MUTEX */
 +
 +/** Flag indicating if recv_writer thread is active. */
 +static volatile bool		recv_writer_thread_active;
 +UNIV_INTERN os_thread_t		recv_writer_thread_handle = 0;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/* prototypes */
 +
 +#ifndef UNIV_HOTBACKUP
 +/*******************************************************//**
 +Initialize crash recovery environment. Can be called iff
 +recv_needed_recovery == FALSE. */
 +static
 +void
 +recv_init_crash_recovery(void);
 +/*===========================*/
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/********************************************************//**
 +Creates the recovery system: allocates a zero-filled recv_sys and
 +creates its mutexes. Does nothing if recv_sys already exists. */
 +UNIV_INTERN
 +void
 +recv_sys_create(void)
 +/*=================*/
 +{
 +	if (recv_sys == NULL) {
 +		recv_sys = static_cast<recv_sys_t*>(
 +			mem_zalloc(sizeof(*recv_sys)));
 +
 +		mutex_create(recv_sys_mutex_key, &recv_sys->mutex, SYNC_RECV);
 +
 +#ifndef UNIV_HOTBACKUP
 +		mutex_create(recv_writer_mutex_key, &recv_sys->writer_mutex,
 +			     SYNC_LEVEL_VARYING);
 +#endif /* !UNIV_HOTBACKUP */
 +
 +		/* The heap and the hash table are created later. */
 +		recv_sys->heap = NULL;
 +		recv_sys->addr_hash = NULL;
 +	}
 +}
 +
 +/********************************************************//**
 +Frees the recovery system: the hash table, the heap and the buffers
 +(each only if allocated), the recovery system mutexes, and finally the
 +recv_sys struct itself, whose pointer is reset to NULL. Does nothing
 +if recv_sys is NULL. */
 +UNIV_INTERN
 +void
 +recv_sys_close(void)
 +/*================*/
 +{
 +	if (recv_sys == NULL) {
 +
 +		return;
 +	}
 +
 +	if (recv_sys->addr_hash) {
 +		hash_table_free(recv_sys->addr_hash);
 +	}
 +
 +	if (recv_sys->heap) {
 +		mem_heap_free(recv_sys->heap);
 +	}
 +
 +	if (recv_sys->buf) {
 +		ut_free(recv_sys->buf);
 +	}
 +
 +	if (recv_sys->last_block_buf_start) {
 +		mem_free(recv_sys->last_block_buf_start);
 +	}
 +
 +#ifndef UNIV_HOTBACKUP
 +	/* The writer thread must have exited before its mutex goes. */
 +	ut_ad(!recv_writer_thread_active);
 +	mutex_free(&recv_sys->writer_mutex);
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	mutex_free(&recv_sys->mutex);
 +
 +	mem_free(recv_sys);
 +	recv_sys = NULL;
 +}
 +
 +/********************************************************//**
 +Frees the recovery system memory: the hash table, the heap and the
 +buffers (each only if allocated) and the recv_sys struct, whose pointer
 +is reset to NULL. The mutexes are not freed here. Does nothing if
 +recv_sys is NULL. */
 +UNIV_INTERN
 +void
 +recv_sys_mem_free(void)
 +/*===================*/
 +{
 +	if (recv_sys == NULL) {
 +
 +		return;
 +	}
 +
 +	if (recv_sys->addr_hash) {
 +		hash_table_free(recv_sys->addr_hash);
 +	}
 +
 +	if (recv_sys->heap) {
 +		mem_heap_free(recv_sys->heap);
 +	}
 +
 +	if (recv_sys->buf) {
 +		ut_free(recv_sys->buf);
 +	}
 +
 +	if (recv_sys->last_block_buf_start) {
 +		mem_free(recv_sys->last_block_buf_start);
 +	}
 +
 +	mem_free(recv_sys);
 +	recv_sys = NULL;
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/************************************************************
 +Reset the state of the recovery system variables.
 +Only file-level variables are assigned; the recv_sys struct is not
 +touched. Note that recv_lsn_checks_on and recv_n_pool_free_frames are
 +each assigned twice below, with the same value both times. */
 +UNIV_INTERN
 +void
 +recv_sys_var_init(void)
 +/*===================*/
 +{
 +	recv_lsn_checks_on = FALSE;
 +
 +	recv_n_pool_free_frames = 256;
 +
 +	recv_recovery_on = FALSE;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	recv_recovery_from_backup_on = FALSE;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	recv_needed_recovery = FALSE;
 +
 +	recv_lsn_checks_on = FALSE;
 +
 +	recv_log_scan_is_startup_type = FALSE;
 +
 +	recv_no_ibuf_operations = FALSE;
 +
- 	recv_scan_print_counter	= 0;
- 
 +	recv_previous_parsed_rec_type	= 999999;
 +
 +	recv_previous_parsed_rec_offset	= 0;
 +
 +	recv_previous_parsed_rec_is_multi = 0;
 +
 +	recv_max_parsed_page_no	= 0;
 +
 +	recv_n_pool_free_frames	= 256;
 +
 +	recv_max_page_lsn = 0;
 +}
 +
 +/******************************************************************//**
 +recv_writer thread tasked with flushing dirty pages from the buffer
 +pools.
 +The loop runs while srv_shutdown_state == SRV_SHUTDOWN_NONE: after each
 +os_thread_sleep(100000) it takes recv_sys->writer_mutex, leaves the loop
 +if recv_recovery_on is no longer set, and otherwise calls
 +buf_flush_LRU_tail() before releasing the mutex. On exit the thread
 +clears recv_writer_thread_active and terminates via os_thread_exit().
 +@return a dummy parameter */
 +extern "C" UNIV_INTERN
 +os_thread_ret_t
 +DECLARE_THREAD(recv_writer_thread)(
 +/*===============================*/
 +	void*	arg MY_ATTRIBUTE((unused)))
 +			/*!< in: a dummy parameter required by
 +			os_thread_create */
 +{
 +	ut_ad(!srv_read_only_mode);
 +
 +#ifdef UNIV_PFS_THREAD
 +	pfs_register_thread(recv_writer_thread_key);
 +#endif /* UNIV_PFS_THREAD */
 +
 +#ifdef UNIV_DEBUG_THREAD_CREATION
 +	fprintf(stderr, "InnoDB: recv_writer thread running, id %lu\n",
 +		os_thread_pf(os_thread_get_curr_id()));
 +#endif /* UNIV_DEBUG_THREAD_CREATION */
 +
 +	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
 +
 +		os_thread_sleep(100000);
 +
 +		mutex_enter(&recv_sys->writer_mutex);
 +
 +		if (!recv_recovery_on) {
 +			mutex_exit(&recv_sys->writer_mutex);
 +			break;
 +		}
 +
 +		/* Flush pages from end of LRU if required */
 +		buf_flush_LRU_tail();
 +
 +		mutex_exit(&recv_sys->writer_mutex);
 +	}
 +
 +	recv_writer_thread_active = false;
 +
 +	/* We count the number of threads in os_thread_exit().
 +	A created thread should always use that to exit and not
 +	use return() to exit. */
 +	os_thread_exit(NULL);
 +
 +	OS_THREAD_DUMMY_RETURN;
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/************************************************************
 +Inits the recovery system for a recovery operation.
 +Does nothing if recv_sys->heap has already been created. Otherwise
 +creates the heap, the parsing buffer, the address hash table (sized
 +available_memory / 512) and the aligned last-block buffer, resets the
 +bookkeeping fields, and raises recv_n_pool_free_frames to 512 when the
 +buffer pool is at least 10 MB. */
 +UNIV_INTERN
 +void
 +recv_sys_init(
 +/*==========*/
 +	ulint	available_memory)	/*!< in: available memory in bytes */
 +{
 +	if (recv_sys->heap != NULL) {
 +
 +		return;
 +	}
 +
 +#ifndef UNIV_HOTBACKUP
 +	/* Initialize the red-black tree used for insertions into the
 +	flush list during recovery; it is released again with
 +	buf_flush_free_flush_rbt() in recv_sys_debug_free(). This is
 +	done before acquiring recv_sys->mutex. */
 +	buf_flush_init_flush_rbt();
 +
 +	mutex_enter(&(recv_sys->mutex));
 +
 +	recv_sys->heap = mem_heap_create_typed(256,
 +					MEM_HEAP_FOR_RECV_SYS);
 +#else /* !UNIV_HOTBACKUP */
 +	recv_sys->heap = mem_heap_create(256);
 +	recv_is_from_backup = TRUE;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	/* Set appropriate value of recv_n_pool_free_frames. */
 +	if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
 +		/* Buffer pool of size greater than 10 MB. */
 +		recv_n_pool_free_frames = 512;
 +	}
 +
 +	recv_sys->buf = static_cast<byte*>(ut_malloc(RECV_PARSING_BUF_SIZE));
 +	recv_sys->len = 0;
 +	recv_sys->recovered_offset = 0;
 +
 +	recv_sys->addr_hash = hash_create(available_memory / 512);
 +	recv_sys->n_addrs = 0;
 +
 +	recv_sys->apply_log_recs = FALSE;
 +	recv_sys->apply_batch_on = FALSE;
 +
 +	/* Allocate two blocks so that last_block can be aligned to
 +	OS_FILE_LOG_BLOCK_SIZE inside the allocation. */
 +	recv_sys->last_block_buf_start = static_cast<byte*>(
 +		mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE));
 +
 +	recv_sys->last_block = static_cast<byte*>(ut_align(
 +		recv_sys->last_block_buf_start, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	recv_sys->found_corrupt_log = FALSE;
++	recv_sys->progress_time = ut_time();
 +
 +	recv_max_page_lsn = 0;
 +
 +	/* Call the constructor for recv_sys_t::dblwr member */
 +	new (&recv_sys->dblwr) recv_dblwr_t();
 +
 +	mutex_exit(&(recv_sys->mutex));
 +}
 +
 +/********************************************************//**
 +Empties the hash table when it has been fully processed: the old table
 +is freed, the recovery heap is emptied and a fresh table sized
 +buf_pool_get_curr_size() / 512 is created. The caller must own
 +recv_sys->mutex. Reports an error and aborts via ut_error if
 +recv_sys->n_addrs is not zero. */
 +static
 +void
 +recv_sys_empty_hash(void)
 +/*=====================*/
 +{
 +	ut_ad(mutex_own(&(recv_sys->mutex)));
 +
 +	const ulint	n_unprocessed = recv_sys->n_addrs;
 +
 +	if (n_unprocessed > 0) {
 +		fprintf(stderr,
 +			"InnoDB: Error: %lu pages with log records"
 +			" were left unprocessed!\n"
 +			"InnoDB: Maximum page number with"
 +			" log records on it %lu\n",
 +			(ulong) n_unprocessed,
 +			(ulong) recv_max_parsed_page_no);
 +		ut_error;
 +	}
 +
 +	hash_table_free(recv_sys->addr_hash);
 +	mem_heap_empty(recv_sys->heap);
 +
 +	recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 512);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +# ifndef UNIV_LOG_DEBUG
 +/********************************************************//**
 +Frees the memory owned by the recovery system (hash table, heap,
 +parsing buffer and last-block buffer) while holding recv_sys->mutex,
 +resets the corresponding pointers to NULL, and then frees the
 +flush_rbt. The recv_sys struct and its mutexes are left intact. */
 +static
 +void
 +recv_sys_debug_free(void)
 +/*=====================*/
 +{
 +	mutex_enter(&(recv_sys->mutex));
 +
 +	hash_table_free(recv_sys->addr_hash);
 +	recv_sys->addr_hash = NULL;
 +
 +	mem_heap_free(recv_sys->heap);
 +	recv_sys->heap = NULL;
 +
 +	ut_free(recv_sys->buf);
 +	recv_sys->buf = NULL;
 +
 +	mem_free(recv_sys->last_block_buf_start);
 +	recv_sys->last_block_buf_start = NULL;
 +
 +	mutex_exit(&(recv_sys->mutex));
 +
 +	/* The flush_rbt is released after recv_sys->mutex is dropped. */
 +	buf_flush_free_flush_rbt();
 +}
 +# endif /* UNIV_LOG_DEBUG */
 +
 +# ifdef UNIV_LOG_ARCHIVE
 +/********************************************************//**
 +Truncates possible corrupted or extra records from a log group.
 +Starting at recovered_lsn aligned down to a log block boundary, the
 +group is overwritten up to finish_lsn in pieces of at most
 +RECV_SCAN_SIZE bytes taken from log_sys->buf, which is zero-filled.
 +If recovered_lsn is not block-aligned, the first block written is a
 +copy of recv_sys->last_block whose data length is set to the recovered
 +part. finish_lsn is finish_lsn1 when limit_lsn != LSN_MAX, otherwise
 +the smaller of finish_lsn1 and finish_lsn2. */
 +static
 +void
 +recv_truncate_group(
 +/*================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	lsn_t		recovered_lsn,	/*!< in: recovery succeeded up to this
 +					lsn */
 +	lsn_t		limit_lsn,	/*!< in: this was the limit for
 +					recovery */
 +	lsn_t		checkpoint_lsn,	/*!< in: recovery was started from this
 +					checkpoint */
 +	lsn_t		archived_lsn)	/*!< in: the log has been archived up to
 +					this lsn */
 +{
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +	lsn_t		finish_lsn1;
 +	lsn_t		finish_lsn2;
 +	lsn_t		finish_lsn;
 +
 +	if (archived_lsn == LSN_MAX) {
 +		/* Checkpoint was taken in the NOARCHIVELOG mode */
 +		archived_lsn = checkpoint_lsn;
 +	}
 +
 +	finish_lsn1 = ut_uint64_align_down(archived_lsn,
 +					   OS_FILE_LOG_BLOCK_SIZE)
 +		+ log_group_get_capacity(group);
 +
 +	finish_lsn2 = ut_uint64_align_up(recovered_lsn,
 +					 OS_FILE_LOG_BLOCK_SIZE)
 +		+ recv_sys->last_log_buf_size;
 +
 +	if (limit_lsn != LSN_MAX) {
 +		/* We do not know how far we should erase log records: erase
 +		as much as possible */
 +
 +		finish_lsn = finish_lsn1;
 +	} else {
 +		/* It is enough to erase the length of the log buffer */
 +		finish_lsn = finish_lsn1 < finish_lsn2
 +			? finish_lsn1 : finish_lsn2;
 +	}
 +
 +	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 +
 +	memset(log_sys->buf, 0, RECV_SCAN_SIZE);
 +
 +	start_lsn = ut_uint64_align_down(recovered_lsn,
 +					 OS_FILE_LOG_BLOCK_SIZE);
 +
 +	if (start_lsn != recovered_lsn) {
 +		/* Copy the last incomplete log block to the log buffer and
 +		edit its data length: */
 +		lsn_t	diff = recovered_lsn - start_lsn;
 +
 +		ut_a(diff <= 0xFFFFUL);
 +
 +		ut_memcpy(log_sys->buf, recv_sys->last_block,
 +			  OS_FILE_LOG_BLOCK_SIZE);
 +		log_block_set_data_len(log_sys->buf, (ulint) diff);
 +	}
 +
 +	if (start_lsn >= finish_lsn) {
 +
 +		return;
 +	}
 +
 +	for (;;) {
 +		ulint	len;
 +
 +		end_lsn = start_lsn + RECV_SCAN_SIZE;
 +
 +		if (end_lsn > finish_lsn) {
 +
 +			end_lsn = finish_lsn;
 +		}
 +
 +		len = (ulint) (end_lsn - start_lsn);
 +
 +		log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 +		if (end_lsn >= finish_lsn) {
 +
 +			return;
 +		}
 +
 +		memset(log_sys->buf, 0, RECV_SCAN_SIZE); /* only the first
 +		piece may carry the copied last block; later ones are zero */
 +
 +		start_lsn = end_lsn;
 +	}
 +}
 +
 +/********************************************************//**
 +Copies the log segment between group->scanned_lsn (aligned down to a
 +log block boundary) and recovered_lsn (aligned up to a log block
 +boundary) from the most up-to-date log group to group, so that it
 +contains the latest log data. The data is moved through log_sys->buf in
 +pieces of at most RECV_SCAN_SIZE bytes. Returns immediately if
 +group->scanned_lsn >= recovered_lsn. */
 +static
 +void
 +recv_copy_group(
 +/*============*/
 +	log_group_t*	up_to_date_group,	/*!< in: the most up-to-date log
 +						group */
 +	log_group_t*	group,			/*!< in: copy to this log
 +						group */
 +	lsn_t		recovered_lsn)		/*!< in: recovery succeeded up
 +						to this lsn */
 +{
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +
 +	if (group->scanned_lsn >= recovered_lsn) {
 +
 +		return;
 +	}
 +
 +	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 +
 +	start_lsn = ut_uint64_align_down(group->scanned_lsn,
 +					 OS_FILE_LOG_BLOCK_SIZE);
 +	for (;;) {
 +		ulint	len;
 +
 +		end_lsn = start_lsn + RECV_SCAN_SIZE;
 +
 +		if (end_lsn > recovered_lsn) {
 +			end_lsn = ut_uint64_align_up(recovered_lsn,
 +						     OS_FILE_LOG_BLOCK_SIZE);
 +		}
 +
 +		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 +				       up_to_date_group, start_lsn, end_lsn);
 +
 +		len = (ulint) (end_lsn - start_lsn);
 +
 +		log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 +
 +		if (end_lsn >= recovered_lsn) {
 +
 +			return;
 +		}
 +
 +		start_lsn = end_lsn;
 +	}
 +}
 +# endif /* UNIV_LOG_ARCHIVE */
 +
 +/********************************************************//**
 +Copies a log segment from the most up-to-date log group to the other log
 +groups, so that they all contain the latest log data. Also writes the info
 +about the latest checkpoint to the groups, and inits the fields in the group
 +memory structs to up-to-date values.
 +log_sys->mutex is released while waiting for the checkpoint write to
 +complete and is re-acquired before returning. */
 +static
 +void
 +recv_synchronize_groups(
 +/*====================*/
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_group_t*	up_to_date_group	/*!< in: the most up-to-date
 +						log group */
 +#endif
 +	)
 +{
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +	lsn_t		recovered_lsn;
 +
 +	recovered_lsn = recv_sys->recovered_lsn;
 +
 +	/* Read the last recovered log block to the recovery system buffer:
 +	the block is always incomplete */
 +
 +	start_lsn = ut_uint64_align_down(recovered_lsn,
 +					 OS_FILE_LOG_BLOCK_SIZE);
 +	end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	ut_a(start_lsn != end_lsn);
 +
 +	log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
 +#ifdef UNIV_LOG_ARCHIVE
 +			       up_to_date_group,
 +#else /* UNIV_LOG_ARCHIVE */
 +			       UT_LIST_GET_FIRST(log_sys->log_groups),
 +#endif /* UNIV_LOG_ARCHIVE */
 +			       start_lsn, end_lsn);
 +
 +	for (log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +	     group;
 +	     group = UT_LIST_GET_NEXT(log_groups, group)) {
 +#ifdef UNIV_LOG_ARCHIVE
 +		if (group != up_to_date_group) {
 +
 +			/* Copy log data if needed. The source group is
 +			the first argument of recv_copy_group() and the
 +			destination group is the second. */
 +
 +			recv_copy_group(up_to_date_group, group,
 +					recovered_lsn);
 +		}
 +#endif /* UNIV_LOG_ARCHIVE */
 +		/* Update the fields in the group struct to correspond to
 +		recovered_lsn */
 +
 +		log_group_set_fields(group, recovered_lsn);
 +	}
 +
 +	/* Copy the checkpoint info to the groups; remember that we have
 +	incremented checkpoint_no by one, and the info will not be written
 +	over the max checkpoint info, thus making the preservation of max
 +	checkpoint info on disk certain */
 +
 +	log_groups_write_checkpoint_info();
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Wait for the checkpoint write to complete */
 +	rw_lock_s_lock(&(log_sys->checkpoint_lock));
 +	rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 +
 +	mutex_enter(&(log_sys->mutex));
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/***********************************************************************//**
 +Checks the consistency of the checkpoint info by comparing the low 32
 +bits of two folds of the buffer against the two checksum fields stored
 +at LOG_CHECKPOINT_CHECKSUM_1 and LOG_CHECKPOINT_CHECKSUM_2.
 +@return	TRUE if ok */
 +static
 +ibool
 +recv_check_cp_is_consistent(
 +/*========================*/
 +	const byte*	buf)	/*!< in: buffer containing checkpoint info */
 +{
 +	/* First checksum: covers the bytes preceding the checksum field. */
 +	const ulint	fold_1 = ut_fold_binary(
 +		buf, LOG_CHECKPOINT_CHECKSUM_1);
 +	const ulint	stored_1 = mach_read_from_4(
 +		buf + LOG_CHECKPOINT_CHECKSUM_1);
 +
 +	if ((fold_1 & 0xFFFFFFFFUL) != stored_1) {
 +		return(FALSE);
 +	}
 +
 +	/* Second checksum: covers the range from LOG_CHECKPOINT_LSN up
 +	to the second checksum field. */
 +	const ulint	fold_2 = ut_fold_binary(
 +		buf + LOG_CHECKPOINT_LSN,
 +		LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 +	const ulint	stored_2 = mach_read_from_4(
 +		buf + LOG_CHECKPOINT_CHECKSUM_2);
 +
 +	return((fold_2 & 0xFFFFFFFFUL) == stored_2 ? TRUE : FALSE);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/********************************************************//**
 +Looks for the maximum consistent checkpoint from the log groups.
 +Every group is first marked LOG_GROUP_CORRUPTED. Both checkpoint fields
 +(LOG_CHECKPOINT_1 and LOG_CHECKPOINT_2) are then read; each one that
 +passes recv_check_cp_is_consistent() marks the group LOG_GROUP_OK and
 +overwrites group->lsn and group->lsn_offset. The group and field with
 +the largest checkpoint number are reported; because the comparison is
 +>=, the one read last wins on equal numbers.
 +@return	error code or DB_SUCCESS */
 +static MY_ATTRIBUTE((nonnull, warn_unused_result))
 +dberr_t
 +recv_find_max_checkpoint(
 +/*=====================*/
 +	log_group_t**	max_group,	/*!< out: max group */
 +	ulint*		max_field)	/*!< out: LOG_CHECKPOINT_1 or
 +					LOG_CHECKPOINT_2 */
 +{
 +	log_group_t*	group;
 +	ib_uint64_t	max_no;
 +	ib_uint64_t	checkpoint_no;
 +	ulint		field;
 +	byte*		buf;
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	max_no = 0;
 +	*max_group = NULL;
 +	*max_field = 0;
 +
 +	buf = log_sys->checkpoint_buf;
 +
 +	while (group) {
 +		group->state = LOG_GROUP_CORRUPTED;
 +
 +		for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
 +		     field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
 +
 +			log_group_read_checkpoint_info(group, field);
 +
 +			if (!recv_check_cp_is_consistent(buf)) {
 +#ifdef UNIV_DEBUG
 +				if (log_debug_writes) {
 +					fprintf(stderr,
 +						"InnoDB: Checkpoint in group"
 +						" %lu at %lu invalid, %lu\n",
 +						(ulong) group->id,
 +						(ulong) field,
 +						(ulong) mach_read_from_4(
 +							buf
 +							+ LOG_CHECKPOINT_CHECKSUM_1));
 +
 +				}
 +#endif /* UNIV_DEBUG */
 +				goto not_consistent;
 +			}
 +
 +			group->state = LOG_GROUP_OK;
 +
 +			group->lsn = mach_read_from_8(
 +				buf + LOG_CHECKPOINT_LSN);
 +			group->lsn_offset = mach_read_from_4(
 +				buf + LOG_CHECKPOINT_OFFSET_LOW32);
 +			group->lsn_offset |= ((lsn_t) mach_read_from_4(
 +				buf + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32;
 +			checkpoint_no = mach_read_from_8(
 +				buf + LOG_CHECKPOINT_NO);
 +
 +#ifdef UNIV_DEBUG
 +			if (log_debug_writes) {
 +				fprintf(stderr,
 +					"InnoDB: Checkpoint number %lu"
 +					" found in group %lu\n",
 +					(ulong) checkpoint_no,
 +					(ulong) group->id);
 +			}
 +#endif /* UNIV_DEBUG */
 +
 +			if (checkpoint_no >= max_no) {
 +				*max_group = group;
 +				*max_field = field;
 +				max_no = checkpoint_no;
 +			}
 +
 +not_consistent:
 +			;
 +		}
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	if (*max_group == NULL) {
 +
 +		fprintf(stderr,
 +			"InnoDB: No valid checkpoint found.\n"
 +			"InnoDB: If you are attempting downgrade"
 +			" from MySQL 5.7.9 or later,\n"
 +			"InnoDB: please refer to " REFMAN
 +			"upgrading-downgrading.html\n"
 +			"InnoDB: If this error appears when you are"
 +			" creating an InnoDB database,\n"
 +			"InnoDB: the problem may be that during"
 +			" an earlier attempt you managed\n"
 +			"InnoDB: to create the InnoDB data files,"
 +			" but log file creation failed.\n"
 +			"InnoDB: If that is the case, please refer to\n"
 +			"InnoDB: " REFMAN "error-creating-innodb.html\n");
 +		return(DB_ERROR);
 +	}
 +
 +	return(DB_SUCCESS);
 +}
 +#else /* !UNIV_HOTBACKUP */
 +/*******************************************************************//**
 +Reads the checkpoint info needed in hot backup.
 +LOG_CHECKPOINT_1 is selected if it is consistent. LOG_CHECKPOINT_2 is
 +selected instead only if it is consistent and its checkpoint number is
 +strictly greater than that of a consistent LOG_CHECKPOINT_1 (or greater
 +than 0 when LOG_CHECKPOINT_1 is not consistent). The out parameters are
 +written only when a checkpoint was selected.
 +@return	TRUE if success */
 +UNIV_INTERN
 +ibool
 +recv_read_checkpoint_info_for_backup(
 +/*=================================*/
 +	const byte*	hdr,	/*!< in: buffer containing the log group
 +				header */
 +	lsn_t*		lsn,	/*!< out: checkpoint lsn */
 +	lsn_t*		offset,	/*!< out: checkpoint offset in the log group */
 +	lsn_t*		cp_no,	/*!< out: checkpoint number */
 +	lsn_t*		first_header_lsn)
 +				/*!< out: lsn of the start of the
 +				first log file */
 +{
 +	ulint		max_cp		= 0;
 +	ib_uint64_t	max_cp_no	= 0;
 +	const byte*	cp_buf;
 +
 +	cp_buf = hdr + LOG_CHECKPOINT_1;
 +
 +	if (recv_check_cp_is_consistent(cp_buf)) {
 +		max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
 +		max_cp = LOG_CHECKPOINT_1;
 +	}
 +
 +	cp_buf = hdr + LOG_CHECKPOINT_2;
 +
 +	if (recv_check_cp_is_consistent(cp_buf)) {
 +		if (mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) {
 +			max_cp = LOG_CHECKPOINT_2;
 +		}
 +	}
 +
 +	if (max_cp == 0) {
 +		return(FALSE);
 +	}
 +
 +	cp_buf = hdr + max_cp;
 +
 +	*lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
 +	*offset = mach_read_from_4(
 +		cp_buf + LOG_CHECKPOINT_OFFSET_LOW32);
 +	*offset |= ((lsn_t) mach_read_from_4(
 +			    cp_buf + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32;
 +
 +	*cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
 +
 +	*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
 +
 +	return(TRUE);
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/******************************************************//**
 +Validates the trailer checksum field of a log block. A block is
 +accepted when the stored checksum equals the calculated one, or when it
 +equals the log block header number, which is what the field contains in
 +the old format before InnoDB-3.23.52.
 +@return TRUE if ok, or if the log block may be in the format of InnoDB
 +version predating 3.23.52 */
 +static
 +ibool
 +log_block_checksum_is_ok_or_old_format(
 +/*===================================*/
 +	const byte*	block)	/*!< in: pointer to a log block */
 +{
 +#ifdef UNIV_LOG_DEBUG
 +	return(TRUE);
 +#endif /* UNIV_LOG_DEBUG */
 +	const ulint	stored_checksum = log_block_get_checksum(block);
 +
 +	if (stored_checksum == log_block_calc_checksum(block)) {
 +		/* The checksum matches. */
 +		return(TRUE);
 +	}
 +
 +	/* Otherwise we assume the log block is in the format of
 +	InnoDB version < 3.23.52, and ok, if the field holds the
 +	log block number. */
 +	return(stored_checksum == log_block_get_hdr_no(block)
 +	       ? TRUE : FALSE);
 +}
 +
 +#ifdef UNIV_HOTBACKUP
 +/*******************************************************************//**
 +Scans the log segment and n_bytes_scanned is set to the length of valid
 +log scanned.
 +Blocks of OS_FILE_LOG_BLOCK_SIZE bytes are visited in order. Scanning
 +stops at the first block whose header number does not match
 +*scanned_lsn or whose checksum check fails, at a block whose checkpoint
 +number is more than 0x80000000 behind *scanned_checkpoint_no, or after
 +a block that holds less than OS_FILE_LOG_BLOCK_SIZE bytes of data. */
 +UNIV_INTERN
 +void
 +recv_scan_log_seg_for_backup(
 +/*=========================*/
 +	byte*		buf,		/*!< in: buffer containing log data */
 +	ulint		buf_len,	/*!< in: data length in that buffer */
 +	lsn_t*		scanned_lsn,	/*!< in/out: lsn of buffer start,
 +					we return scanned lsn */
 +	ulint*		scanned_checkpoint_no,
 +					/*!< in/out: 4 lowest bytes of the
 +					highest scanned checkpoint number so
 +					far */
 +	ulint*		n_bytes_scanned)/*!< out: how much we were able to
 +					scan, smaller than buf_len if log
 +					data ended here */
 +{
 +	ulint	data_len;
 +	byte*	log_block;
 +	ulint	no;
 +
 +	*n_bytes_scanned = 0;
 +
 +	for (log_block = buf; log_block < buf + buf_len;
 +	     log_block += OS_FILE_LOG_BLOCK_SIZE) {
 +
 +		no = log_block_get_hdr_no(log_block);
 +
 +#if 0
 +		fprintf(stderr, "Log block header no %lu\n", no);
 +#endif
 +
 +		if (no != log_block_convert_lsn_to_no(*scanned_lsn)
 +		    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 +#if 0
 +			fprintf(stderr,
 +				"Log block n:o %lu, scanned lsn n:o %lu\n",
 +				no, log_block_convert_lsn_to_no(*scanned_lsn));
 +#endif
 +			/* Garbage or an incompletely written log block */
 +
 +			log_block += OS_FILE_LOG_BLOCK_SIZE; /* only read by
 +			the disabled debug print below; the loop is left
 +			right after */
 +#if 0
 +			fprintf(stderr,
 +				"Next log block n:o %lu\n",
 +				log_block_get_hdr_no(log_block));
 +#endif
 +			break;
 +		}
 +
 +		if (*scanned_checkpoint_no > 0
 +		    && log_block_get_checkpoint_no(log_block)
 +		    < *scanned_checkpoint_no
 +		    && *scanned_checkpoint_no
 +		    - log_block_get_checkpoint_no(log_block)
 +		    > 0x80000000UL) {
 +
 +			/* Garbage from a log buffer flush which was made
 +			before the most recent database recovery */
 +#if 0
 +			fprintf(stderr,
 +				"Scanned cp n:o %lu, block cp n:o %lu\n",
 +				*scanned_checkpoint_no,
 +				log_block_get_checkpoint_no(log_block));
 +#endif
 +			break;
 +		}
 +
 +		data_len = log_block_get_data_len(log_block);
 +
 +		*scanned_checkpoint_no
 +			= log_block_get_checkpoint_no(log_block);
 +		*scanned_lsn += data_len;
 +
 +		*n_bytes_scanned += data_len;
 +
 +		if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 +			/* Log data ends here */
 +
 +#if 0
 +			fprintf(stderr, "Log block data len %lu\n",
 +				data_len);
 +#endif
 +			break;
 +		}
 +	}
 +}
 +#endif /* UNIV_HOTBACKUP */
 +
 +/*******************************************************************//**
 +Tries to parse a single log record body and also applies it to a page if
 +specified. File ops are parsed, but not applied in this function.
 +@return	log record end, NULL if not a complete record */
 +static
 +byte*
 +recv_parse_or_apply_log_rec_body(
 +/*=============================*/
 +	byte		type,	/*!< in: type */
 +	byte*		ptr,	/*!< in: pointer to a buffer */
 +	byte*		end_ptr,/*!< in: pointer to the buffer end */
 +	buf_block_t*	block,	/*!< in/out: buffer block or NULL; if
 +				not NULL, then the log record is
 +				applied to the page, and the log
 +				record should be complete then */
 +	mtr_t*		mtr,	/*!< in: mtr or NULL; should be non-NULL
 +				if and only if block is non-NULL */
 +	ulint		space_id)
 +				/*!< in: tablespace id obtained by
 +				parsing initial log record */
 +{
 +	dict_index_t*	index	= NULL;
 +	page_t*		page;
 +	page_zip_des_t*	page_zip;
 +#ifdef UNIV_DEBUG
 +	ulint		page_type;
 +#endif /* UNIV_DEBUG */
 +
 +	ut_ad(!block == !mtr);
 +
 +	if (block) {
 +		page = block->frame;
 +		page_zip = buf_block_get_page_zip(block);
 +		ut_d(page_type = fil_page_get_type(page));
 +	} else {
 +		page = NULL;
 +		page_zip = NULL;
 +		ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
 +	}
 +
 +	switch (type) {
 +#ifdef UNIV_LOG_LSN_DEBUG
 +	case MLOG_LSN:
 +		/* The LSN is checked in recv_parse_log_rec(). */
 +		break;
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +	case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
 +#ifdef UNIV_DEBUG
 +		if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
 +		    && end_ptr >= ptr + 2) {
 +			/* It is OK to set FIL_PAGE_TYPE and certain
 +			list node fields on an empty page.  Any other
 +			write is not OK. */
 +
 +			/* NOTE: There may be bogus assertion failures for
 +			dict_hdr_create(), trx_rseg_header_create(),
 +			trx_sys_create_doublewrite_buf(), and
 +			trx_sysf_create().
 +			These are only called during database creation. */
 +			ulint	offs = mach_read_from_2(ptr);
 +
 +			switch (type) {
 +			default:
 +				ut_error;
 +			case MLOG_2BYTES:
 +				/* Note that this can fail when the
 +				redo log been written with something
 +				older than InnoDB Plugin 1.0.4. */
 +				ut_ad(offs == FIL_PAGE_TYPE
 +				      || offs == IBUF_TREE_SEG_HEADER
 +				      + IBUF_HEADER + FSEG_HDR_OFFSET
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER + FIL_ADDR_BYTE
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER + FIL_ADDR_BYTE
 +				      + FIL_ADDR_SIZE
 +				      || offs == PAGE_BTR_SEG_LEAF
 +				      + PAGE_HEADER + FSEG_HDR_OFFSET
 +				      || offs == PAGE_BTR_SEG_TOP
 +				      + PAGE_HEADER + FSEG_HDR_OFFSET
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 +				      + PAGE_HEADER + FIL_ADDR_BYTE
 +				      + 0 /*FLST_PREV*/
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 +				      + PAGE_HEADER + FIL_ADDR_BYTE
 +				      + FIL_ADDR_SIZE /*FLST_NEXT*/);
 +				break;
 +			case MLOG_4BYTES:
 +				/* Note that this can fail when the
 +				redo log been written with something
 +				older than InnoDB Plugin 1.0.4. */
 +				ut_ad(0
 +				      || offs == IBUF_TREE_SEG_HEADER
 +				      + IBUF_HEADER + FSEG_HDR_SPACE
 +				      || offs == IBUF_TREE_SEG_HEADER
 +				      + IBUF_HEADER + FSEG_HDR_PAGE_NO
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER/* flst_init */
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER + FIL_ADDR_PAGE
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER + FIL_ADDR_PAGE
 +				      + FIL_ADDR_SIZE
 +				      || offs == PAGE_BTR_SEG_LEAF
 +				      + PAGE_HEADER + FSEG_HDR_PAGE_NO
 +				      || offs == PAGE_BTR_SEG_LEAF
 +				      + PAGE_HEADER + FSEG_HDR_SPACE
 +				      || offs == PAGE_BTR_SEG_TOP
 +				      + PAGE_HEADER + FSEG_HDR_PAGE_NO
 +				      || offs == PAGE_BTR_SEG_TOP
 +				      + PAGE_HEADER + FSEG_HDR_SPACE
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 +				      + PAGE_HEADER + FIL_ADDR_PAGE
 +				      + 0 /*FLST_PREV*/
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 +				      + PAGE_HEADER + FIL_ADDR_PAGE
 +				      + FIL_ADDR_SIZE /*FLST_NEXT*/);
 +				break;
 +			}
 +		}
 +#endif /* UNIV_DEBUG */
 +		ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
 +		break;
 +	case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_REC_INSERT,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
 +							block, index, mtr);
 +		}
 +		break;
 +	case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_REC_CLUST_DELETE_MARK,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = btr_cur_parse_del_mark_set_clust_rec(
 +				ptr, end_ptr, page, page_zip, index);
 +		}
 +		break;
 +	case MLOG_COMP_REC_SEC_DELETE_MARK:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		/* This log record type is obsolete, but we process it for
 +		backward compatibility with MySQL 5.0.3 and 5.0.4. */
 +		ut_a(!page || page_is_comp(page));
 +		ut_a(!page_zip);
 +		ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
 +		if (!ptr) {
 +			break;
 +		}
 +		/* Fall through */
 +	case MLOG_REC_SEC_DELETE_MARK:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
 +							 page, page_zip);
 +		break;
 +	case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_REC_UPDATE_IN_PLACE,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
 +							    page_zip, index);
 +		}
 +		break;
 +	case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
 +	case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_LIST_END_DELETE
 +				     || type == MLOG_COMP_LIST_START_DELETE,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
 +							 block, index, mtr);
 +		}
 +		break;
 +	case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_LIST_END_COPY_CREATED,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = page_parse_copy_rec_list_to_created_page(
 +				ptr, end_ptr, block, index, mtr);
 +		}
 +		break;
 +	case MLOG_PAGE_REORGANIZE:
 +	case MLOG_COMP_PAGE_REORGANIZE:
 +	case MLOG_ZIP_PAGE_REORGANIZE:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type != MLOG_PAGE_REORGANIZE,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = btr_parse_page_reorganize(
 +				ptr, end_ptr, index,
 +				type == MLOG_ZIP_PAGE_REORGANIZE,
 +				block, mtr);
 +		}
 +		break;
 +	case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
 +		/* Allow anything in page_type when creating a page. */
 +		ut_a(!page_zip);
 +		ptr = page_parse_create(ptr, end_ptr,
 +					type == MLOG_COMP_PAGE_CREATE,
 +					block, mtr);
 +		break;
 +	case MLOG_UNDO_INSERT:
 +		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 +		ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
 +		break;
 +	case MLOG_UNDO_ERASE_END:
 +		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 +		ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
 +		break;
 +	case MLOG_UNDO_INIT:
 +		/* Allow anything in page_type when creating a page. */
 +		ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
 +		break;
 +	case MLOG_UNDO_HDR_DISCARD:
 +		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 +		ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
 +		break;
 +	case MLOG_UNDO_HDR_CREATE:
 +	case MLOG_UNDO_HDR_REUSE:
 +		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 +		ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
 +						 page, mtr);
 +		break;
 +	case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		/* On a compressed page, MLOG_COMP_REC_MIN_MARK
 +		will be followed by MLOG_COMP_REC_DELETE
 +		or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
 +		in the same mini-transaction. */
 +		ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
 +		ptr = btr_parse_set_min_rec_mark(
 +			ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
 +			page, mtr);
 +		break;
 +	case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_REC_DELETE,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = page_cur_parse_delete_rec(ptr, end_ptr,
 +							block, index, mtr);
 +		}
 +		break;
 +	case MLOG_IBUF_BITMAP_INIT:
 +		/* Allow anything in page_type when creating a page. */
 +		ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
 +		break;
 +	case MLOG_INIT_FILE_PAGE:
 +		/* Allow anything in page_type when creating a page. */
 +		ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
 +		break;
 +	case MLOG_WRITE_STRING:
 +		ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
 +		ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
 +		break;
 +	case MLOG_FILE_RENAME:
 +		/* Do not rerun file-based log entries if this is
 +		IO completion from a page read. */
 +		if (page == NULL) {
 +			ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
 +							 space_id, 0);
 +		}
 +		break;
 +	case MLOG_FILE_CREATE:
 +	case MLOG_FILE_DELETE:
 +	case MLOG_FILE_CREATE2:
 +		/* Do not rerun file-based log entries if this is
 +		IO completion from a page read. */
 +		if (page == NULL) {
 +			ptr = fil_op_log_parse_or_replay(ptr, end_ptr,
 +							 type, 0, 0);
 +		}
 +		break;
 +	case MLOG_ZIP_WRITE_NODE_PTR:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
 +						    page, page_zip);
 +		break;
 +	case MLOG_ZIP_WRITE_BLOB_PTR:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
 +						    page, page_zip);
 +		break;
 +	case MLOG_ZIP_WRITE_HEADER:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		ptr = page_zip_parse_write_header(ptr, end_ptr,
 +						  page, page_zip);
 +		break;
 +	case MLOG_ZIP_PAGE_COMPRESS:
 +		/* Allow anything in page_type when creating a page. */
 +		ptr = page_zip_parse_compress(ptr, end_ptr,
 +					      page, page_zip);
 +		break;
 +	case MLOG_ZIP_PAGE_COMPRESS_NO_DATA:
 +		if (NULL != (ptr = mlog_parse_index(
 +				ptr, end_ptr, TRUE, &index))) {
 +
 +			ut_a(!page || ((ibool)!!page_is_comp(page)
 +				== dict_table_is_comp(index->table)));
 +			ptr = page_zip_parse_compress_no_data(
 +				ptr, end_ptr, page, page_zip, index);
 +		}
 +		break;
 +	default:
 +		ptr = NULL;
 +		recv_sys->found_corrupt_log = TRUE;
 +	}
 +
 +	if (index) {
 +		dict_table_t*	table = index->table;
 +
 +		dict_mem_index_free(index);
 +		dict_mem_table_free(table);
 +	}
 +
 +	return(ptr);
 +}
 +
 +/*********************************************************************//**
 +Calculates the fold value of a page file address: used in inserting or
 +searching for a log record in the hash table.
 +@return	folded value */
 +UNIV_INLINE
 +ulint
 +recv_fold(
 +/*======*/
 +	ulint	space,	/*!< in: space */
 +	ulint	page_no)/*!< in: page number */
 +{
 +	return(ut_fold_ulint_pair(space, page_no));
 +}
 +
 +/*********************************************************************//**
 +Calculates the hash value of a page file address: used in inserting or
 +searching for a log record in the hash table.
 +@return	hash value of recv_fold(space, page_no) in recv_sys->addr_hash */
 +UNIV_INLINE
 +ulint
 +recv_hash(
 +/*======*/
 +	ulint	space,	/*!< in: space */
 +	ulint	page_no)/*!< in: page number */
 +{
 +	return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash));
 +}
 +
 +/*********************************************************************//**
 +Gets the hashed file address struct for a page.
 +@return	file address struct, NULL if not found from the hash table */
 +static
 +recv_addr_t*
 +recv_get_fil_addr_struct(
 +/*=====================*/
 +	ulint	space,	/*!< in: space id */
 +	ulint	page_no)/*!< in: page number */
 +{
 +	recv_addr_t*	recv_addr;
 +
 +	/* Walk the chain of the hash cell selected by recv_hash() and
 +	return the first entry whose space id and page number both match */
 +	for (recv_addr = static_cast<recv_addr_t*>(
 +			HASH_GET_FIRST(recv_sys->addr_hash,
 +				       recv_hash(space, page_no)));
 +	     recv_addr != 0;
 +	     recv_addr = static_cast<recv_addr_t*>(
 +		     HASH_GET_NEXT(addr_hash, recv_addr))) {
 +
 +		if (recv_addr->space == space
 +		    && recv_addr->page_no == page_no) {
 +
 +			return(recv_addr);
 +		}
 +	}
 +
 +	return(NULL);
 +}
 +
 +/*******************************************************************//**
 +Adds a new log record to the hash table of log records. The record
 +descriptor, the per-page address entry (created when this is the first
 +record stored for the page) and the copied body chunks are all allocated
 +from recv_sys->heap. Nothing is stored if
 +fil_tablespace_deleted_or_being_deleted_in_mem() returns true for space. */
 +static
 +void
 +recv_add_to_hash_table(
 +/*===================*/
 +	byte	type,		/*!< in: log record type */
 +	ulint	space,		/*!< in: space id */
 +	ulint	page_no,	/*!< in: page number */
 +	byte*	body,		/*!< in: log record body */
 +	byte*	rec_end,	/*!< in: log record end */
 +	lsn_t	start_lsn,	/*!< in: start lsn of the mtr */
 +	lsn_t	end_lsn)	/*!< in: end lsn of the mtr */
 +{
 +	recv_t*		recv;
 +	ulint		len;
 +	recv_data_t*	recv_data;
 +	recv_data_t**	prev_field;
 +	recv_addr_t*	recv_addr;
 +
 +	if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
 +		/* The tablespace does not exist any more: do not store the
 +		log record */
 +
 +		return;
 +	}
 +
 +	/* NOTE(review): this value is not read before len is assigned
 +	again inside the copy loop below */
 +	len = rec_end - body;
 +
 +	recv = static_cast<recv_t*>(
 +		mem_heap_alloc(recv_sys->heap, sizeof(recv_t)));
 +
 +	recv->type = type;
 +	recv->len = rec_end - body;
 +	recv->start_lsn = start_lsn;
 +	recv->end_lsn = end_lsn;
 +
 +	recv_addr = recv_get_fil_addr_struct(space, page_no);
 +
 +	if (recv_addr == NULL) {
 +		/* First record for this page: create the address entry,
 +		insert it into the hash table and count it in n_addrs */
 +		recv_addr = static_cast<recv_addr_t*>(
 +			mem_heap_alloc(recv_sys->heap, sizeof(recv_addr_t)));
 +
 +		recv_addr->space = space;
 +		recv_addr->page_no = page_no;
 +		recv_addr->state = RECV_NOT_PROCESSED;
 +
 +		UT_LIST_INIT(recv_addr->rec_list);
 +
 +		HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
 +			    recv_fold(space, page_no), recv_addr);
 +		recv_sys->n_addrs++;
 +#if 0
 +		fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
 +			space, page_no);
 +#endif
 +	}
 +
 +	UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
 +
 +	/* prev_field always points at the link that the next chunk must be
 +	stored into: first recv->data, then the previous chunk's next */
 +	prev_field = &(recv->data);
 +
 +	/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
 +	recv_sys->heap grows into the buffer pool, and bigger chunks could not
 +	be allocated */
 +
 +	while (rec_end > body) {
 +
 +		len = rec_end - body;
 +
 +		if (len > RECV_DATA_BLOCK_SIZE) {
 +			len = RECV_DATA_BLOCK_SIZE;
 +		}
 +
 +		recv_data = static_cast<recv_data_t*>(
 +			mem_heap_alloc(recv_sys->heap,
 +				       sizeof(recv_data_t) + len));
 +
 +		*prev_field = recv_data;
 +
 +		/* The payload is stored directly after the chunk header */
 +		memcpy(recv_data + 1, body, len);
 +
 +		prev_field = &(recv_data->next);
 +
 +		body += len;
 +	}
 +
 +	/* Terminate the chunk list */
 +	*prev_field = NULL;
 +}
 +
 +/*********************************************************************//**
 +Copies the log record body from recv to buf. The body is read from the
 +chunk list starting at recv->data, taking at most RECV_DATA_BLOCK_SIZE
 +bytes from each chunk until recv->len bytes have been copied. */
 +static
 +void
 +recv_data_copy_to_buf(
 +/*==================*/
 +	byte*	buf,	/*!< in: buffer of length at least recv->len */
 +	recv_t*	recv)	/*!< in: log record */
 +{
 +	recv_data_t*	recv_data;
 +	ulint		part_len;
 +	ulint		len;
 +
 +	len = recv->len;
 +	recv_data = recv->data;
 +
 +	while (len > 0) {
 +		if (len > RECV_DATA_BLOCK_SIZE) {
 +			part_len = RECV_DATA_BLOCK_SIZE;
 +		} else {
 +			part_len = len;
 +		}
 +
 +		/* The payload of a chunk starts right after its
 +		recv_data_t header */
 +		ut_memcpy(buf, ((byte*) recv_data) + sizeof(recv_data_t),
 +			  part_len);
 +		buf += part_len;
 +		len -= part_len;
 +
 +		recv_data = recv_data->next;
 +	}
 +}
 +
 +/************************************************************************//**
 +Applies the hashed log records to the page, if the page lsn is less than the
 +lsn of a log record. This can be called when a buffer page has just been
 +read in, or also for a page already in the buffer pool. */
 +UNIV_INTERN
 +void
 +recv_recover_page_func(
 +/*===================*/
 +#ifndef UNIV_HOTBACKUP
 +	ibool		just_read_in,
 +				/*!< in: TRUE if the i/o handler calls
 +				this for a freshly read page */
 +#endif /* !UNIV_HOTBACKUP */
 +	buf_block_t*	block)	/*!< in/out: buffer block */
 +{
 +	page_t*		page;
 +	page_zip_des_t*	page_zip;
 +	recv_addr_t*	recv_addr;
 +	recv_t*		recv;
 +	byte*		buf;
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +	lsn_t		page_lsn;
 +	lsn_t		page_newest_lsn;
 +	ibool		modification_to_page;
 +#ifndef UNIV_HOTBACKUP
 +	ibool		success;
 +#endif /* !UNIV_HOTBACKUP */
 +	mtr_t		mtr;
++	ib_time_t	time;
 +
 +	mutex_enter(&(recv_sys->mutex));
 +
 +	if (recv_sys->apply_log_recs == FALSE) {
 +
 +		/* Log records should not be applied now */
 +
 +		mutex_exit(&(recv_sys->mutex));
 +
 +		return;
 +	}
 +
 +	recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
 +					     buf_block_get_page_no(block));
 +
 +	if ((recv_addr == NULL)
 +	    || (recv_addr->state == RECV_BEING_PROCESSED)
 +	    || (recv_addr->state == RECV_PROCESSED)) {
 +
 +		mutex_exit(&(recv_sys->mutex));
 +
 +		return;
 +	}
 +
 +#if 0
 +	fprintf(stderr, "Recovering space %lu, page %lu\n",
 +		buf_block_get_space(block), buf_block_get_page_no(block));
 +#endif
 +
 +	recv_addr->state = RECV_BEING_PROCESSED;
 +
 +	mutex_exit(&(recv_sys->mutex));
 +
 +	mtr_start(&mtr);
 +	mtr_set_log_mode(&mtr, MTR_LOG_NONE);
 +
 +	page = block->frame;
 +	page_zip = buf_block_get_page_zip(block);
 +
 +#ifndef UNIV_HOTBACKUP
 +	if (just_read_in) {
 +		/* Move the ownership of the x-latch on the page to
 +		this OS thread, so that we can acquire a second
 +		x-latch on it.  This is needed for the operations to
 +		the page to pass the debug checks. */
 +
 +		rw_lock_x_lock_move_ownership(&block->lock);
 +	}
 +
 +	success = buf_page_get_known_nowait(RW_X_LATCH, block,
 +					    BUF_KEEP_OLD,
 +					    __FILE__, __LINE__,
 +					    &mtr);
 +	ut_a(success);
 +
 +	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	/* Read the newest modification lsn from the page */
 +	page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
 +
 +#ifndef UNIV_HOTBACKUP
 +	/* It may be that the page has been modified in the buffer
 +	pool: read the newest modification lsn there */
 +
 +	page_newest_lsn = buf_page_get_newest_modification(&block->page);
 +
 +	if (page_newest_lsn) {
 +
 +		page_lsn = page_newest_lsn;
 +	}
 +#else /* !UNIV_HOTBACKUP */
 +	/* In recovery from a backup we do not really use the buffer pool */
 +	page_newest_lsn = 0;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	modification_to_page = FALSE;
 +	start_lsn = end_lsn = 0;
 +
 +	recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
 +
 +	while (recv) {
 +		end_lsn = recv->end_lsn;
 +
 +		if (recv->len > RECV_DATA_BLOCK_SIZE) {
 +			/* We have to copy the record body to a separate
 +			buffer */
 +
 +			buf = static_cast<byte*>(mem_alloc(recv->len));
 +
 +			recv_data_copy_to_buf(buf, recv);
 +		} else {
 +			buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
 +		}
 +
 +		if (recv->type == MLOG_INIT_FILE_PAGE) {
 +			page_lsn = page_newest_lsn;
 +
 +			memset(FIL_PAGE_LSN + page, 0, 8);
 +			memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
 +			       + page, 0, 8);
 +
 +			if (page_zip) {
 +				memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
 +			}
 +		}
 +
 +		if (recv->start_lsn >= page_lsn) {
 +
 +			/* NOTE: this end_lsn shadows the function-level
 +			one. It is the value stamped on the page, while the
 +			outer end_lsn (recv->end_lsn) is what is passed to
 +			buf_flush_recv_note_modification() below. */
 +			lsn_t	end_lsn;
 +
 +			if (!modification_to_page) {
 +
 +				modification_to_page = TRUE;
 +				start_lsn = recv->start_lsn;
 +			}
 +
 +			DBUG_PRINT("ib_log",
 +				   ("apply " DBUG_LSN_PF ": %u len %u "
 +				    "page %u:%u", recv->start_lsn,
 +				    (unsigned) recv->type,
 +				    (unsigned) recv->len,
 +				    (unsigned) recv_addr->space,
 +				    (unsigned) recv_addr->page_no));
 +
 +			recv_parse_or_apply_log_rec_body(recv->type, buf,
 +							 buf + recv->len,
 +							 block, &mtr,
 +							 recv_addr->space);
 +
 +			end_lsn = recv->start_lsn + recv->len;
 +			mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
 +			mach_write_to_8(UNIV_PAGE_SIZE
 +					- FIL_PAGE_END_LSN_OLD_CHKSUM
 +					+ page, end_lsn);
 +
 +			if (page_zip) {
 +				mach_write_to_8(FIL_PAGE_LSN
 +						+ page_zip->data, end_lsn);
 +			}
 +		}
 +
 +		if (recv->len > RECV_DATA_BLOCK_SIZE) {
 +			mem_free(buf);
 +		}
 +
 +		recv = UT_LIST_GET_NEXT(rec_list, recv);
 +	}
 +
 +#ifdef UNIV_ZIP_DEBUG
 +	if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
 +		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
 +
 +		ut_a(!page_zip
 +		     || page_zip_validate_low(page_zip, page, NULL, FALSE));
 +	}
 +#endif /* UNIV_ZIP_DEBUG */
 +
 +#ifndef UNIV_HOTBACKUP
 +	if (modification_to_page) {
 +		ut_a(block);
 +
 +		log_flush_order_mutex_enter();
 +		buf_flush_recv_note_modification(block, start_lsn, end_lsn);
 +		log_flush_order_mutex_exit();
 +	}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	/* Make sure that committing mtr does not change the modification
 +	lsn values of page */
 +
 +	mtr.modifications = FALSE;
 +
 +	mtr_commit(&mtr);
 +
++	time = ut_time();
++
 +	mutex_enter(&(recv_sys->mutex));
 +
 +	if (recv_max_page_lsn < page_lsn) {
 +		recv_max_page_lsn = page_lsn;
 +	}
 +
 +	recv_addr->state = RECV_PROCESSED;
 +
- 	ut_a(recv_sys->n_addrs);
- 	recv_sys->n_addrs--;
- 
- 	mutex_exit(&(recv_sys->mutex));
++	ut_a(recv_sys->n_addrs > 0);
++	/* Report the remaining page count at most once every 15 seconds.
++	The elapsed time is "now minus the previous report time"; the
++	reversed subtraction would never reach the threshold. */
++	if (--recv_sys->n_addrs && time - recv_sys->progress_time >= 15) {
++		recv_sys->progress_time = time;
++		ut_print_timestamp(stderr);
++		fprintf(stderr,
++			"  InnoDB: To recover: " ULINTPF " pages from log\n",
++			recv_sys->n_addrs);
++	}
 +
++	mutex_exit(&recv_sys->mutex);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/*******************************************************************//**
 +Reads in pages which have hashed log records, from an area around a given
 +page number.
 +@return	number of pages found */
 +static
 +ulint
 +recv_read_in_area(
 +/*==============*/
 +	ulint	space,	/*!< in: space */
 +	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
 +	ulint	page_no)/*!< in: page number */
 +{
 +	recv_addr_t* recv_addr;
 +	ulint	page_nos[RECV_READ_AHEAD_AREA];
 +	ulint	low_limit;
 +	ulint	n;
 +
 +	/* Round page_no down to a multiple of RECV_READ_AHEAD_AREA */
 +	low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
 +
 +	n = 0;
 +
 +	for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
 +	     page_no++) {
 +		recv_addr = recv_get_fil_addr_struct(space, page_no);
 +
 +		if (recv_addr && !buf_page_peek(space, page_no)) {
 +
 +			mutex_enter(&(recv_sys->mutex));
 +
 +			/* Only pages still in state RECV_NOT_PROCESSED
 +			are marked RECV_BEING_READ and queued */
 +			if (recv_addr->state == RECV_NOT_PROCESSED) {
 +				recv_addr->state = RECV_BEING_READ;
 +
 +				page_nos[n] = page_no;
 +
 +				n++;
 +			}
 +
 +			mutex_exit(&(recv_sys->mutex));
 +		}
 +	}
 +
 +	buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
- 	/*
- 	fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
- 	*/
 +	return(n);
 +}
 +
- /*******************************************************************//**
- Empties the hash table of stored log records, applying them to appropriate
- pages. */
++/** Apply the hash table of stored log records to persistent data pages.
++@param[in]	last_batch	whether the change buffer merge will be
++				performed as part of the operation */
 +UNIV_INTERN
 +void
- recv_apply_hashed_log_recs(
- /*=======================*/
- 	ibool	allow_ibuf)	/*!< in: if TRUE, also ibuf operations are
- 				allowed during the application; if FALSE,
- 				no ibuf operations are allowed, and after
- 				the application all file pages are flushed to
- 				disk and invalidated in buffer pool: this
- 				alternative means that no new log records
- 				can be generated during the application;
- 				the caller must in this case own the log
- 				mutex */
++recv_apply_hashed_log_recs(bool last_batch)
 +{
- 	recv_addr_t* recv_addr;
- 	ulint	i;
- 	ibool	has_printed	= FALSE;
- 	mtr_t	mtr;
- loop:
- 	mutex_enter(&(recv_sys->mutex));
- 
- 	if (recv_sys->apply_batch_on) {
++	/* Poll until no other batch is running; the loop is left with
++	recv_sys->mutex still held */
++	for (;;) {
++		mutex_enter(&recv_sys->mutex);
 +
- 		mutex_exit(&(recv_sys->mutex));
++		if (!recv_sys->apply_batch_on) {
++			break;
++		}
 +
++		mutex_exit(&recv_sys->mutex);
 +		os_thread_sleep(500000);
- 
- 		goto loop;
 +	}
 +
- 	ut_ad((!allow_ibuf) == mutex_own(&log_sys->mutex));
++	ut_ad(!last_batch == mutex_own(&log_sys->mutex));
 +
- 	if (!allow_ibuf) {
++	if (!last_batch) {
 +		recv_no_ibuf_operations = TRUE;
 +	}
 +
++	if (ulint n = recv_sys->n_addrs) {
++		const char* msg = last_batch
++			? "Starting final batch to recover "
++			: "Starting a batch to recover ";
++		ib_logf(IB_LOG_LEVEL_INFO,
++			"%s" ULINTPF " pages from redo log", msg, n);
++	}
++
 +	recv_sys->apply_log_recs = TRUE;
 +	recv_sys->apply_batch_on = TRUE;
 +
- 	for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
- 
- 		for (recv_addr = static_cast<recv_addr_t*>(
- 				HASH_GET_FIRST(recv_sys->addr_hash, i));
- 		     recv_addr != 0;
++	for (ulint i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
++		for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>(
++			     HASH_GET_FIRST(recv_sys->addr_hash, i));
++		     recv_addr;
 +		     recv_addr = static_cast<recv_addr_t*>(
 +				HASH_GET_NEXT(addr_hash, recv_addr))) {
 +
 +			ulint	space = recv_addr->space;
 +			ulint	zip_size = fil_space_get_zip_size(space);
 +			ulint	page_no = recv_addr->page_no;
 +
 +			if (recv_addr->state == RECV_NOT_PROCESSED) {
- 				if (!has_printed) {
- 					ib_logf(IB_LOG_LEVEL_INFO,
- 						"Starting an apply batch"
- 						" of log records"
- 						" to the database...");
- 					fputs("InnoDB: Progress in percent: ",
- 					      stderr);
- 					has_printed = TRUE;
- 				}
- 
- 				mutex_exit(&(recv_sys->mutex));
++				mutex_exit(&recv_sys->mutex);
 +
 +				if (buf_page_peek(space, page_no)) {
- 					buf_block_t*	block;
- 
++					mtr_t		mtr;
 +					mtr_start(&mtr);
- 
- 					block = buf_page_get(
++					buf_block_t*	block = buf_page_get(
 +						space, zip_size, page_no,
 +						RW_X_LATCH, &mtr);
 +					buf_block_dbg_add_level(
 +						block, SYNC_NO_ORDER_CHECK);
 +
 +					recv_recover_page(FALSE, block);
 +					mtr_commit(&mtr);
 +				} else {
 +					recv_read_in_area(space, zip_size,
 +							  page_no);
 +				}
 +
- 				mutex_enter(&(recv_sys->mutex));
++				mutex_enter(&recv_sys->mutex);
 +			}
 +		}
- 
- 		if (has_printed
- 		    && (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
- 		    != ((i + 1) * 100)
- 		    / hash_get_n_cells(recv_sys->addr_hash)) {
- 
- 			fprintf(stderr, "%lu ", (ulong)
- 				((i * 100)
- 				 / hash_get_n_cells(recv_sys->addr_hash)));
- 		}
 +	}
 +
 +	/* Wait until all the pages have been processed */
 +
 +	while (recv_sys->n_addrs != 0) {
 +
 +		mutex_exit(&(recv_sys->mutex));
 +
 +		os_thread_sleep(500000);
 +
 +		mutex_enter(&(recv_sys->mutex));
 +	}
 +
- 	if (has_printed) {
- 
- 		fprintf(stderr, "\n");
- 	}
- 
- 	if (!allow_ibuf) {
++	if (!last_batch) {
 +		bool	success;
 +
 +		/* Flush all the file pages to disk and invalidate them in
 +		the buffer pool */
 +
 +		ut_d(recv_no_log_write = TRUE);
 +		mutex_exit(&(recv_sys->mutex));
 +		mutex_exit(&(log_sys->mutex));
 +
 +		/* Stop the recv_writer thread from issuing any LRU
 +		flush batches. */
 +		mutex_enter(&recv_sys->writer_mutex);
 +
 +		/* Wait for any currently run batch to end. */
 +		buf_flush_wait_LRU_batch_end();
 +
 +		success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
 +
 +		ut_a(success);
 +
 +		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +		buf_pool_invalidate();
 +
 +		/* Allow batches from recv_writer thread. */
 +		mutex_exit(&recv_sys->writer_mutex);
 +
 +		mutex_enter(&(log_sys->mutex));
 +		mutex_enter(&(recv_sys->mutex));
 +		ut_d(recv_no_log_write = FALSE);
 +
 +		recv_no_ibuf_operations = FALSE;
 +	}
 +
 +	recv_sys->apply_log_recs = FALSE;
 +	recv_sys->apply_batch_on = FALSE;
 +
 +	recv_sys_empty_hash();
 +
- 	if (has_printed) {
- 		fprintf(stderr, "InnoDB: Apply batch completed\n");
- 	}
- 
- 	mutex_exit(&(recv_sys->mutex));
++	mutex_exit(&recv_sys->mutex);
 +}
 +#else /* !UNIV_HOTBACKUP */
 +/*******************************************************************//**
 +Applies log records in the hash table to a backup. Each page that has
 +hashed records is read into back_block1 with fil_io(), recovered with
 +recv_recover_page() and written back with fil_io(); the hash table is
 +emptied at the end. Failure to extend the tablespace, to read a page or
 +to decompress a compressed page terminates the process with exit(1). */
 +UNIV_INTERN
 +void
 +recv_apply_log_recs_for_backup(void)
 +/*================================*/
 +{
 +	recv_addr_t*	recv_addr;
 +	ulint		n_hash_cells;
 +	buf_block_t*	block;
 +	ulint		actual_size;
 +	ibool		success;
 +	ulint		error;
 +	ulint		i;
 +
 +	recv_sys->apply_log_recs = TRUE;
 +	recv_sys->apply_batch_on = TRUE;
 +
 +	block = back_block1;
 +
- 	ib_logf(IB_LOG_LEVEL_INFO,
- 		"Starting an apply batch of log records to the database...");
- 
- 	fputs("InnoDB: Progress in percent: ", stderr);
- 
 +	n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
 +
 +	for (i = 0; i < n_hash_cells; i++) {
 +		/* The address hash table is externally chained */
 +		recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
 +
 +		while (recv_addr != NULL) {
 +
 +			ulint	zip_size
 +				= fil_space_get_zip_size(recv_addr->space);
 +
 +			/* ULINT_UNDEFINED: the tablespace is not known
 +			(see the disabled warning below); mark the entry
 +			as processed without applying anything */
 +			if (zip_size == ULINT_UNDEFINED) {
 +#if 0
 +				fprintf(stderr,
 +					"InnoDB: Warning: cannot apply"
 +					" log record to"
 +					" tablespace %lu page %lu,\n"
 +					"InnoDB: because tablespace with"
 +					" that id does not exist.\n",
 +					recv_addr->space, recv_addr->page_no);
 +#endif
 +				recv_addr->state = RECV_PROCESSED;
 +
 +				ut_a(recv_sys->n_addrs);
 +				recv_sys->n_addrs--;
 +
 +				goto skip_this_recv_addr;
 +			}
 +
 +			/* We simulate a page read made by the buffer pool, to
 +			make sure the recovery apparatus works ok. We must init
 +			the block. */
 +
 +			buf_page_init_for_backup_restore(
 +				recv_addr->space, recv_addr->page_no,
 +				zip_size, block);
 +
 +			/* Extend the tablespace's last file if the page_no
 +			does not fall inside its bounds; we assume the last
 +			file is auto-extending, and mysqlbackup copied the file
 +			when it still was smaller */
 +
 +			success = fil_extend_space_to_desired_size(
 +				&actual_size,
 +				recv_addr->space, recv_addr->page_no + 1);
 +			if (!success) {
 +				fprintf(stderr,
 +					"InnoDB: Fatal error: cannot extend"
 +					" tablespace %u to hold %u pages\n",
 +					recv_addr->space, recv_addr->page_no);
 +
 +				exit(1);
 +			}
 +
 +			/* Read the page from the tablespace file using the
 +			fil0fil.cc routines */
 +
 +			if (zip_size) {
 +				error = fil_io(OS_FILE_READ, true,
 +					       recv_addr->space, zip_size,
 +					       recv_addr->page_no, 0, zip_size,
 +					       block->page.zip.data, NULL);
 +				if (error == DB_SUCCESS
 +				    && !buf_zip_decompress(block, TRUE)) {
 +					exit(1);
 +				}
 +			} else {
 +				error = fil_io(OS_FILE_READ, true,
 +					       recv_addr->space, 0,
 +					       recv_addr->page_no, 0,
 +					       UNIV_PAGE_SIZE,
 +					       block->frame, NULL);
 +			}
 +
 +			if (error != DB_SUCCESS) {
 +				fprintf(stderr,
 +					"InnoDB: Fatal error: cannot read"
 +					" from tablespace"
 +					" %lu page number %lu\n",
 +					(ulong) recv_addr->space,
 +					(ulong) recv_addr->page_no);
 +
 +				exit(1);
 +			}
 +
 +			/* Apply the log records to this page */
 +			recv_recover_page(FALSE, block);
 +
 +			/* Write the page back to the tablespace file using the
 +			fil0fil.cc routines */
 +
 +			buf_flush_init_for_writing(
 +				block->frame, buf_block_get_page_zip(block),
 +				mach_read_from_8(block->frame + FIL_PAGE_LSN));
 +
 +			/* NOTE(review): the result of the write is stored
 +			in error but is not checked before moving on to the
 +			next entry, unlike the read above */
 +			if (zip_size) {
 +				error = fil_io(OS_FILE_WRITE, true,
 +					       recv_addr->space, zip_size,
 +					       recv_addr->page_no, 0,
 +					       zip_size,
 +					       block->page.zip.data, NULL);
 +			} else {
 +				error = fil_io(OS_FILE_WRITE, true,
 +					       recv_addr->space, 0,
 +					       recv_addr->page_no, 0,
 +					       UNIV_PAGE_SIZE,
 +					       block->frame, NULL);
 +			}
 +skip_this_recv_addr:
 +			recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 +		}
- 
- 		if ((100 * i) / n_hash_cells
- 		    != (100 * (i + 1)) / n_hash_cells) {
- 			fprintf(stderr, "%lu ",
- 				(ulong) ((100 * i) / n_hash_cells));
- 			fflush(stderr);
- 		}
 +	}
 +
 +	recv_sys_empty_hash();
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/*******************************************************************//**
 +Tries to parse a single log record and returns its length.
 +@return	length of the record, or 0 if the record was not complete */
 +static
 +ulint
 +recv_parse_log_rec(
 +/*===============*/
 +	byte*	ptr,	/*!< in: pointer to a buffer */
 +	byte*	end_ptr,/*!< in: pointer to the buffer end */
 +	byte*	type,	/*!< out: type */
 +	ulint*	space,	/*!< out: space id */
 +	ulint*	page_no,/*!< out: page number */
 +	byte**	body)	/*!< out: log record body start */
 +{
 +	byte*	new_ptr;
 +
 +	*body = NULL;
 +
 +	if (ptr == end_ptr) {
 +
 +		return(0);
 +	}
 +
 +	/* MLOG_MULTI_REC_END and MLOG_DUMMY_RECORD are returned as
 +	one-byte records; *body stays NULL for them */
 +	if (*ptr == MLOG_MULTI_REC_END) {
 +
 +		*type = *ptr;
 +
 +		return(1);
 +	}
 +
 +	if (*ptr == MLOG_DUMMY_RECORD) {
 +		*type = *ptr;
 +
 +		*space = ULINT_UNDEFINED - 1; /* For debugging */
 +
 +		return(1);
 +	}
 +
 +	new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
 +						page_no);
 +	*body = new_ptr;
 +
 +	if (UNIV_UNLIKELY(!new_ptr)) {
 +
 +		return(0);
 +	}
 +
 +#ifdef UNIV_LOG_LSN_DEBUG
 +	if (*type == MLOG_LSN) {
 +		lsn_t	lsn = (lsn_t) *space << 32 | *page_no;
 +# ifdef UNIV_LOG_DEBUG
 +		ut_a(lsn == log_sys->old_lsn);
 +# else /* UNIV_LOG_DEBUG */
 +		ut_a(lsn == recv_sys->recovered_lsn);
 +# endif /* UNIV_LOG_DEBUG */
 +	}
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +
 +	/* Parse only: block and mtr are passed as NULL, so nothing is
 +	applied to a page here */
 +	new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
 +						   NULL, NULL, *space);
 +	if (UNIV_UNLIKELY(new_ptr == NULL)) {
 +
 +		return(0);
 +	}
 +
 +	if (*page_no > recv_max_parsed_page_no) {
 +		recv_max_parsed_page_no = *page_no;
 +	}
 +
 +	return(new_ptr - ptr);
 +}
 +
 +/*******************************************************//**
 +Calculates the new value for lsn when more data is added to the log.
 +The data length is extended by LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE for
 +every time the added data, counted from the position of lsn inside its log
 +block, fills up the payload area of a block
 +(OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_TRL_SIZE bytes).
 +@return	lsn + len + the header and trailer bytes that were added */
 +static
 +lsn_t
 +recv_calc_lsn_on_data_add(
 +/*======================*/
 +	lsn_t		lsn,	/*!< in: old lsn */
 +	ib_uint64_t	len)	/*!< in: this many bytes of data is
 +				added, log block headers not included */
 +{
 +	ulint		frag_len;	/* offset of lsn in its log block,
 +					not counting the block header */
 +	ib_uint64_t	lsn_len;	/* len plus the header and trailer
 +					bytes of the filled blocks */
 +
 +	frag_len = (lsn % OS_FILE_LOG_BLOCK_SIZE) - LOG_BLOCK_HDR_SIZE;
 +	ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 +	      - LOG_BLOCK_TRL_SIZE);
 +	lsn_len = len;
 +	lsn_len += (lsn_len + frag_len)
 +		/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 +		   - LOG_BLOCK_TRL_SIZE)
 +		* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
 +
 +	return(lsn + lsn_len);
 +}
 +
 +#ifdef UNIV_LOG_DEBUG
 +/*******************************************************//**
 +Checks that the parser recognizes incomplete initial segments of a log
 +record as incomplete.
 +For every length i in [0, len), recv_parse_log_rec() is called on the first
 +i bytes of the record and is asserted (ut_a) to return 0. The output
 +values of those calls are discarded. */
 +static
 +void
 +recv_check_incomplete_log_recs(
 +/*===========================*/
 +	byte*	ptr,	/*!< in: pointer to a complete log record */
 +	ulint	len)	/*!< in: length of the log record */
 +{
 +	ulint	i;
 +	byte	type;
 +	ulint	space;
 +	ulint	page_no;
 +	byte*	body;
 +
 +	for (i = 0; i < len; i++) {
 +		ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
 +					     &page_no, &body));
 +	}
 +}
 +#endif /* UNIV_LOG_DEBUG */
 +
 +/*******************************************************//**
 +Prints diagnostic info of corrupt log.
 +The record type, space id, page number, recv_sys->recovered_lsn, the
 +offset of the record in recv_sys->buf and the type, multi flag and offset
 +of the previously parsed record are written to stderr. If the distance
 +between the previous record and this one passes the check below, a hex
 +dump of that part of the parsing buffer follows. Unless UNIV_HOTBACKUP is
 +defined, ut_error is invoked when srv_force_recovery is not set; otherwise
 +a warning is printed and the function returns.
 +NOTE(review): the hex dump starts at
 +recv_sys->buf + recv_previous_parsed_rec_offset - 100, which points before
 +recv_sys->buf when recv_previous_parsed_rec_offset < 100; the condition
 +guarding the dump does not exclude that case - confirm whether it can
 +occur. */
 +static
 +void
 +recv_report_corrupt_log(
 +/*====================*/
 +	byte*	ptr,	/*!< in: pointer to corrupt log record */
 +	byte	type,	/*!< in: type of the record */
 +	ulint	space,	/*!< in: space id, this may also be garbage */
 +	ulint	page_no)/*!< in: page number, this may also be garbage */
 +{
 +	fprintf(stderr,
 +		"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
 +		"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
 +		"InnoDB: Log parsing proceeded successfully up to " LSN_PF "\n"
 +		"InnoDB: Previous log record type %lu, is multi %lu\n"
 +		"InnoDB: Recv offset %lu, prev %lu\n",
 +		(ulong) type, (ulong) space, (ulong) page_no,
 +		recv_sys->recovered_lsn,
 +		(ulong) recv_previous_parsed_rec_type,
 +		(ulong) recv_previous_parsed_rec_is_multi,
 +		(ulong) (ptr - recv_sys->buf),
 +		(ulong) recv_previous_parsed_rec_offset);
 +
 +	if ((ulint)(ptr - recv_sys->buf + 100)
 +	    > recv_previous_parsed_rec_offset
 +	    && (ulint)(ptr - recv_sys->buf + 100
 +		       - recv_previous_parsed_rec_offset)
 +	    < 200000) {
 +		fputs("InnoDB: Hex dump of corrupt log starting"
 +		      " 100 bytes before the start\n"
 +		      "InnoDB: of the previous log rec,\n"
 +		      "InnoDB: and ending 100 bytes after the start"
 +		      " of the corrupt rec:\n",
 +		      stderr);
 +
 +		ut_print_buf(stderr,
 +			     recv_sys->buf
 +			     + recv_previous_parsed_rec_offset - 100,
 +			     ptr - recv_sys->buf + 200
 +			     - recv_previous_parsed_rec_offset);
 +		putc('\n', stderr);
 +	}
 +
 +#ifndef UNIV_HOTBACKUP
 +	if (!srv_force_recovery) {
 +		fputs("InnoDB: Set innodb_force_recovery"
 +		      " to ignore this error.\n", stderr);
 +		ut_error;
 +	}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
 +	      "InnoDB: is possible that the log scan did not proceed\n"
 +	      "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
 +	      "InnoDB: on your InnoDB tables to check that they are ok!\n"
 +	      "InnoDB: If mysqld crashes after this recovery, look at\n"
 +	      "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
 +	      "InnoDB: about forcing recovery.\n", stderr);
 +
 +	fflush(stderr);
 +}
 +
 +/*******************************************************//**
 +Parses log records from a buffer and stores them to a hash table to wait
 +merging to file pages.
 +Each round starts at recv_sys->buf + recv_sys->recovered_offset. A record
 +that has MLOG_SINGLE_REC_FLAG set in its first byte, or an
 +MLOG_DUMMY_RECORD, is handled on its own. Any other record starts a group
 +that ends in MLOG_MULTI_REC_END: the group is first parsed once to check
 +that it is completely in the buffer, and then parsed a second time to add
 +its records to the hash table. recv_sys->recovered_offset and
 +recv_sys->recovered_lsn are advanced as records are consumed.
 +The function returns when the buffer is exhausted, when a record cannot
 +be parsed or recv_sys->found_corrupt_log is set in the single-record
 +branch or in the first pass over a group (corruption is reported with
 +recv_report_corrupt_log()), or when the parsed data would end after
 +recv_sys->scanned_lsn.
 +@return	currently always returns FALSE */
 +static
 +ibool
 +recv_parse_log_recs(
 +/*================*/
 +	ibool	store_to_hash)	/*!< in: TRUE if the records should be stored
 +				to the hash table; this is set to FALSE if just
 +				debug checking is needed */
 +{
 +	byte*	ptr;
 +	byte*	end_ptr;
 +	ulint	single_rec;
 +	ulint	len;
 +	ulint	total_len;	/* bytes parsed so far in the current group */
 +	lsn_t	new_recovered_lsn;
 +	lsn_t	old_lsn;
 +	byte	type;
 +	ulint	space;
 +	ulint	page_no;
 +	byte*	body;
 +	ulint	n_recs;		/* only counted, not read in this function */
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_ad(recv_sys->parse_start_lsn != 0);
 +loop:
 +	ptr = recv_sys->buf + recv_sys->recovered_offset;
 +
 +	end_ptr = recv_sys->buf + recv_sys->len;
 +
 +	if (ptr == end_ptr) {
 +
 +		return(FALSE);
 +	}
 +
 +	single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
 +
 +	if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
 +		/* The mtr only modified a single page, or this is a file op */
 +
 +		old_lsn = recv_sys->recovered_lsn;
 +
 +		/* Try to parse a log record, fetching its type, space id,
 +		page no, and a pointer to the body of the log record */
 +
 +		len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 +					 &page_no, &body);
 +
 +		if (len == 0 || recv_sys->found_corrupt_log) {
 +			if (recv_sys->found_corrupt_log) {
 +
 +				recv_report_corrupt_log(ptr,
 +							type, space, page_no);
 +			}
 +
 +			return(FALSE);
 +		}
 +
 +		new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
 +
 +		if (new_recovered_lsn > recv_sys->scanned_lsn) {
 +			/* The log record filled a log block, and we require
 +			that also the next log block should have been scanned
 +			in */
 +
 +			return(FALSE);
 +		}
 +
 +		recv_previous_parsed_rec_type = (ulint) type;
 +		recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
 +		recv_previous_parsed_rec_is_multi = 0;
 +
 +		recv_sys->recovered_offset += len;
 +		recv_sys->recovered_lsn = new_recovered_lsn;
 +
 +		DBUG_PRINT("ib_log",
 +			   ("scan " DBUG_LSN_PF ": log rec %u len %u "
 +			    "page %u:%u", old_lsn,
 +			    (unsigned) type, (unsigned) len,
 +			    (unsigned) space, (unsigned) page_no));
 +
 +		if (type == MLOG_DUMMY_RECORD) {
 +			/* Do nothing */
 +
 +		} else if (!store_to_hash) {
 +			/* In debug checking nothing is stored. If
 +			UNIV_LOG_DEBUG is defined, check that the parser
 +			reports every incomplete initial segment of this
 +			record as incomplete */
 +#ifdef UNIV_LOG_DEBUG
 +			recv_check_incomplete_log_recs(ptr, len);
 +#endif/* UNIV_LOG_DEBUG */
 +
 +		} else if (type == MLOG_FILE_CREATE
 +			   || type == MLOG_FILE_CREATE2
 +			   || type == MLOG_FILE_RENAME
 +			   || type == MLOG_FILE_DELETE) {
 +			ut_a(space);
 +#ifdef UNIV_HOTBACKUP
 +			if (recv_replay_file_ops) {
 +
 +				/* In mysqlbackup --apply-log, replay an .ibd
 +				file operation, if possible; note that
 +				fil_path_to_mysql_datadir is set in mysqlbackup
 +				to point to the datadir we should use there */
 +
 +				if (NULL == fil_op_log_parse_or_replay(
 +					    body, end_ptr, type,
 +					    space, page_no)) {
 +					fprintf(stderr,
 +						"InnoDB: Error: file op"
 +						" log record of type %lu"
 +						" space %lu not complete in\n"
 +						"InnoDB: the replay phase."
 +						" Path %s\n",
 +						(ulint) type, space,
 +						(char*)(body + 2));
 +
 +					ut_error;
 +				}
 +			}
 +#endif
 +			/* In normal mysqld crash recovery we do not try to
 +			replay file operations */
 +#ifdef UNIV_LOG_LSN_DEBUG
 +		} else if (type == MLOG_LSN) {
 +			/* Do not add these records to the hash table.
 +			The page number and space id fields are misused
 +			for something else. */
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +		} else {
 +			recv_add_to_hash_table(type, space, page_no, body,
 +					       ptr + len, old_lsn,
 +					       recv_sys->recovered_lsn);
 +		}
 +	} else {
 +		/* Check that all the records associated with the single mtr
 +		are included within the buffer */
 +
 +		total_len = 0;
 +		n_recs = 0;
 +
 +		for (;;) {
 +			len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 +						 &page_no, &body);
 +			if (len == 0 || recv_sys->found_corrupt_log) {
 +
 +				if (recv_sys->found_corrupt_log) {
 +
 +					recv_report_corrupt_log(
 +						ptr, type, space, page_no);
 +				}
 +
 +				return(FALSE);
 +			}
 +
 +			recv_previous_parsed_rec_type = (ulint) type;
 +			recv_previous_parsed_rec_offset
 +				= recv_sys->recovered_offset + total_len;
 +			recv_previous_parsed_rec_is_multi = 1;
 +
 +#ifdef UNIV_LOG_DEBUG
 +			if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
 +				recv_check_incomplete_log_recs(ptr, len);
 +			}
 +#endif /* UNIV_LOG_DEBUG */
 +
 +			DBUG_PRINT("ib_log",
 +				   ("scan " DBUG_LSN_PF ": multi-log rec %u "
 +				    "len %u page %u:%u",
 +				    recv_sys->recovered_lsn,
 +				    (unsigned) type, (unsigned) len,
 +				    (unsigned) space, (unsigned) page_no));
 +
 +			total_len += len;
 +			n_recs++;
 +
 +			ptr += len;
 +
 +			if (type == MLOG_MULTI_REC_END) {
 +
 +				/* Found the end mark for the records */
 +
 +				break;
 +			}
 +		}
 +
 +		new_recovered_lsn = recv_calc_lsn_on_data_add(
 +			recv_sys->recovered_lsn, total_len);
 +
 +		if (new_recovered_lsn > recv_sys->scanned_lsn) {
 +			/* The log record filled a log block, and we require
 +			that also the next log block should have been scanned
 +			in */
 +
 +			return(FALSE);
 +		}
 +
 +		/* Add all the records to the hash table. The buffer is
 +		parsed again from the start of the group, and every record
 +		is passed new_recovered_lsn, the lsn computed above for the
 +		end of the whole group, as the last argument of
 +		recv_add_to_hash_table() */
 +
 +		ptr = recv_sys->buf + recv_sys->recovered_offset;
 +
 +		for (;;) {
 +			old_lsn = recv_sys->recovered_lsn;
 +			len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 +						 &page_no, &body);
 +			if (recv_sys->found_corrupt_log) {
 +
 +				recv_report_corrupt_log(ptr,
 +							type, space, page_no);
 +			}
 +
 +			ut_a(len != 0);
 +			ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
 +
 +			recv_sys->recovered_offset += len;
 +			recv_sys->recovered_lsn
 +				= recv_calc_lsn_on_data_add(old_lsn, len);
 +			if (type == MLOG_MULTI_REC_END) {
 +
 +				/* Found the end mark for the records */
 +
 +				break;
 +			}
 +
 +			if (store_to_hash
 +#ifdef UNIV_LOG_LSN_DEBUG
 +			    && type != MLOG_LSN
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +			    ) {
 +				recv_add_to_hash_table(type, space, page_no,
 +						       body, ptr + len,
 +						       old_lsn,
 +						       new_recovered_lsn);
 +			}
 +
 +			ptr += len;
 +		}
 +	}
 +
 +	goto loop;
 +}
 +
 +/*******************************************************//**
 +Adds data from a new log block to the parsing buffer of recv_sys if
 +recv_sys->parse_start_lsn is non-zero.
 +Nothing is added unless scanned_lsn exceeds both
 +recv_sys->parse_start_lsn and recv_sys->scanned_lsn. Otherwise the bytes
 +of the block between start_offset and end_offset, which are clamped so
 +that the block header and trailer are left out, are appended to
 +recv_sys->buf, and recv_sys->len is increased by the number of bytes
 +copied.
 +NOTE: TRUE is also returned when start_offset == end_offset, that is,
 +when no bytes were actually copied.
 +@return	TRUE if more data added */
 +static
 +ibool
 +recv_sys_add_to_parsing_buf(
 +/*========================*/
 +	const byte*	log_block,	/*!< in: log block */
 +	lsn_t		scanned_lsn)	/*!< in: lsn of how far we were able
 +					to find data in this log block */
 +{
 +	ulint	more_len;	/* lsn distance from the larger of
 +				parse_start_lsn and recv_sys->scanned_lsn
 +				to scanned_lsn */
 +	ulint	data_len;	/* data length read from the block */
 +	ulint	start_offset;	/* first byte of the block to copy */
 +	ulint	end_offset;	/* one past the last byte to copy */
 +
 +	ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
 +
 +	if (!recv_sys->parse_start_lsn) {
 +		/* Cannot start parsing yet because no start point for
 +		it found */
 +
 +		return(FALSE);
 +	}
 +
 +	data_len = log_block_get_data_len(log_block);
 +
 +	if (recv_sys->parse_start_lsn >= scanned_lsn) {
 +
 +		return(FALSE);
 +
 +	} else if (recv_sys->scanned_lsn >= scanned_lsn) {
 +
 +		return(FALSE);
 +
 +	} else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
 +		more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
 +	} else {
 +		more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
 +	}
 +
 +	if (more_len == 0) {
 +
 +		return(FALSE);
 +	}
 +
 +	ut_ad(data_len >= more_len);
 +
 +	start_offset = data_len - more_len;
 +
 +	if (start_offset < LOG_BLOCK_HDR_SIZE) {
 +		start_offset = LOG_BLOCK_HDR_SIZE; /* skip the block header */
 +	}
 +
 +	end_offset = data_len;
 +
 +	if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 +		end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
 +	}
 +
 +	ut_ad(start_offset <= end_offset);
 +
 +	if (start_offset < end_offset) {
 +		ut_memcpy(recv_sys->buf + recv_sys->len,
 +			  log_block + start_offset, end_offset - start_offset);
 +
 +		recv_sys->len += end_offset - start_offset;
 +
 +		ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
 +	}
 +
 +	return(TRUE);
 +}
 +
 +/*******************************************************//**
 +Moves the parsing buffer data left to the buffer start.
 +The bytes of recv_sys->buf from recv_sys->recovered_offset up to
 +recv_sys->len are moved to offset 0 with ut_memmove(). After that,
 +recv_sys->len is reduced by recv_sys->recovered_offset and
 +recv_sys->recovered_offset is reset to 0. */
 +static
 +void
 +recv_sys_justify_left_parsing_buf(void)
 +/*===================================*/
 +{
 +	ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
 +		   recv_sys->len - recv_sys->recovered_offset);
 +
 +	recv_sys->len -= recv_sys->recovered_offset;
 +
 +	recv_sys->recovered_offset = 0;
 +}
 +
 +/*******************************************************//**
 +Scans log from a buffer and stores new log data to the parsing buffer.
 +Parses and hashes the log records if new data found.  Unless
 +UNIV_HOTBACKUP is defined, this function will apply log records
 +automatically when the hash table becomes full.
 +The buffer is processed one log block (OS_FILE_LOG_BLOCK_SIZE bytes) at a
 +time. Scanning stops at a block whose header number does not match
 +scanned_lsn or whose checksum check fails, at a block whose checkpoint
 +number is more than 0x80000000 behind recv_sys->scanned_checkpoint_no, or
 +after a block that holds less than OS_FILE_LOG_BLOCK_SIZE bytes of data.
 +Unless UNIV_HOTBACKUP is defined, TRUE is also returned, with
 +recv_needed_recovery set, when a startup-type scan finds new log data
 +while srv_read_only_mode is set.
 +@return TRUE if limit_lsn has been reached, or not able to scan any
 +more in this log group */
 +UNIV_INTERN
 +ibool
 +recv_scan_log_recs(
 +/*===============*/
 +	ulint		available_memory,/*!< in: we let the hash table of recs
 +					to grow to this size, at the maximum */
 +	ibool		store_to_hash,	/*!< in: TRUE if the records should be
 +					stored to the hash table; this is set
 +					to FALSE if just debug checking is
 +					needed */
 +	const byte*	buf,		/*!< in: buffer containing a log
 +					segment or garbage */
 +	ulint		len,		/*!< in: buffer length */
 +	lsn_t		start_lsn,	/*!< in: buffer start lsn */
 +	lsn_t*		contiguous_lsn,	/*!< in/out: it is known that all log
 +					groups contain contiguous log data up
 +					to this lsn */
 +	lsn_t*		group_scanned_lsn)/*!< out: scanning succeeded up to
 +					this lsn */
 +{
 +	const byte*	log_block;
 +	ulint		no;
 +	lsn_t		scanned_lsn;
 +	ibool		finished;
 +	ulint		data_len;
 +	ibool		more_data;
 +
 +	ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 +	ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 +	ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE);
 +	ut_a(store_to_hash <= TRUE);
 +
 +	finished = FALSE;
 +
 +	log_block = buf;
 +	scanned_lsn = start_lsn;
 +	more_data = FALSE;
 +
 +	do {
 +		no = log_block_get_hdr_no(log_block);
 +		/*
 +		fprintf(stderr, "Log block header no %lu\n", no);
 +
 +		fprintf(stderr, "Scanned lsn no %lu\n",
 +		log_block_convert_lsn_to_no(scanned_lsn));
 +		*/
 +		if (no != log_block_convert_lsn_to_no(scanned_lsn)
 +		    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 +
 +			if (no == log_block_convert_lsn_to_no(scanned_lsn)
 +			    && !log_block_checksum_is_ok_or_old_format(
 +				    log_block)) {
 +				fprintf(stderr,
 +					"InnoDB: Log block no %lu at"
 +					" lsn " LSN_PF " has\n"
 +					"InnoDB: ok header, but checksum field"
 +					" contains %lu, should be %lu\n",
 +					(ulong) no,
 +					scanned_lsn,
 +					(ulong) log_block_get_checksum(
 +						log_block),
 +					(ulong) log_block_calc_checksum(
 +						log_block));
 +			}
 +
 +			/* Garbage or an incompletely written log block */
 +
 +			finished = TRUE;
 +
 +			break;
 +		}
 +
 +		if (log_block_get_flush_bit(log_block)) {
 +			/* This block was a start of a log flush operation:
 +			we know that the previous flush operation must have
 +			been completed for all log groups before this block
 +			can have been flushed to any of the groups. Therefore,
 +			we know that log data is contiguous up to scanned_lsn
 +			in all non-corrupt log groups. */
 +
 +			if (scanned_lsn > *contiguous_lsn) {
 +				*contiguous_lsn = scanned_lsn;
 +			}
 +		}
 +
 +		data_len = log_block_get_data_len(log_block);
 +
 +		if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
 +		    && scanned_lsn + data_len > recv_sys->scanned_lsn
 +		    && (recv_sys->scanned_checkpoint_no > 0)
 +		    && (log_block_get_checkpoint_no(log_block)
 +			< recv_sys->scanned_checkpoint_no)
 +		    && (recv_sys->scanned_checkpoint_no
 +			- log_block_get_checkpoint_no(log_block)
 +			> 0x80000000UL)) {
 +
 +			/* Garbage from a log buffer flush which was made
 +			before the most recent database recovery */
 +
 +			finished = TRUE;
 +#ifdef UNIV_LOG_DEBUG
 +			/* This is not really an error, but currently
 +			we stop here in the debug version: */
 +
 +			ut_error;
 +#endif
 +			break;
 +		}
 +
 +		if (!recv_sys->parse_start_lsn
 +		    && (log_block_get_first_rec_group(log_block) > 0)) {
 +
 +			/* We found a point from which to start the parsing
 +			of log records */
 +
 +			recv_sys->parse_start_lsn = scanned_lsn
 +				+ log_block_get_first_rec_group(log_block);
 +			recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 +			recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 +		}
 +
 +		scanned_lsn += data_len;
 +
 +		if (scanned_lsn > recv_sys->scanned_lsn) {
 +
 +			/* We have found more entries. If this scan is
 + 			of startup type, we must initiate crash recovery
 +			environment before parsing these log records. */
 +
 +#ifndef UNIV_HOTBACKUP
 +			if (recv_log_scan_is_startup_type
 +			    && !recv_needed_recovery) {
- 
 +				if (!srv_read_only_mode) {
 +					ib_logf(IB_LOG_LEVEL_INFO,
- 						"Log scan progressed past the "
- 						"checkpoint lsn " LSN_PF "",
++						"Starting crash recovery from "
++						"checkpoint LSN=" LSN_PF,
 +						recv_sys->scanned_lsn);
 +
 +					recv_init_crash_recovery();
 +				} else {
 +					ib_logf(IB_LOG_LEVEL_ERROR,
 +						"innodb_read_only prevents"
 +						" crash recovery");
 +					recv_needed_recovery = TRUE;
 +					return(TRUE);
 +				}
 +			}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +			/* We were able to find more log data: add it to the
 +			parsing buffer if parse_start_lsn is already
 +			non-zero. The data is not added if fewer than
 +			4 * OS_FILE_LOG_BLOCK_SIZE bytes would remain free in
 +			the parsing buffer; that case is flagged as a corrupt
 +			log */
 +
 +			if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
 +			    >= RECV_PARSING_BUF_SIZE) {
 +				fprintf(stderr,
 +					"InnoDB: Error: log parsing"
 +					" buffer overflow."
 +					" Recovery may have failed!\n");
 +
 +				recv_sys->found_corrupt_log = TRUE;
 +
 +#ifndef UNIV_HOTBACKUP
 +				if (!srv_force_recovery) {
 +					fputs("InnoDB: Set"
 +					      " innodb_force_recovery"
 +					      " to ignore this error.\n",
 +					      stderr);
 +					ut_error;
 +				}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +			} else if (!recv_sys->found_corrupt_log) {
 +				more_data = recv_sys_add_to_parsing_buf(
 +					log_block, scanned_lsn);
 +			}
 +
 +			recv_sys->scanned_lsn = scanned_lsn;
 +			recv_sys->scanned_checkpoint_no
 +				= log_block_get_checkpoint_no(log_block);
 +		}
 +
 +		if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 +			/* Log data for this group ends here */
 +
 +			finished = TRUE;
 +			break;
 +		} else {
 +			log_block += OS_FILE_LOG_BLOCK_SIZE;
 +		}
 +	} while (log_block < buf + len && !finished);
 +
 +	*group_scanned_lsn = scanned_lsn;
 +
- 	if (recv_needed_recovery
- 	    || (recv_is_from_backup && !recv_is_making_a_backup)) {
- 		recv_scan_print_counter++;
- 
- 		if (finished || (recv_scan_print_counter % 80 == 0)) {
- 
- 			fprintf(stderr,
- 				"InnoDB: Doing recovery: scanned up to"
- 				" log sequence number " LSN_PF "\n",
- 				*group_scanned_lsn);
- 		}
- 	}
- 
 +	if (more_data && !recv_sys->found_corrupt_log) {
 +		/* Try to parse more log records */
 +
 +		recv_parse_log_recs(store_to_hash);
 +
 +#ifndef UNIV_HOTBACKUP
 +		if (store_to_hash
 +		    && mem_heap_get_size(recv_sys->heap) > available_memory) {
 +
 +			/* Hash table of log records has grown too big:
 +			empty it; FALSE means no ibuf operations
 +			allowed, as we cannot add new records to the
 +			log yet: they would be produced by ibuf
 +			operations */
 +
- 			recv_apply_hashed_log_recs(FALSE);
++			recv_apply_hashed_log_recs(false);
 +		}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +		if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
 +			/* Move parsing buffer data to the buffer start */
 +
 +			recv_sys_justify_left_parsing_buf();
 +		}
 +	}
 +
 +	return(finished);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/*******************************************************//**
 +Scans log from a buffer and stores new log data to the parsing buffer. Parses
 +and hashes the log records if new data found.
 +Starting from *contiguous_lsn, segments of RECV_SCAN_SIZE bytes are read
 +from the group into log_sys->buf with log_group_read_log_seg() and passed
 +to recv_scan_log_recs() with store_to_hash set to TRUE, until that function
 +reports that the scan is finished. The memory limit passed to
 +recv_scan_log_recs() is (buf_pool_get_n_pages()
 +- recv_n_pool_free_frames * srv_buf_pool_instances) * UNIV_PAGE_SIZE. */
 +static
 +void
 +recv_group_scan_log_recs(
 +/*=====================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	lsn_t*		contiguous_lsn,	/*!< in/out: it is known that all log
 +					groups contain contiguous log data up
 +					to this lsn */
 +	lsn_t*		group_scanned_lsn)/*!< out: scanning succeeded up to
 +					this lsn */
 +{
 +	ibool	finished;
 +	lsn_t	start_lsn;	/* lsn at which the next segment starts */
 +	lsn_t	end_lsn;	/* start_lsn + RECV_SCAN_SIZE */
 +
 +	finished = FALSE;
 +
 +	start_lsn = *contiguous_lsn;
 +
 +	while (!finished) {
 +		end_lsn = start_lsn + RECV_SCAN_SIZE;
 +
 +		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 +				       group, start_lsn, end_lsn);
 +
 +		finished = recv_scan_log_recs(
 +			(buf_pool_get_n_pages()
 +			- (recv_n_pool_free_frames * srv_buf_pool_instances))
 +			* UNIV_PAGE_SIZE,
 +			TRUE, log_sys->buf, RECV_SCAN_SIZE,
 +			start_lsn, contiguous_lsn, group_scanned_lsn);
 +		start_lsn = end_lsn;
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"InnoDB: Scanned group %lu up to"
 +			" log sequence number " LSN_PF "\n",
 +			(ulong) group->id,
 +			*group_scanned_lsn);
 +	}
 +#endif /* UNIV_DEBUG */
 +}
 +
 +/*******************************************************//**
 +Initialize crash recovery environment. Can be called iff
 +recv_needed_recovery == FALSE.
 +Sets recv_needed_recovery to TRUE and calls
 +fil_load_single_table_tablespaces(). If srv_force_recovery is less than
 +SRV_FORCE_NO_LOG_REDO, it also calls buf_dblwr_process() and creates the
 +recv_writer_thread, storing its handle in recv_writer_thread_handle. */
 +static
 +void
 +recv_init_crash_recovery(void)
 +/*==========================*/
 +{
 +	ut_ad(!srv_read_only_mode);
 +	ut_a(!recv_needed_recovery);
 +
 +	recv_needed_recovery = TRUE;
 +
- 	ib_logf(IB_LOG_LEVEL_INFO, "Database was not shutdown normally!");
- 	ib_logf(IB_LOG_LEVEL_INFO, "Starting crash recovery.");
- 	ib_logf(IB_LOG_LEVEL_INFO,
- 		"Reading tablespace information from the .ibd files...");
- 
 +	fil_load_single_table_tablespaces();
 +
 +	/* If we are using the doublewrite method, we will
 +	check if there are half-written pages in data files,
 +	and restore them from the doublewrite buffer if
 +	possible */
 +
 +	if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
- 			"Restoring possible half-written data pages ");
- 
- 		ib_logf(IB_LOG_LEVEL_INFO,
++			"Restoring possible half-written data pages "
 +			"from the doublewrite buffer...");
 +
 +		buf_dblwr_process();
 +
 +		/* Spawn the background thread to flush dirty pages
 +		from the buffer pools. */
 +		recv_writer_thread_active = true;
 +		recv_writer_thread_handle = os_thread_create(
 +			recv_writer_thread, 0, 0);
 +	}
 +}
 +
 +/********************************************************//**
 +Recovers from a checkpoint. When this function returns, the database is able
 +to start processing of new user transactions, but the function
 +recv_recovery_from_checkpoint_finish should be called later to complete
 +the recovery and free the resources used in it.
 +@return	error code or DB_SUCCESS */
 +UNIV_INTERN
 +dberr_t
 +recv_recovery_from_checkpoint_start_func(
 +/*=====================================*/
 +#ifdef UNIV_LOG_ARCHIVE
 +	ulint	type,		/*!< in: LOG_CHECKPOINT or LOG_ARCHIVE */
 +	lsn_t	limit_lsn,	/*!< in: recover up to this lsn if possible */
 +#endif /* UNIV_LOG_ARCHIVE */
 +	lsn_t	min_flushed_lsn,/*!< in: min flushed lsn from data files */
 +	lsn_t	max_flushed_lsn)/*!< in: max flushed lsn from data files */
 +{
 +	log_group_t*	group;
 +	log_group_t*	max_cp_group;
 +	ulint		max_cp_field;
 +	lsn_t		checkpoint_lsn;
 +	ib_uint64_t	checkpoint_no;
 +	lsn_t		group_scanned_lsn = 0;
 +	lsn_t		contiguous_lsn;
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_group_t*	up_to_date_group;
 +	lsn_t		archived_lsn;
 +#endif /* UNIV_LOG_ARCHIVE */
 +	byte*		buf;
 +	byte		log_hdr_buf[LOG_FILE_HDR_SIZE];
 +	dberr_t		err;
 +
 +	/* Initialize red-black tree for fast insertions into the
 +	flush_list during recovery process. */
 +	buf_flush_init_flush_rbt();
 +
 +	ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	ut_ad(type != LOG_CHECKPOINT || limit_lsn == LSN_MAX);
 +/** TRUE when recovering from a checkpoint */
 +# define TYPE_CHECKPOINT	(type == LOG_CHECKPOINT)
 +/** Recover up to this log sequence number */
 +# define LIMIT_LSN		limit_lsn
 +#else /* UNIV_LOG_ARCHIVE */
 +/** TRUE when recovering from a checkpoint */
 +# define TYPE_CHECKPOINT	1
 +/** Recover up to this log sequence number */
 +# define LIMIT_LSN		LSN_MAX
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"The user has set SRV_FORCE_NO_LOG_REDO on, "
 +			"skipping log redo");
 +
 +		return(DB_SUCCESS);
 +	}
 +
 +	recv_recovery_on = TRUE;
 +
 +	recv_sys->limit_lsn = LIMIT_LSN;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	/* Look for the latest checkpoint from any of the log groups */
 +
 +	err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
 +
 +	if (err != DB_SUCCESS) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return(err);
 +	}
 +
 +	log_group_read_checkpoint_info(max_cp_group, max_cp_field);
 +
 +	buf = log_sys->checkpoint_buf;
 +
 +	checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
 +	checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
 +#ifdef UNIV_LOG_ARCHIVE
 +	archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	/* Read the first log file header to print a note if this is
 +	a recovery from a restored InnoDB Hot Backup */
 +
 +	fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0,
 +	       0, 0, LOG_FILE_HDR_SIZE,
 +	       log_hdr_buf, max_cp_group);
 +
 +	if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 +			   (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
 +
 +		if (srv_read_only_mode) {
 +
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Cannot restore from mysqlbackup, InnoDB "
 +				"running in read-only mode!");
 +
 +			return(DB_ERROR);
 +		}
 +
 +		/* This log file was created by mysqlbackup --restore: print
 +		a note to the user about it */
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"The log file was created by mysqlbackup --apply-log "
 +			"at %s. The following crash recovery is part of a "
 +			"normal restore.",
 +			log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
 +
 +		/* Wipe over the label now */
 +
 +		memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 +		       ' ', 4);
 +		/* Write to the log file to wipe over the label */
 +		fil_io(OS_FILE_WRITE | OS_FILE_LOG, true,
 +		       max_cp_group->space_id, 0,
 +		       0, 0, OS_FILE_LOG_BLOCK_SIZE,
 +		       log_hdr_buf, max_cp_group);
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	while (group) {
 +		log_checkpoint_get_nth_group_info(buf, group->id,
 +						  &(group->archived_file_no),
 +						  &(group->archived_offset));
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if (TYPE_CHECKPOINT) {
 +		/* Start reading the log groups from the checkpoint lsn up. The
 +		variable contiguous_lsn contains an lsn up to which the log is
 +		known to be contiguously written to all log groups. */
 +
 +		recv_sys->parse_start_lsn = checkpoint_lsn;
 +		recv_sys->scanned_lsn = checkpoint_lsn;
 +		recv_sys->scanned_checkpoint_no = 0;
 +		recv_sys->recovered_lsn = checkpoint_lsn;
 +
 +		srv_start_lsn = checkpoint_lsn;
 +	}
 +
 +	contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
 +					      OS_FILE_LOG_BLOCK_SIZE);
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (TYPE_CHECKPOINT) {
 +		up_to_date_group = max_cp_group;
 +	} else {
 +		ulint	capacity;
 +
 +		/* Try to recover the remaining part from logs: first from
 +		the logs of the archived group */
 +
 +		group = recv_sys->archive_group;
 +		capacity = log_group_get_capacity(group);
 +
 +		if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
 +		    || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
 +
 +			mutex_exit(&(log_sys->mutex));
 +
 +			/* The group does not contain enough log: probably
 +			an archived log file was missing or corrupt */
 +
 +			return(DB_ERROR);
 +		}
 +
 +		recv_group_scan_log_recs(group, &contiguous_lsn,
 +					 &group_scanned_lsn);
 +		if (recv_sys->scanned_lsn < checkpoint_lsn) {
 +
 +			mutex_exit(&(log_sys->mutex));
 +
 +			/* The group did not contain enough log: an archived
 +			log file was missing or invalid, or the log group
 +			was corrupt */
 +
 +			return(DB_ERROR);
 +		}
 +
 +		group->scanned_lsn = group_scanned_lsn;
 +		up_to_date_group = group;
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	/* Set the flag to publish that we are doing startup scan. */
 +	recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
 +	while (group) {
 +#ifdef UNIV_LOG_ARCHIVE
 +		lsn_t	old_scanned_lsn	= recv_sys->scanned_lsn;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +		recv_group_scan_log_recs(group, &contiguous_lsn,
 +					 &group_scanned_lsn);
 +		group->scanned_lsn = group_scanned_lsn;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +		if (old_scanned_lsn < group_scanned_lsn) {
 +			/* We found a more up-to-date group */
 +
 +			up_to_date_group = group;
 +		}
 +
 +		if ((type == LOG_ARCHIVE)
 +		    && (group == recv_sys->archive_group)) {
 +			group = UT_LIST_GET_NEXT(log_groups, group);
 +		}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	/* Done with startup scan. Clear the flag. */
 +	recv_log_scan_is_startup_type = FALSE;
 +
 +	if (srv_read_only_mode && recv_needed_recovery) {
 +		return(DB_READ_ONLY);
 +	}
 +
 +	if (TYPE_CHECKPOINT) {
 +		/* NOTE: we always do a 'recovery' at startup, but only if
 +		there is something wrong we will print a message to the
 +		user about recovery: */
 +
 +		if (checkpoint_lsn != max_flushed_lsn
 +		    || checkpoint_lsn != min_flushed_lsn) {
 +
 +			if (checkpoint_lsn < max_flushed_lsn) {
 +
 +				ib_logf(IB_LOG_LEVEL_WARN,
 +					"The log sequence number "
 +					"in the ibdata files is higher "
 +					"than the log sequence number "
 +					"in the ib_logfiles! Are you sure "
 +					"you are using the right "
 +					"ib_logfiles to start up the database. "
 +					"Log sequence number in the "
 +					"ib_logfiles is " LSN_PF ", log"
 +					"sequence numbers stamped "
 +					"to ibdata file headers are between "
 +					"" LSN_PF " and " LSN_PF ".",
 +					checkpoint_lsn,
 +					min_flushed_lsn,
 +					max_flushed_lsn);
 +			}
 +
 +			if (!recv_needed_recovery) {
 +				ib_logf(IB_LOG_LEVEL_INFO,
 +					"The log sequence numbers "
 +					LSN_PF " and " LSN_PF
 +					" in ibdata files do not match"
 +					" the log sequence number "
 +					LSN_PF
 +					" in the ib_logfiles!",
 +					min_flushed_lsn,
 +					max_flushed_lsn,
 +					checkpoint_lsn);
 +
 +				if (!srv_read_only_mode) {
 +					recv_init_crash_recovery();
 +				} else {
 +					ib_logf(IB_LOG_LEVEL_ERROR,
 +						"Can't initiate database "
 +						"recovery, running "
 +						"in read-only-mode.");
 +					return(DB_READ_ONLY);
 +				}
 +			}
 +		}
 +	}
 +
 +	/* We currently have only one log group */
 +	if (group_scanned_lsn < checkpoint_lsn
 +	    || group_scanned_lsn < recv_max_page_lsn) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"We scanned the log up to "
 +			LSN_PF ". A checkpoint was at " LSN_PF
 +			" and the maximum LSN on a database page was " LSN_PF
 +			". It is possible that the database is now corrupt!",
 +			group_scanned_lsn, checkpoint_lsn, recv_max_page_lsn);
 +	}
 +
 +	if (recv_sys->recovered_lsn < checkpoint_lsn) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		if (recv_sys->recovered_lsn >= LIMIT_LSN) {
 +
 +			return(DB_SUCCESS);
 +		}
 +
 +		/* No harm in trying to do RO access. */
 +		if (!srv_read_only_mode) {
 +			ut_error;
 +		}
 +
 +		return(DB_ERROR);
 +	}
 +
 +	/* Synchronize the uncorrupted log groups to the most up-to-date log
 +	group; we also copy checkpoint info to groups */
 +
 +	log_sys->next_checkpoint_lsn = checkpoint_lsn;
 +	log_sys->next_checkpoint_no = checkpoint_no + 1;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_sys->archived_lsn = archived_lsn;
 +
 +	recv_synchronize_groups(up_to_date_group);
 +#else /* UNIV_LOG_ARCHIVE */
 +	recv_synchronize_groups();
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if (!recv_needed_recovery) {
 +		ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
 +	} else {
 +		srv_start_lsn = recv_sys->recovered_lsn;
 +	}
 +
 +	log_sys->lsn = recv_sys->recovered_lsn;
 +
 +	ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
 +	log_sys->buf_next_to_write = log_sys->buf_free;
 +	log_sys->written_to_some_lsn = log_sys->lsn;
 +	log_sys->written_to_all_lsn = log_sys->lsn;
 +
 +	log_sys->last_checkpoint_lsn = checkpoint_lsn;
 +
 +	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 +		    log_sys->lsn - log_sys->last_checkpoint_lsn);
 +
 +	log_sys->next_checkpoint_no = checkpoint_no + 1;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (archived_lsn == LSN_MAX) {
 +
 +		log_sys->archiving_state = LOG_ARCH_OFF;
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	mutex_enter(&recv_sys->mutex);
 +
 +	recv_sys->apply_log_recs = TRUE;
 +
 +	mutex_exit(&recv_sys->mutex);
 +
 +	mutex_exit(&log_sys->mutex);
 +
 +	recv_lsn_checks_on = TRUE;
 +
 +	/* The database is now ready to start almost normal processing of user
 +	transactions: transaction rollbacks and the application of the log
 +	records in the hash table can be run in background. */
 +
 +	return(DB_SUCCESS);
 +
 +#undef TYPE_CHECKPOINT
 +#undef LIMIT_LSN
 +}
 +
 +/********************************************************//**
 +Completes recovery from a checkpoint. If recovery was needed, first calls
 +trx_sys_print_mysql_master_log_pos() and trx_sys_print_mysql_binlog_offset();
 +warns on stderr if a corrupt log was detected; then clears recv_recovery_on,
 +waits until the recv_writer thread is no longer active, and finally calls
 +trx_rollback_or_clean_recovered(FALSE) unless srv_force_recovery is at or
 +above SRV_FORCE_NO_TRX_UNDO. */
 +UNIV_INTERN
 +void
 +recv_recovery_from_checkpoint_finish(void)
 +/*======================================*/
 +{
 +	if (recv_needed_recovery) {
 +		trx_sys_print_mysql_master_log_pos();
 +		trx_sys_print_mysql_binlog_offset();
 +	}
 +
 +	if (recv_sys->found_corrupt_log) {
 +
 +		fprintf(stderr,
 +			"InnoDB: WARNING: the log file may have been"
 +			" corrupt and it\n"
 +			"InnoDB: is possible that the log scan or parsing"
 +			" did not proceed\n"
 +			"InnoDB: far enough in recovery. Please run"
 +			" CHECK TABLE\n"
 +			"InnoDB: on your InnoDB tables to check that"
 +			" they are ok!\n"
 +			"InnoDB: It may be safest to recover your"
 +			" InnoDB database from\n"
 +			"InnoDB: a backup!\n");
 +	}
 +
 +	/* Make sure that the recv_writer thread is done. This is
 +	required because it grabs various mutexes and we want to
 +	ensure that when we enable sync_order_checks there is no
 +	mutex currently held by any thread. */
 +	mutex_enter(&recv_sys->writer_mutex);
 +
 +	/* Free the resources of the recovery system */
 +	recv_recovery_on = FALSE;
 +
 +	/* By acquiring the mutex we ensure that the recv_writer thread
 +	won't trigger any more LRU batches. Now wait for currently
 +	in progress batches to finish. */
 +	buf_flush_wait_LRU_batch_end();
 +
 +	mutex_exit(&recv_sys->writer_mutex);
 +
 +	ulint count = 0;	/* polls since the last progress message */
 +	while (recv_writer_thread_active) {
 +		++count;
 +		os_thread_sleep(100000);
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for recv_writer to "
 +				"finish flushing of buffer pool");
 +			count = 0;	/* restart the reporting interval */
 +		}
 +	}
 +
 +#ifdef __WIN__
 +	if (recv_writer_thread_handle) {
 +		CloseHandle(recv_writer_thread_handle);
 +	}
 +#endif /* __WIN__ */
 +
 +#ifndef UNIV_LOG_DEBUG
 +	recv_sys_debug_free();
 +#endif
 +	/* Roll back any recovered data dictionary transactions, so
 +	that the data dictionary tables will be free of any locks.
 +	The data dictionary latch should guarantee that there is at
 +	most one data dictionary transaction active at a time. */
 +	if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
 +		trx_rollback_or_clean_recovered(FALSE);
 +	}
 +}
 +
 +/********************************************************//**
 +Initiates the rollback of active transactions. Unless srv_force_recovery
 +is at or above SRV_FORCE_NO_TRX_UNDO or the server runs in read-only mode,
 +this drops partially created indexes, temporary tables and orphaned
 +auxiliary tables, and then creates a thread that runs
 +trx_rollback_or_clean_all_recovered. In UNIV_SYNC_DEBUG builds it first
 +sleeps and switches the latching order checks on. */
 +UNIV_INTERN
 +void
 +recv_recovery_rollback_active(void)
 +/*===============================*/
 +{
 +#ifdef UNIV_SYNC_DEBUG
 +	/* Wait for a while so that created threads have time to suspend
 +	themselves before we switch the latching order checks on */
 +	os_thread_sleep(1000000);
 +
 +	ut_ad(!recv_writer_thread_active);
 +
 +	/* Switch latching order checks on in sync0sync.cc */
 +	sync_order_checks_on = TRUE;
 +#endif
 +	/* We can't start any (DDL) transactions if UNDO logging
 +	has been disabled, additionally disable ROLLBACK of recovered
 +	user transactions. */
 +	if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
 +	    && !srv_read_only_mode) {
 +
 +		/* Drop partially created indexes. */
 +		row_merge_drop_temp_indexes();
 +		/* Drop temporary tables. */
 +		row_mysql_drop_temp_tables();
 +
 +		/* Drop any auxiliary tables that were not dropped when the
 +		parent table was dropped. This can happen if the parent table
 +		was dropped but the server crashed before the auxiliary tables
 +		were dropped. */
 +		fts_drop_orphaned_tables();
 +
 +		/* Rollback the uncommitted transactions which have no user
 +		session. The flag is raised here, before the thread that
 +		performs the rollback is created. */
 +
 +		trx_rollback_or_clean_is_active = true;
 +		os_thread_create(trx_rollback_or_clean_all_recovered, 0, 0);
 +	}
 +}
 +
 +/******************************************************//**
 +Resets the logs. The contents of log files will be lost!
 +The caller must hold log_sys->mutex (asserted in debug builds). The mutex
 +is released around the call to log_make_checkpoint_at() and acquired again
 +before returning, so the caller still owns it afterwards. */
 +UNIV_INTERN
 +void
 +recv_reset_logs(
 +/*============*/
 +#ifdef UNIV_LOG_ARCHIVE
 +	ulint		arch_log_no,	/*!< in: next archived log file number */
 +	ibool		new_logs_created,/*!< in: TRUE if resetting logs
 +					is done at the log creation;
 +					FALSE if it is done after
 +					archive recovery */
 +#endif /* UNIV_LOG_ARCHIVE */
 +	lsn_t		lsn)		/*!< in: reset to this lsn
 +					rounded up to be divisible by
 +					OS_FILE_LOG_BLOCK_SIZE, after
 +					which we add
 +					LOG_BLOCK_HDR_SIZE */
 +{
 +	log_group_t*	group;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	while (group) {	/* point every log group at the new start lsn */
 +		group->lsn = log_sys->lsn;
 +		group->lsn_offset = LOG_FILE_HDR_SIZE;
 +#ifdef UNIV_LOG_ARCHIVE
 +		group->archived_file_no = arch_log_no;
 +		group->archived_offset = 0;
 +
 +		if (!new_logs_created) {
 +			recv_truncate_group(group, group->lsn, group->lsn,
 +					    group->lsn, group->lsn);
 +		}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	log_sys->buf_next_to_write = 0;
 +	log_sys->written_to_some_lsn = log_sys->lsn;
 +	log_sys->written_to_all_lsn = log_sys->lsn;
 +
 +	log_sys->next_checkpoint_no = 0;
 +	log_sys->last_checkpoint_lsn = 0;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_sys->archived_lsn = log_sys->lsn;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	log_block_init(log_sys->buf, log_sys->lsn);
 +	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 +
 +	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
 +	log_sys->lsn += LOG_BLOCK_HDR_SIZE;	/* skip the block header */
 +
 +	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 +		    (log_sys->lsn - log_sys->last_checkpoint_lsn));
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Reset the checkpoint fields in logs. This is done without
 +	holding log_sys->mutex, which is taken again right afterwards. */
 +
 +	log_make_checkpoint_at(LSN_MAX, TRUE);
 +
 +	mutex_enter(&(log_sys->mutex));
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +#ifdef UNIV_HOTBACKUP
 +/******************************************************//**
 +Creates new log files after a backup has been restored. Every file
 +ib_logfile<N>, N = 0 .. n_log_files - 1, is created and set to
 +log_file_size. The first file then receives a log file header followed by
 +one initialized log block. A failure to create, resize, open or write a
 +file is reported on stderr and terminates the process with exit(1). */
 +UNIV_INTERN
 +void
 +recv_reset_log_files_for_backup(
 +/*============================*/
 +	const char*	log_dir,	/*!< in: log file directory path; it
 +					is used as a plain prefix of the file
 +					name, no separator is inserted */
 +	ulint		n_log_files,	/*!< in: number of log files */
 +	lsn_t		log_file_size,	/*!< in: log file size */
 +	lsn_t		lsn)		/*!< in: new start lsn, must be
 +					divisible by OS_FILE_LOG_BLOCK_SIZE */
 +{
 +	os_file_t	log_file;
 +	ibool		success;
 +	byte*		buf;
 +	ulint		i;
 +	ulint		log_dir_len;
 +	char		name[5000];
 +	static const char ib_logfile_basename[] = "ib_logfile";
 +
 +	log_dir_len = strlen(log_dir);
 +	/* full path name of ib_logfile consists of log dir path + basename
 +	+ number. This must fit in the name buffer.
 +	*/
 +	ut_a(log_dir_len + strlen(ib_logfile_basename) + 11  < sizeof(name));
 +
 +	/* The allocator returns an untyped pointer; C++ requires an
 +	explicit conversion to byte*. */
 +	buf = static_cast<byte*>(
 +		ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +	memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 +
 +	for (i = 0; i < n_log_files; i++) {
 +
 +		sprintf(name, "%s%s%lu", log_dir,
 +			ib_logfile_basename, (ulong) i);
 +
 +		log_file = os_file_create_simple(innodb_file_log_key,
 +						 name, OS_FILE_CREATE,
 +						 OS_FILE_READ_WRITE,
 +						 &success);
 +		if (!success) {
 +			fprintf(stderr,
 +				"InnoDB: Cannot create %s. Check that"
 +				" the file does not exist yet.\n", name);
 +
 +			exit(1);
 +		}
 +
 +		/* LSN_PF is the format that matches lsn_t */
 +		fprintf(stderr,
 +			"Setting log file size to " LSN_PF "\n",
 +			log_file_size);
 +
 +		success = os_file_set_size(name, log_file, log_file_size);
 +
 +		if (!success) {
 +			fprintf(stderr,
 +				"InnoDB: Cannot set %s size to " LSN_PF "\n",
 +				name, log_file_size);
 +			exit(1);
 +		}
 +
 +		os_file_flush(log_file);
 +		os_file_close(log_file);
 +	}
 +
 +	/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
 +
 +	log_reset_first_header_and_checkpoint(buf, lsn);
 +
 +	log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
 +	log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
 +				      LOG_BLOCK_HDR_SIZE);
 +	sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
 +
 +	log_file = os_file_create_simple(innodb_file_log_key,
 +					 name, OS_FILE_OPEN,
 +					 OS_FILE_READ_WRITE, &success);
 +	if (!success) {
 +		fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
 +
 +		exit(1);
 +	}
 +
 +	/* Without the header and the first block the new log files are
 +	unusable, so a failed write must not pass silently. */
 +	success = os_file_write(name, log_file, buf, 0,
 +				LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 +	if (!success) {
 +		fprintf(stderr, "InnoDB: Cannot write to %s.\n", name);
 +
 +		exit(1);
 +	}
 +
 +	os_file_flush(log_file);
 +	os_file_close(log_file);
 +
 +	ut_free(buf);
 +}
 +#endif /* UNIV_HOTBACKUP */
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +/* Dead code */
 +/******************************************************//**
 +Reads from the archive of a log group and performs recovery. The archived
 +file is named by log_archived_file_name_gen() from the group id and
 +group->archived_file_no; if it cannot be opened, the operator is asked on
 +stderr/stdin whether more files will be copied (Y retries, N gives up).
 +The file header is validated and the file is then scanned in pieces of at
 +most RECV_SCAN_SIZE bytes with recv_scan_log_recs().
 +@return	TRUE if no more complete consistent archive files */
 +static
 +ibool
 +log_group_recover_from_archive_file(
 +/*================================*/
 +	log_group_t*	group)		/*!< in: log group */
 +{
 +	os_file_t	file_handle;
 +	ib_uint64_t	start_lsn;
 +	ib_uint64_t	file_end_lsn;
 +	ib_uint64_t	dummy_lsn;
 +	ib_uint64_t	scanned_lsn;
 +	ulint		len;
 +	ibool		ret;
 +	byte*		buf;
 +	os_offset_t	read_offset;
 +	os_offset_t	file_size;
 +	int		input_char;
 +	char		name[10000];
 +
 +	ut_a(0);	/* dead code: asserts before doing anything */
 +
 +try_open_again:
 +	buf = log_sys->buf;
 +
 +	/* Add the file to the archive file space; open the file */
 +
 +	log_archived_file_name_gen(name, group->id, group->archived_file_no);
 +
 +	file_handle = os_file_create(innodb_file_log_key,
 +				     name, OS_FILE_OPEN,
 +				     OS_FILE_LOG, OS_FILE_AIO, &ret);
 +
 +	if (ret == FALSE) {
 +ask_again:
 +		fprintf(stderr,
 +			"InnoDB: Do you want to copy additional"
 +			" archived log files\n"
 +			"InnoDB: to the directory\n");
 +		fprintf(stderr,
 +			"InnoDB: or were these all the files needed"
 +			" in recovery?\n");
 +		fprintf(stderr,
 +			"InnoDB: (Y == copy more files; N == this is all)?");
 +
 +		input_char = getchar();
 +
 +		if (input_char == (int) 'N') {
 +
 +			return(TRUE);
 +		} else if (input_char == (int) 'Y') {
 +
 +			goto try_open_again;
 +		} else {
 +			goto ask_again;	/* any other input: prompt again */
 +		}
 +	}
 +
 +	file_size = os_file_get_size(file_handle);
 +	ut_a(file_size != (os_offset_t) -1);
 +
 +	fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
 +
 +	ret = os_file_close(file_handle);	/* asserted further below */
 +
 +	if (file_size < LOG_FILE_HDR_SIZE) {
 +		fprintf(stderr,
 +			"InnoDB: Archive file header incomplete %s\n", name);
 +
 +		return(TRUE);
 +	}
 +
 +	ut_a(ret);
 +
 +	/* Add the archive file as a node to the space */
 +
 +	fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
 +			group->archive_space_id, FALSE);
 +#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
 +# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
 +#endif
 +
 +	/* Read the archive file header */
 +	fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, 0,
 +	       LOG_FILE_HDR_SIZE, buf, NULL);
 +
 +	/* Check if the archive file header is consistent */
 +
 +	if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
 +	    || mach_read_from_4(buf + LOG_FILE_NO)
 +	    != group->archived_file_no) {
 +		fprintf(stderr,
 +			"InnoDB: Archive file header inconsistent %s\n", name);
 +
 +		return(TRUE);
 +	}
 +
 +	if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
 +		fprintf(stderr,
 +			"InnoDB: Archive file not completely written %s\n",
 +			name);
 +
 +		return(TRUE);
 +	}
 +
 +	start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
 +	file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
 +
 +	if (!recv_sys->scanned_lsn) {
 +
 +		if (recv_sys->parse_start_lsn < start_lsn) {
 +			fprintf(stderr,
 +				"InnoDB: Archive log file %s"
 +				" starts from too big a lsn\n",
 +				name);
 +			return(TRUE);
 +		}
 +
 +		recv_sys->scanned_lsn = start_lsn;
 +	}
 +
 +	if (recv_sys->scanned_lsn != start_lsn) {
 +
 +		fprintf(stderr,
 +			"InnoDB: Archive log file %s starts from"
 +			" a wrong lsn\n",
 +			name);
 +		return(TRUE);
 +	}
 +
 +	read_offset = LOG_FILE_HDR_SIZE;
 +
 +	for (;;) {
 +		len = RECV_SCAN_SIZE;
 +
 +		if (read_offset + len > file_size) {
 +			len = ut_calc_align_down(file_size - read_offset,
 +						 OS_FILE_LOG_BLOCK_SIZE);
 +		}
 +
 +		if (len == 0) {
 +
 +			break;
 +		}
 +
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +			fprintf(stderr,
 +				"InnoDB: Archive read starting at"
 +				" lsn %llu, len %lu from file %s\n",
 +				start_lsn,
 +				(ulong) len, name);
 +		}
 +#endif /* UNIV_DEBUG */
 +
 +		fil_io(OS_FILE_READ | OS_FILE_LOG, true,
 +		       group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
 +		       read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
 +
 +		ret = recv_scan_log_recs(
 +			(buf_pool_get_n_pages()
 +			- (recv_n_pool_free_frames * srv_buf_pool_instances))
 +			* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
 +			&dummy_lsn, &scanned_lsn);
 +
 +		if (scanned_lsn == file_end_lsn) {
 +
 +			return(FALSE);
 +		}
 +
 +		if (ret) {
 +			fprintf(stderr,
 +				"InnoDB: Archive log file %s"
 +				" does not scan right\n",
 +				name);
 +			return(TRUE);
 +		}
 +
 +		read_offset += len;
 +		start_lsn += len;
 +
 +		ut_ad(start_lsn == scanned_lsn);
 +	}
 +
 +	return(FALSE);
 +}
 +
 +/********************************************************//**
 +Recovers from archived log files, and also from log files, if they exist.
 +Always looks for the log group with id 0, processes archived files starting
 +from first_log_no until log_group_recover_from_archive_file() returns TRUE,
 +and continues with recv_recovery_from_checkpoint_start() if limit_lsn has
 +not been reached.
 +@return	error code or DB_SUCCESS */
 +UNIV_INTERN
 +ulint
 +recv_recovery_from_archive_start(
 +/*=============================*/
 +	ib_uint64_t	min_flushed_lsn,/*!< in: min flushed lsn field from the
 +					data files */
 +	ib_uint64_t	limit_lsn,	/*!< in: recover up to this lsn if
 +					possible */
 +	ulint		first_log_no)	/*!< in: number of the first archived
 +					log file to use in the recovery; the
 +					file will be searched from
 +					INNOBASE_LOG_ARCH_DIR specified in
 +					server config file */
 +{
 +	log_group_t*	group;
 +	ulint		group_id;
 +	ulint		trunc_len;
 +	ibool		ret;
 +	ulint		err;
 +
 +	ut_a(0);	/* asserts unconditionally before doing anything */
 +
 +	recv_sys_create();
 +	recv_sys_init(buf_pool_get_curr_size());
 +
 +	recv_recovery_on = TRUE;
 +	recv_recovery_from_backup_on = TRUE;
 +
 +	recv_sys->limit_lsn = limit_lsn;
 +
 +	group_id = 0;
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	while (group) {
 +		if (group->id == group_id) {
 +
 +			break;
 +		}
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	if (!group) {
 +		fprintf(stderr,
 +			"InnoDB: There is no log group defined with id %lu!\n",
 +			(ulong) group_id);
 +		return(DB_ERROR);
 +	}
 +
 +	group->archived_file_no = first_log_no;
 +
 +	recv_sys->parse_start_lsn = min_flushed_lsn;
 +
 +	recv_sys->scanned_lsn = 0;
 +	recv_sys->scanned_checkpoint_no = 0;
 +	recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 +
 +	recv_sys->archive_group = group;
 +
 +	ret = FALSE;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	while (!ret) {
 +		ret = log_group_recover_from_archive_file(group);
 +
 +		/* Close and truncate a possible processed archive file
 +		from the file space */
 +
 +		trunc_len = UNIV_PAGE_SIZE
 +			* fil_space_get_size(group->archive_space_id);
 +		if (trunc_len > 0) {
 +			fil_space_truncate_start(group->archive_space_id,
 +						 trunc_len);
 +		}
 +
 +		group->archived_file_no++;
 +	}
 +
 +	if (recv_sys->recovered_lsn < limit_lsn) {
 +
 +		if (!recv_sys->scanned_lsn) {
 +
 +			recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 +		}
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
 +							  limit_lsn,
 +							  LSN_MAX,
 +							  LSN_MAX);
 +		if (err != DB_SUCCESS) {
 +
 +			return(err);	/* log_sys->mutex is not held here */
 +		}
 +
 +		mutex_enter(&(log_sys->mutex));
 +	}
 +
 +	if (limit_lsn != LSN_MAX) {
 +
- 		recv_apply_hashed_log_recs(FALSE);
++		recv_apply_hashed_log_recs(false);
 +
 +		recv_reset_logs(0, FALSE, recv_sys->recovered_lsn);
 +	}
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/********************************************************//**
 +Completes recovery from archive: runs the same completion steps as
 +recovery from a checkpoint (recv_recovery_from_checkpoint_finish()) and
 +then clears the recv_recovery_from_backup_on flag. */
 +UNIV_INTERN
 +void
 +recv_recovery_from_archive_finish(void)
 +/*===================================*/
 +{
 +	recv_recovery_from_checkpoint_finish();
 +
 +	recv_recovery_from_backup_on = FALSE;
 +}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +
 +/** Remembers a page by appending its pointer to the end of the page list.
 +@param[in]	page	pointer to the page to remember */
 +void recv_dblwr_t::add(byte* page)
 +{
 +	pages.insert(pages.end(), page);
 +}
 +
 +/** Looks up a page among the pages remembered in the page list.
 +@param[in]	space_id	tablespace identifier to match
 +@param[in]	page_no		page number to match
 +@return the only matching page; with several matches, the first one that
 +carries the greatest nonzero FIL_PAGE_LSN; NULL if nothing qualifies */
 +byte* recv_dblwr_t::find_page(ulint space_id, ulint page_no)
 +{
 +	byte*	first_match	= 0;
 +	byte*	newest		= 0;
 +	lsn_t	newest_lsn	= 0;
 +	ulint	n_matches	= 0;
 +
 +	for (std::list<byte*>::iterator it = pages.begin();
 +	     it != pages.end(); ++it) {
 +
 +		byte*	candidate = *it;
 +
 +		if (page_get_space_id(candidate) != space_id
 +		    || page_get_page_no(candidate) != page_no) {
 +			continue;
 +		}
 +
 +		if (n_matches++ == 0) {
 +			first_match = candidate;
 +		}
 +
 +		/* Only a strictly greater LSN replaces the current choice,
 +		so the earliest page wins among equal LSNs. */
 +		const lsn_t	candidate_lsn = mach_read_from_8(
 +			candidate + FIL_PAGE_LSN);
 +
 +		if (candidate_lsn > newest_lsn) {
 +			newest_lsn = candidate_lsn;
 +			newest = candidate;
 +		}
 +	}
 +
 +	/* A single match is returned regardless of its LSN. */
 +	return(n_matches == 1 ? first_match : newest);
 +}
 +
diff --cc storage/innobase/srv/srv0start.cc
index 5fd3adb3191,00000000000..127ccb34f52
mode 100644,000000..100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@@ -1,3255 -1,0 +1,3255 @@@
 +/*****************************************************************************
 +
 +Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.
 +Copyright (c) 2008, Google Inc.
 +Copyright (c) 2009, Percona Inc.
- Copyright (c) 2013, 2017, MariaDB Corporation
++Copyright (c) 2013, 2017, MariaDB Corporation.
 +
 +Portions of this file contain modifications contributed and copyrighted by
 +Google, Inc. Those modifications are gratefully acknowledged and are described
 +briefly in the InnoDB documentation. The contributions by Google are
 +incorporated with their permission, and subject to the conditions contained in
 +the file COPYING.Google.
 +
 +Portions of this file contain modifications contributed and copyrighted
 +by Percona Inc.. Those modifications are
 +gratefully acknowledged and are described briefly in the InnoDB
 +documentation. The contributions by Percona Inc. are incorporated with
 +their permission, and subject to the conditions contained in the file
 +COPYING.Percona.
 +
 +This program is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free Software
 +Foundation; version 2 of the License.
 +
 +This program is distributed in the hope that it will be useful, but WITHOUT
 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License along with
 +this program; if not, write to the Free Software Foundation, Inc.,
 +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
 +
 +*****************************************************************************/
 +
 +/********************************************************************//**
 +@file srv/srv0start.cc
 +Starts the InnoDB database server
 +
 +Created 2/16/1996 Heikki Tuuri
 +*************************************************************************/
 +
 +#include "mysqld.h"
 +#include "pars0pars.h"
 +#include "row0ftsort.h"
 +#include "ut0mem.h"
 +#include "mem0mem.h"
 +#include "data0data.h"
 +#include "data0type.h"
 +#include "dict0dict.h"
 +#include "buf0buf.h"
 +#include "buf0dump.h"
 +#include "os0file.h"
 +#include "os0thread.h"
 +#include "fil0fil.h"
 +#include "fsp0fsp.h"
 +#include "rem0rec.h"
 +#include "mtr0mtr.h"
 +#include "log0log.h"
 +#include "log0recv.h"
 +#include "page0page.h"
 +#include "page0cur.h"
 +#include "trx0trx.h"
 +#include "trx0sys.h"
 +#include "btr0btr.h"
 +#include "btr0cur.h"
 +#include "rem0rec.h"
 +#include "ibuf0ibuf.h"
 +#include "srv0start.h"
 +#include "srv0srv.h"
 +#ifndef UNIV_HOTBACKUP
 +# include "trx0rseg.h"
 +# include "os0proc.h"
 +# include "sync0sync.h"
 +# include "buf0flu.h"
 +# include "buf0rea.h"
 +# include "dict0boot.h"
 +# include "dict0load.h"
 +# include "dict0stats_bg.h"
 +# include "que0que.h"
 +# include "usr0sess.h"
 +# include "lock0lock.h"
 +# include "trx0roll.h"
 +# include "trx0purge.h"
 +# include "lock0lock.h"
 +# include "pars0pars.h"
 +# include "btr0sea.h"
 +# include "rem0cmp.h"
 +# include "dict0crea.h"
 +# include "row0ins.h"
 +# include "row0sel.h"
 +# include "row0upd.h"
 +# include "row0row.h"
 +# include "row0mysql.h"
 +# include "btr0pcur.h"
 +# include "os0sync.h"
 +# include "zlib.h"
 +# include "ut0crc32.h"
 +
 +/** Log sequence number immediately after startup */
 +UNIV_INTERN lsn_t	srv_start_lsn;
 +/** Log sequence number at shutdown */
 +UNIV_INTERN lsn_t	srv_shutdown_lsn;
 +
 +#ifdef HAVE_DARWIN_THREADS
 +# include <sys/utsname.h>
 +/** TRUE if the F_FULLFSYNC option is available */
 +UNIV_INTERN ibool	srv_have_fullfsync = FALSE;
 +#endif
 +
 +/** TRUE if a raw partition is in use */
 +UNIV_INTERN ibool	srv_start_raw_disk_in_use = FALSE;
 +
 +/** TRUE if the server is being started, before rolling back any
 +incomplete transactions */
 +UNIV_INTERN ibool	srv_startup_is_before_trx_rollback_phase = FALSE;
 +/** TRUE if the server is being started */
 +UNIV_INTERN ibool	srv_is_being_started = FALSE;
 +/** TRUE if the server was successfully started */
 +UNIV_INTERN ibool	srv_was_started = FALSE;
 +/** TRUE if innobase_start_or_create_for_mysql() has been called */
 +static ibool		srv_start_has_been_called = FALSE;
 +
 +/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
 +SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
 +UNIV_INTERN enum srv_shutdown_state	srv_shutdown_state = SRV_SHUTDOWN_NONE;
 +
 +/** Files comprising the system tablespace */
 +static os_file_t	files[1000];
 +
 +/** io_handler_thread parameters for thread identification */
 +static ulint		n[SRV_MAX_N_IO_THREADS + 6];
 +/** io_handler_thread identifiers, 32 is the maximum number of purge threads */
 +static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32];
 +
 +/** Thread handles */
 +static os_thread_t	thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32];
 +static os_thread_t	buf_flush_page_cleaner_thread_handle;
 +static os_thread_t	buf_dump_thread_handle;
 +static os_thread_t	dict_stats_thread_handle;
 +/** Status variables: has the corresponding thread been started? */
 +static bool		thread_started[SRV_MAX_N_IO_THREADS + 6 + 32] = {false};
 +static bool		buf_flush_page_cleaner_thread_started = false;
 +static bool		buf_dump_thread_started = false;
 +static bool		dict_stats_thread_started = false;
 +
 +/** We use this mutex to test the return value of pthread_mutex_trylock
 +   on successful locking. HP-UX does NOT return 0, though Linux et al do. */
 +static os_fast_mutex_t	srv_os_test_mutex;
 +
 +/** Name of srv_monitor_file */
 +static char*	srv_monitor_file_name;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/** Default undo tablespace size in UNIV_PAGEs count (10MB). */
 +static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
 +	((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
 +
 +/** Pending I/O limits (meaning inferred from the macro names): the
 +per-thread value is taken from OS_AIO_N_PENDING_IOS_PER_THREAD */
 +#define SRV_N_PENDING_IOS_PER_THREAD	OS_AIO_N_PENDING_IOS_PER_THREAD
 +#define SRV_MAX_N_PENDING_SYNC_IOS	100
 +
 +/** The round off to MB is similar as done in srv_parse_megabytes().
 +The expansion is parenthesized as a whole so that it stays a single
 +operand when the macro is used inside a larger expression. */
 +#define CALC_NUMBER_OF_PAGES(size)  (((size) / (1024 * 1024)) * \
 +				  ((1024 * 1024) / (UNIV_PAGE_SIZE)))
 +#ifdef UNIV_PFS_THREAD
 +/* Keys to register InnoDB threads with performance schema; they are only
 +defined when UNIV_PFS_THREAD is set */
 +UNIV_INTERN mysql_pfs_key_t	io_handler_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_lock_timeout_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_error_monitor_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_monitor_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_master_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_purge_thread_key;
 +#endif /* UNIV_PFS_THREAD */
 +
 +/*********************************************************************//**
 +Parses a decimal quantity that may carry a G, M or K suffix (either case)
 +and converts it to megabytes: G multiplies by 1024, M leaves the value
 +as it is, K divides by 1024, and a number without any of these suffixes
 +is divided by 1024 * 1024. A recognized suffix is consumed.
 +@return	next character in string */
 +static
 +char*
 +srv_parse_megabytes(
 +/*================*/
 +	char*	str,	/*!< in: string containing a quantity in bytes */
 +	ulint*	megs)	/*!< out: the number in megabytes */
 +{
 +	char*	rest;
 +	ulint	n_megs = strtoul(str, &rest, 10);
 +	const char	unit = *rest;
 +
 +	if (unit == 'G' || unit == 'g') {
 +		n_megs *= 1024;
 +		rest++;
 +	} else if (unit == 'M' || unit == 'm') {
 +		rest++;
 +	} else if (unit == 'K' || unit == 'k') {
 +		n_megs /= 1024;
 +		rest++;
 +	} else {
 +		/* No unit: the number was given in bytes, and the
 +		character after it is left for the caller. */
 +		n_megs /= 1024 * 1024;
 +	}
 +
 +	*megs = n_megs;
 +
 +	return(rest);
 +}
 +
 +/*********************************************************************//**
 +Check if a file can be opened in read-write mode. Every rejection is
 +reported through ib_logf() at error level.
 +@return	true if it doesn't exist or can be opened in rw mode. */
 +static
 +bool
 +srv_file_check_mode(
 +/*================*/
 +	const char*	name)		/*!< in: filename to check */
 +{
 +	os_file_stat_t	file_info;
 +
 +	memset(&file_info, 0x0, sizeof(file_info));
 +
 +	const dberr_t	status = os_file_get_status(name, &file_info, true);
 +
 +	if (status == DB_FAIL) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"os_file_get_status() failed on '%s'. Can't determine "
 +			"file permissions", name);
 +
 +		return(false);
 +	}
 +
 +	if (status != DB_SUCCESS) {
 +
 +		/* This is OK. If the file create fails on RO media, there
 +		is nothing we can do. */
 +
 +		ut_a(status == DB_NOT_FOUND);
 +
 +		return(true);
 +	}
 +
 +	if (file_info.type != OS_FILE_TYPE_FILE) {
 +		/* Not a regular file, bail out. */
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"'%s' not a regular file.", name);
 +
 +		return(false);
 +	}
 +
 +	/* Note: rw_perm is only looked at for regular files */
 +
 +	if (!file_info.rw_perm) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"%s can't be opened in %s mode",
 +			name,
 +			srv_read_only_mode
 +			? "read" : "read-write");
 +
 +		return(false);
 +	}
 +
 +	return(true);
 +}
 +
 +/*********************************************************************//**
 +Reads the data files and their sizes from a character string given in
 +the .cnf file. The string is scanned twice: the first pass counts the
 +entries and checks the syntax, the second pass fills the arrays
 +srv_data_file_names, srv_data_file_sizes and
 +srv_data_file_is_raw_partition and sets srv_n_data_files. The input
 +string is modified in place and the stored file names point into it,
 +so it must stay allocated for as long as those names are used.
 +@return	TRUE if ok, FALSE on parse error */
 +UNIV_INTERN
 +ibool
 +srv_parse_data_file_paths_and_sizes(
 +/*================================*/
 +	char*	str)	/*!< in/out: the data file path string */
 +{
 +	char*	input_str;
 +	char*	path;
 +	ulint	size;
 +	ulint	i	= 0;
 +
 +	srv_auto_extend_last_data_file = FALSE;
 +	srv_last_file_size_max = 0;
 +	srv_data_file_names = NULL;
 +	srv_data_file_sizes = NULL;
 +	srv_data_file_is_raw_partition = NULL;
 +
 +	input_str = str;
 +
 +	/* First calculate the number of data files and check syntax:
 +	path:size[M | G];path:size[M | G]... . Note that a Windows path may
 +	contain a drive name and a ':'. */
 +
 +	while (*str != '\0') {
 +		path = str;
 +
 +		while ((*str != ':' && *str != '\0')
 +		       || (*str == ':'
 +			   && (*(str + 1) == '\\' || *(str + 1) == '/'
 +			       || *(str + 1) == ':'))) {
 +			str++;
 +		}
 +
 +		if (*str == '\0') {
 +			return(FALSE);	/* no ':' separating path and size */
 +		}
 +
 +		str++;
 +
 +		str = srv_parse_megabytes(str, &size);
 +
 +		if (0 == strncmp(str, ":autoextend",
 +				 (sizeof ":autoextend") - 1)) {
 +
 +			str += (sizeof ":autoextend") - 1;
 +
 +			if (0 == strncmp(str, ":max:",
 +					 (sizeof ":max:") - 1)) {
 +
 +				str += (sizeof ":max:") - 1;
 +
 +				str = srv_parse_megabytes(str, &size);
 +			}
 +
 +			if (*str != '\0') {
 +
 +				return(FALSE);	/* autoextend must come last */
 +			}
 +		}
 +
 +		if (strlen(str) >= 6
 +		    && *str == 'n'
 +		    && *(str + 1) == 'e'
 +		    && *(str + 2) == 'w') {
 +			str += 3;
 +		}
 +
 +		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
 +			str += 3;
 +		}
 +
 +		if (size == 0) {
 +			return(FALSE);
 +		}
 +
 +		i++;
 +
 +		if (*str == ';') {
 +			str++;
 +		} else if (*str != '\0') {
 +
 +			return(FALSE);
 +		}
 +	}
 +
 +	if (i == 0) {
 +		/* If innodb_data_file_path was defined it must contain
 +		at least one data file definition */
 +
 +		return(FALSE);
 +	}
 +
 +	srv_data_file_names = static_cast<char**>(
 +		malloc(i * sizeof *srv_data_file_names));
 +
 +	srv_data_file_sizes = static_cast<ulint*>(
 +		malloc(i * sizeof *srv_data_file_sizes));
 +
 +	srv_data_file_is_raw_partition = static_cast<ulint*>(
 +		malloc(i * sizeof *srv_data_file_is_raw_partition));
 +
 +	srv_n_data_files = i;
 +
 +	/* Then store the actual values to our arrays. The arrays allocated
 +	above are used without checking the malloc() results. Each stored
 +	path points into the input string, which is cut into
 +	null-terminated pieces below. */
 +
 +	str = input_str;
 +	i = 0;
 +
 +	while (*str != '\0') {
 +		path = str;
 +
 +		/* Note that we must step over the ':' in a Windows path;
 +		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
 +		a Windows raw partition may have a specification like
 +		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
 +
 +		while ((*str != ':' && *str != '\0')
 +		       || (*str == ':'
 +			   && (*(str + 1) == '\\' || *(str + 1) == '/'
 +			       || *(str + 1) == ':'))) {
 +			str++;
 +		}
 +
 +		if (*str == ':') {
 +			/* Make path a null-terminated string */
 +			*str = '\0';
 +			str++;
 +		}
 +
 +		str = srv_parse_megabytes(str, &size);
 +
 +		srv_data_file_names[i] = path;
 +		srv_data_file_sizes[i] = size;
 +
 +		if (0 == strncmp(str, ":autoextend",
 +				 (sizeof ":autoextend") - 1)) {
 +
 +			srv_auto_extend_last_data_file = TRUE;
 +
 +			str += (sizeof ":autoextend") - 1;
 +
 +			if (0 == strncmp(str, ":max:",
 +					 (sizeof ":max:") - 1)) {
 +
 +				str += (sizeof ":max:") - 1;
 +
 +				str = srv_parse_megabytes(
 +					str, &srv_last_file_size_max);
 +			}
 +
 +			if (*str != '\0') {
 +
 +				return(FALSE);
 +			}
 +		}
 +
 +		(srv_data_file_is_raw_partition)[i] = 0;
 +
 +		if (strlen(str) >= 6
 +		    && *str == 'n'
 +		    && *(str + 1) == 'e'
 +		    && *(str + 2) == 'w') {
 +			str += 3;
 +			/* Initialize new raw device only during bootstrap */
 +			(srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
 +		}
 +
 +		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
 +			str += 3;
 +
 +			/* Initialize new raw device only during bootstrap.
 +			NOTE(review): a bare "raw" suffix (without "new")
 +			is also recorded as SRV_NEW_RAW here, so both
 +			spellings end up with the same value - confirm
 +			that this is intended. */
 +			if ((srv_data_file_is_raw_partition)[i] == 0) {
 +				(srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
 +			}
 +		}
 +
 +		i++;
 +
 +		if (*str == ';') {
 +			str++;
 +		}
 +	}
 +
 +	return(TRUE);
 +}
 +
 +/*********************************************************************//**
 +Releases the three arrays that srv_parse_data_file_paths_and_sizes()
 +allocates (file names, file sizes and raw partition flags) and resets
 +the corresponding global pointers to NULL. */
 +UNIV_INTERN
 +void
 +srv_free_paths_and_sizes(void)
 +/*==========================*/
 +{
 +	/* Hand all three arrays back to the allocator first ... */
 +	free(srv_data_file_is_raw_partition);
 +	free(srv_data_file_sizes);
 +	free(srv_data_file_names);
 +
 +	/* ... and then forget the stale pointers, so that a repeated
 +	call only passes NULL to free(). */
 +	srv_data_file_is_raw_partition = NULL;
 +	srv_data_file_sizes = NULL;
 +	srv_data_file_names = NULL;
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/********************************************************************//**
 +I/O handler thread function: repeatedly waits for asynchronous I/O on
 +one segment of the aio array until the server enters the
 +SRV_SHUTDOWN_EXIT_THREADS state.
 +@return	OS_THREAD_DUMMY_RETURN */
 +extern "C" UNIV_INTERN
 +os_thread_ret_t
 +DECLARE_THREAD(io_handler_thread)(
 +/*==============================*/
 +	void*	arg)	/*!< in: pointer to the number of the segment in
 +			the aio array */
 +{
 +	/* The segment number is read once, up front. */
 +	ulint	seg_no = *((ulint*) arg);
 +
 +#ifdef UNIV_DEBUG_THREAD_CREATION
 +	fprintf(stderr, "Io handler thread %lu starts, id %lu\n", seg_no,
 +		os_thread_pf(os_thread_get_curr_id()));
 +#endif
 +
 +#ifdef UNIV_PFS_THREAD
 +	pfs_register_thread(io_handler_thread_key);
 +#endif /* UNIV_PFS_THREAD */
 +
 +	for (;;) {
 +		if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
 +			break;
 +		}
 +
 +		fil_aio_wait(seg_no);
 +	}
 +
 +	/* Threads are counted in os_thread_exit(), so a created thread
 +	must always leave through it rather than through return().
 +	In practice this point is not reached, because the thread is
 +	exited inside an os_event_wait(). */
 +
 +	os_thread_exit(NULL);
 +
 +	OS_THREAD_DUMMY_RETURN;
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/*********************************************************************//**
 +Normalizes a directory path for Windows by rewriting every '/' in the
 +string to '\\' in place. On builds without __WIN__ the function does
 +nothing. */
 +UNIV_INTERN
 +void
 +srv_normalize_path_for_win(
 +/*=======================*/
 +	char*	str MY_ATTRIBUTE((unused)))	/*!< in/out: null-terminated
 +						character string */
 +{
 +#ifdef __WIN__
 +	while (*str != '\0') {
 +
 +		if (*str == '/') {
 +			*str = '\\';
 +		}
 +
 +		++str;
 +	}
 +#endif
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/*********************************************************************//**
 +Creates a log file, extends it to the configured size
 +(srv_log_file_size pages) and closes it again.
 +@return	DB_SUCCESS or error code */
 +static MY_ATTRIBUTE((nonnull, warn_unused_result))
 +dberr_t
 +create_log_file(
 +/*============*/
 +	os_file_t*	file,	/*!< out: file handle */
 +	const char*	name)	/*!< in: log file name */
 +{
 +	ibool		ret;
 +
 +	/* OS_FILE_CREATE: creation is attempted, not a plain open;
 +	OS_FILE_ON_ERROR_NO_EXIT: failure is reported via ret. */
 +	*file = os_file_create(
 +		innodb_file_log_key, name,
 +		OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
 +		OS_LOG_FILE, &ret);
 +
 +	if (ret) {
 +		/* srv_log_file_size is in pages; convert to megabytes
 +		for the messages. */
 +		const ulong	size_mb = (ulong) srv_log_file_size
 +			>> (20 - UNIV_PAGE_SIZE_SHIFT);
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Setting log file %s size to %lu MB",
 +			name, size_mb);
 +
 +		ret = os_file_set_size(name, *file,
 +				       (os_offset_t) srv_log_file_size
 +				       << UNIV_PAGE_SIZE_SHIFT);
 +
 +		if (ret) {
 +			ret = os_file_close(*file);
 +			ut_a(ret);
 +
 +			return(DB_SUCCESS);
 +		}
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
 +			" %s to size %lu MB", name, size_mb);
 +
 +		return(DB_ERROR);
 +	}
 +
 +	ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
 +
 +	return(DB_ERROR);
 +}
 +
 +/** Initial number of the first redo log file. While the log files are
 +being created, the first one is named ib_logfile<INIT_LOG_FILE0> instead
 +of ib_logfile0; create_log_files_rename() gives it its final name. */
 +#define INIT_LOG_FILE0	(SRV_N_LOG_FILES_MAX + 1)
 +
 +/*********************************************************************//**
 +Creates all log files.
 +The steps are: (1) unless a new database is being created, remove old
 +log files numbered 0..INIT_LOG_FILE0; (2) create srv_n_log_files files,
 +the first under the temporary number INIT_LOG_FILE0; (3) register the
 +log tablespace and its file nodes; (4) initialize the log group, open
 +the files and reset the logs to the given LSN.
 +Refuses to run in read-only mode.
 +@return	DB_SUCCESS or error code */
 +static
 +dberr_t
 +create_log_files(
 +/*=============*/
 +	bool	create_new_db,	/*!< in: TRUE if new database is being
 +				created */
 +	char*	logfilename,	/*!< in/out: buffer for log file name */
 +	size_t	dirnamelen,	/*!< in: length of the directory path */
 +	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
 +	char*&	logfile0)	/*!< out: name of the first log file */
 +{
 +	if (srv_read_only_mode) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Cannot create log files in read-only mode");
 +		return(DB_READ_ONLY);
 +	}
 +
 +	/* We prevent system tablespace creation with existing files in
 +	data directory. So we do not delete log files when creating new system
 +	tablespace */
 +	if (!create_new_db) {
 +		/* Remove any old log files. The range is inclusive of
 +		INIT_LOG_FILE0 so that a leftover temporary first file
 +		is removed as well. */
 +		for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
 +			/* The file name is written directly after the
 +			directory part already present in logfilename. */
 +			sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
 +
 +			/* Ignore errors about non-existent files or files
 +			that cannot be removed. The create_log_file() will
 +			return an error when the file exists. */
 +#ifdef __WIN__
 +			DeleteFile((LPCTSTR) logfilename);
 +#else
 +			unlink(logfilename);
 +#endif
 +			/* Crashing after deleting the first
 +			file should be recoverable. The buffer
 +			pool was clean, and we can simply create
 +			all log files from the scratch. */
 +			DBUG_EXECUTE_IF("innodb_log_abort_6",
 +					return(DB_ERROR););
 +		}
 +	}
 +
 +	ut_ad(!buf_pool_check_no_pending_io());
 +
 +	DBUG_EXECUTE_IF("innodb_log_abort_7", return(DB_ERROR););
 +
 +	/* Create the files. Index 0 is created under the temporary
 +	number INIT_LOG_FILE0; all others get their final number.
 +	files[] is declared outside this function. */
 +	for (unsigned i = 0; i < srv_n_log_files; i++) {
 +		sprintf(logfilename + dirnamelen,
 +			"ib_logfile%u", i ? i : INIT_LOG_FILE0);
 +
 +		dberr_t err = create_log_file(&files[i], logfilename);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +	}
 +
 +	DBUG_EXECUTE_IF("innodb_log_abort_8", return(DB_ERROR););
 +
 +	/* We did not create the first log file initially as
 +	ib_logfile0, so that crash recovery cannot find it until it
 +	has been completed and renamed. */
 +	sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
 +
 +	fil_space_create(
 +		logfilename, SRV_LOG_SPACE_FIRST_ID,
 +		fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
 +		FIL_LOG);
 +	ut_a(fil_validate());
 +
 +	/* The value returned by fil_node_create() for the first file is
 +	handed back to the caller through logfile0. */
 +	logfile0 = fil_node_create(
 +		logfilename, (ulint) srv_log_file_size,
 +		SRV_LOG_SPACE_FIRST_ID, FALSE);
 +	ut_a(logfile0);
 +
 +	/* Register the remaining files (1..srv_n_log_files-1) in the
 +	same log tablespace; failure here is fatal. */
 +	for (unsigned i = 1; i < srv_n_log_files; i++) {
 +		sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
 +
 +		if (!fil_node_create(
 +			    logfilename,
 +			    (ulint) srv_log_file_size,
 +			    SRV_LOG_SPACE_FIRST_ID, FALSE)) {
 +			ut_error;
 +		}
 +	}
 +
 +	/* srv_log_file_size is in pages; the group wants bytes. */
 +	log_group_init(0, srv_n_log_files,
 +		       srv_log_file_size * UNIV_PAGE_SIZE,
 +		       SRV_LOG_SPACE_FIRST_ID,
 +		       SRV_LOG_SPACE_FIRST_ID + 1);
 +
 +	fil_open_log_and_system_tablespace_files();
 +
 +	/* Create a log checkpoint. */
 +	mutex_enter(&log_sys->mutex);
 +	ut_d(recv_no_log_write = FALSE);
 +	recv_reset_logs(lsn);
 +	mutex_exit(&log_sys->mutex);
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/** Rename the first redo log file.
 +Flushes and closes the log files, renames the first one (passed in as
 +logfile0) to ib_logfile0, overwrites the logfile0 string with the new
 +name, and on success reopens the log and system tablespace files.
 +@param[in,out]	logfilename	buffer for the log file name
 +@param[in]	dirnamelen	length of the directory path
 +@param[in]	lsn		FIL_PAGE_FILE_FLUSH_LSN value
 +@param[in,out]	logfile0	name of the first log file
 +@return	error code
 +@retval	DB_SUCCESS	on successful operation */
 +MY_ATTRIBUTE((warn_unused_result, nonnull))
 +static
 +dberr_t
 +create_log_files_rename(
 +/*====================*/
 +	char*	logfilename,	/*!< in/out: buffer for log file name */
 +	size_t	dirnamelen,	/*!< in: length of the directory path */
 +	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
 +	char*	logfile0)	/*!< in/out: name of the first log file */
 +{
 +	/* If innodb_flush_method=O_DSYNC,
 +	we need to explicitly flush the log buffers. */
 +	fil_flush(SRV_LOG_SPACE_FIRST_ID);
 +
 +	DBUG_EXECUTE_IF("innodb_log_abort_9", return(DB_ERROR););
 +
 +	/* Close the log files, so that we can rename
 +	the first one. */
 +	fil_close_log_files(false);
 +
 +	/* Rename the first log file, now that a log
 +	checkpoint has been created. */
 +	sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Renaming log file %s to %s", logfile0, logfilename);
 +
 +	mutex_enter(&log_sys->mutex);
 +	/* The old name is expected to be exactly two characters longer
 +	than the new one, so the strcpy() below fits in logfile0. */
 +	ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
 +	dberr_t err = os_file_rename(
 +		innodb_file_log_key, logfile0, logfilename)
 +		? DB_SUCCESS : DB_ERROR;
 +
 +	/* Replace the first file with ib_logfile0.
 +	NOTE(review): the name is overwritten even when the rename
 +	above failed - confirm that this is intended. */
 +	strcpy(logfile0, logfilename);
 +	mutex_exit(&log_sys->mutex);
 +
 +	DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;);
 +
 +	/* The files are reopened only after a successful rename. */
 +	if (err == DB_SUCCESS) {
 +		fil_open_log_and_system_tablespace_files();
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"New log files created, LSN=" LSN_PF, lsn);
 +	}
 +
 +	return(err);
 +}
 +
 +/*********************************************************************//**
 +Opens an existing log file, reports its size through *size and closes
 +the file again.
 +@return	DB_SUCCESS or error code */
 +static MY_ATTRIBUTE((nonnull, warn_unused_result))
 +dberr_t
 +open_log_file(
 +/*==========*/
 +	os_file_t*	file,	/*!< out: file handle */
 +	const char*	name,	/*!< in: log file name */
 +	os_offset_t*	size)	/*!< out: file size */
 +{
 +	ibool	ret;
 +
 +	*file = os_file_create(innodb_file_log_key, name,
 +			       OS_FILE_OPEN, OS_FILE_AIO,
 +			       OS_LOG_FILE, &ret);
 +
 +	if (ret) {
 +		/* The file is only needed long enough to read its size. */
 +		*size = os_file_get_size(*file);
 +
 +		ret = os_file_close(*file);
 +		ut_a(ret);
 +
 +		return(DB_SUCCESS);
 +	}
 +
 +	ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
 +
 +	return(DB_ERROR);
 +}
 +
 +/*********************************************************************//**
 +Creates or opens database data files and closes them.
 +For each of the srv_n_data_files configured files this either creates
 +the file and extends it to the configured size, or opens the existing
 +file and validates its size and first page. The file is then closed and
 +registered with fil_space_create() (first file only) and
 +fil_node_create().
 +Returns DB_FAIL when srv_file_check_mode() rejects a file and DB_ERROR
 +for the other failures detected here.
 +@return	DB_SUCCESS or error code */
 +static MY_ATTRIBUTE((nonnull, warn_unused_result))
 +dberr_t
 +open_or_create_data_files(
 +/*======================*/
 +	ibool*		create_new_db,	/*!< out: TRUE if new database should be
 +					created */
 +#ifdef UNIV_LOG_ARCHIVE
 +	ulint*		min_arch_log_no,/*!< out: min of archived log
 +					numbers in data files */
 +	ulint*		max_arch_log_no,/*!< out: max of archived log
 +					numbers in data files */
 +#endif /* UNIV_LOG_ARCHIVE */
 +	lsn_t*		min_flushed_lsn,/*!< out: min of flushed lsn
 +					values in data files */
 +	lsn_t*		max_flushed_lsn,/*!< out: max of flushed lsn
 +					values in data files */
 +	ulint*		sum_of_new_sizes)/*!< out: sum of sizes of the
 +					new files added */
 +{
 +	ibool		ret;
 +	ulint		i;
 +	ibool		one_opened	= FALSE;
 +	ibool		one_created	= FALSE;
 +	os_offset_t	size;
 +	ulint		flags;
 +	ulint		space;
 +	ulint		rounded_size_pages;
 +	char		name[10000];
 +
 +	if (srv_n_data_files >= 1000) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Can only have < 1000 data files, you have "
 +			"defined %lu", (ulong) srv_n_data_files);
 +
 +		return(DB_ERROR);
 +	}
 +
 +	*sum_of_new_sizes = 0;
 +
 +	*create_new_db = FALSE;
 +
 +	srv_normalize_path_for_win(srv_data_home);
 +
 +	for (i = 0; i < srv_n_data_files; i++) {
 +		ulint	dirnamelen;
 +
 +		srv_normalize_path_for_win(srv_data_file_names[i]);
 +		dirnamelen = strlen(srv_data_home);
 +
 +		/* Leave room for an extra path separator and the
 +		terminating NUL in name[]. */
 +		ut_a(dirnamelen + strlen(srv_data_file_names[i])
 +		     < (sizeof name) - 1);
 +
 +		memcpy(name, srv_data_home, dirnamelen);
 +
 +		/* Add a path separator if needed. */
 +		if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
 +			name[dirnamelen++] = SRV_PATH_SEPARATOR;
 +		}
 +
 +		strcpy(name + dirnamelen, srv_data_file_names[i]);
 +
 +		/* Note: It will return true if the file doesn't exist. */
 +
 +		if (!srv_file_check_mode(name)) {
 +
 +			return(DB_FAIL);
 +
 +		} else if (srv_data_file_is_raw_partition[i] == 0) {
 +
 +			/* First we try to create the file: if it already
 +			exists, ret will get value FALSE */
 +
 +			files[i] = os_file_create(
 +				innodb_file_data_key, name, OS_FILE_CREATE,
 +				OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +
 +			if (srv_read_only_mode) {
 +
 +				/* In read-only mode a successful call
 +				jumps straight into the existing-file
 +				checks below, skipping the reopen. */
 +				if (ret) {
 +					goto size_check;
 +				}
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Opening %s failed!", name);
 +
 +				return(DB_ERROR);
 +
 +			} else if (!ret
 +				   && os_file_get_last_error(false)
 +				   != OS_FILE_ALREADY_EXISTS
 +#ifdef UNIV_AIX
 +				   /* AIX 5.1 after security patch ML7 may have
 +				   errno set to 0 here, which causes our
 +				   function to return 100; work around that
 +				   AIX problem */
 +				   && os_file_get_last_error(false) != 100
 +#endif /* UNIV_AIX */
 +			    ) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Creating or opening %s failed!",
 +					name);
 +
 +				return(DB_ERROR);
 +			}
 +
 +		} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
 +
 +			ut_a(!srv_read_only_mode);
 +
 +			/* The partition is opened, not created; then it is
 +			written over */
 +
 +			srv_start_raw_disk_in_use = TRUE;
 +			srv_created_new_raw = TRUE;
 +
 +			files[i] = os_file_create(
 +				innodb_file_data_key, name, OS_FILE_OPEN_RAW,
 +				OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +
 +			if (!ret) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Error in opening %s", name);
 +
 +				return(DB_ERROR);
 +			}
 +
 +			const char*	check_msg;
 +			check_msg = fil_read_first_page(
 +				files[i], FALSE, &flags, &space,
 +#ifdef UNIV_LOG_ARCHIVE
 +				min_arch_log_no, max_arch_log_no,
 +#endif /* UNIV_LOG_ARCHIVE */
 +				min_flushed_lsn, max_flushed_lsn);
 +
 +			/* If first page is valid, don't overwrite DB.
 +			It prevents overwriting DB when mysql_install_db
 +			starts mysqld multiple times during bootstrap.
 +			Setting ret to FALSE routes the partition through
 +			the existing-file branch below. */
 +			if (check_msg == NULL) {
 +
 +				srv_created_new_raw = FALSE;
 +				ret = FALSE;
 +			}
 +
 +		} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
 +			srv_start_raw_disk_in_use = TRUE;
 +
 +			ret = FALSE;
 +		} else {
 +			ut_a(0);
 +		}
 +
 +		/* ret == FALSE: the file (or raw partition) already
 +		existed and must be opened and validated. Otherwise it
 +		was just created and must be written to full size. */
 +		if (ret == FALSE) {
 +			const char* check_msg;
 +			/* We open the data file */
 +
 +			if (one_created) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Data files can only be added at "
 +					"the end of a tablespace, but "
 +					"data file %s existed beforehand.",
 +					name);
 +				return(DB_ERROR);
 +			}
 +			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
 +				ut_a(!srv_read_only_mode);
 +				files[i] = os_file_create(
 +					innodb_file_data_key,
 +					name, OS_FILE_OPEN_RAW,
 +					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +			} else if (i == 0) {
 +				/* Only the first file is opened with
 +				OS_FILE_OPEN_RETRY. */
 +				files[i] = os_file_create(
 +					innodb_file_data_key,
 +					name, OS_FILE_OPEN_RETRY,
 +					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +			} else {
 +				files[i] = os_file_create(
 +					innodb_file_data_key,
 +					name, OS_FILE_OPEN, OS_FILE_NORMAL,
 +					OS_DATA_FILE, &ret);
 +			}
 +
 +			if (!ret) {
 +
 +				os_file_get_last_error(true);
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Can't open '%s'", name);
 +
 +				return(DB_ERROR);
 +			}
 +
 +			/* The size of an old raw partition is not
 +			checked. */
 +			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
 +
 +				goto skip_size_check;
 +			}
 +
 +size_check:
 +			size = os_file_get_size(files[i]);
 +			ut_a(size != (os_offset_t) -1);
 +
 +			/* Under some error conditions like disk full
 +			scenarios or file size reaching filesystem
 +			limit the data file could contain an incomplete
 +			extent at the end. When we extend a data file
 +			and if some failure happens, then also the data
 +			file could contain an incomplete extent.  So we
 +			need to round the size downward to a megabyte.*/
 +
 +			rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size);
 +
 +			/* The last file may auto-extend: accept any size
 +			between the configured initial size and the
 +			configured maximum (if non-zero), and adopt the
 +			actual size as the configured one. */
 +			if (i == srv_n_data_files - 1
 +			    && srv_auto_extend_last_data_file) {
 +
 +				if (srv_data_file_sizes[i] > rounded_size_pages
 +				    || (srv_last_file_size_max > 0
 +					&& srv_last_file_size_max
 +					< rounded_size_pages)) {
 +
 +					ib_logf(IB_LOG_LEVEL_ERROR,
 +						"auto-extending "
 +						"data file %s is "
 +						"of a different size "
 +						"%lu pages (rounded "
 +						"down to MB) than specified "
 +						"in the .cnf file: "
 +						"initial %lu pages, "
 +						"max %lu (relevant if "
 +						"non-zero) pages!",
 +						name,
 +						(ulong) rounded_size_pages,
 +						(ulong) srv_data_file_sizes[i],
 +						(ulong)
 +						srv_last_file_size_max);
 +
 +					return(DB_ERROR);
 +				}
 +
 +				srv_data_file_sizes[i] = rounded_size_pages;
 +			}
 +
 +			/* Every file must match its configured size
 +			exactly (trivially true for the auto-extending
 +			file after the assignment above). */
 +			if (rounded_size_pages != srv_data_file_sizes[i]) {
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Data file %s is of a different "
 +					"size %lu pages (rounded down to MB) "
 +					"than specified in the .cnf file "
 +					"%lu pages!",
 +					name,
 +					(ulong) rounded_size_pages,
 +					(ulong) srv_data_file_sizes[i]);
 +
 +				return(DB_ERROR);
 +			}
 +skip_size_check:
 +
 +			/* This is the earliest location where we can load
 +			the double write buffer. */
 +			if (i == 0) {
 +				buf_dblwr_init_or_load_pages(
 +					files[i], srv_data_file_names[i], true);
 +			}
 +
 +			/* The first page is validated at most twice: if
 +			the first attempt fails, one restore of page 0 is
 +			attempted and the check is repeated. */
 +			bool retry = true;
 +check_first_page:
 +			check_msg = fil_read_first_page(
 +				files[i], one_opened, &flags, &space,
 +#ifdef UNIV_LOG_ARCHIVE
 +				min_arch_log_no, max_arch_log_no,
 +#endif /* UNIV_LOG_ARCHIVE */
 +				min_flushed_lsn, max_flushed_lsn);
 +
 +			if (check_msg) {
 +
 +				if (retry) {
 +					fsp_open_info	fsp;
 +					const ulint	page_no = 0;
 +
 +					retry = false;
 +					fsp.id = 0;
 +					fsp.filepath = srv_data_file_names[i];
 +					fsp.file = files[i];
 +
 +					if (fil_user_tablespace_restore_page(
 +						&fsp, page_no)) {
 +						goto check_first_page;
 +					}
 +				}
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +						"%s in data file %s",
 +						check_msg, name);
 +				return(DB_ERROR);
 +			}
 +
 +			/* The first file of the system tablespace must
 +			have space ID = TRX_SYS_SPACE.  The FSP_SPACE_ID
 +			field in files greater than ibdata1 are unreliable. */
 +			ut_a(one_opened || space == TRX_SYS_SPACE);
 +
 +			/* Check the flags for the first system tablespace
 +			file only. */
 +			if (!one_opened
 +			    && UNIV_PAGE_SIZE
 +			       != fsp_flags_get_page_size(flags)) {
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Data file \"%s\" uses page size %lu,"
 +					"but the start-up parameter "
 +					"is --innodb-page-size=%lu",
 +					name,
 +					fsp_flags_get_page_size(flags),
 +					UNIV_PAGE_SIZE);
 +
 +				return(DB_ERROR);
 +			}
 +
 +			one_opened = TRUE;
 +		} else if (!srv_read_only_mode) {
 +			/* We created the data file and now write it full of
 +			zeros */
 +
 +			one_created = TRUE;
 +
 +			if (i > 0) {
 +				ib_logf(IB_LOG_LEVEL_INFO,
 +					"Data file %s did not"
 +					" exist: new to be created",
 +					name);
 +			} else {
 +				ib_logf(IB_LOG_LEVEL_INFO,
 +					"The first specified "
 +					"data file %s did not exist: "
 +					"a new database to be created!",
 +					name);
 +
 +				*create_new_db = TRUE;
 +			}
 +
 +			/* srv_data_file_sizes[] is in pages; the message
 +			shows megabytes. */
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Setting file %s size to %lu MB",
 +				name,
 +				(ulong) (srv_data_file_sizes[i]
 +					 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
 +
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Database physically writes the"
 +				" file full: wait...");
 +
 +			ret = os_file_set_size(
 +				name, files[i],
 +				(os_offset_t) srv_data_file_sizes[i]
 +				<< UNIV_PAGE_SIZE_SHIFT);
 +
 +			if (!ret) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Error in creating %s: "
 +					"probably out of disk space",
 +					name);
 +
 +				return(DB_ERROR);
 +			}
 +
 +			*sum_of_new_sizes += srv_data_file_sizes[i];
 +		}
 +
 +		ret = os_file_close(files[i]);
 +		ut_a(ret);
 +
 +		/* The system tablespace (space id 0) is created once,
 +		when the first file is processed; every file is then
 +		added to it as a node. */
 +		if (i == 0) {
 +			flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
 +			fil_space_create(name, 0, flags, FIL_TABLESPACE);
 +		}
 +
 +		ut_a(fil_validate());
 +
 +		if (!fil_node_create(name, srv_data_file_sizes[i], 0,
 +				     srv_data_file_is_raw_partition[i] != 0)) {
 +			return(DB_ERROR);
 +		}
 +	}
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/*********************************************************************//**
 +Create undo tablespace.
 +In read-only mode the file is opened instead of created. Otherwise the
 +file is created and extended to the requested size; an already existing
 +file is treated as an error. The file handle is always closed before
 +returning.
 +@return	DB_SUCCESS or error code */
 +static
 +dberr_t
 +srv_undo_tablespace_create(
 +/*=======================*/
 +	const char*	name,		/*!< in: tablespace name */
 +	ulint		size)		/*!< in: tablespace size in pages */
 +{
 +	os_file_t	fh;
 +	ibool		ret;
 +	dberr_t		err = DB_SUCCESS;
 +
 +	os_file_create_subdirs_if_needed(name);
 +
 +	fh = os_file_create(
 +		innodb_file_data_key,
 +		name,
 +		srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
 +		OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +
 +	if (srv_read_only_mode && ret) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"%s opened in read-only mode", name);
 +
 +		/* The handle is not handed to anyone; close it so that
 +		it is not leaked. */
 +		os_file_close(fh);
 +	} else if (ret == FALSE) {
 +		if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
 +#ifdef UNIV_AIX
 +			/* AIX 5.1 after security patch ML7 may have
 +			errno set to 0 here, which causes our function
 +			to return 100; work around that AIX problem */
 +		    && os_file_get_last_error(false) != 100
 +#endif /* UNIV_AIX */
 +		) {
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Can't create UNDO tablespace %s", name);
 +		} else {
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Creating system tablespace with"
 +				" existing undo tablespaces is not"
 +				" supported. Please delete all undo"
 +				" tablespaces before creating new"
 +				" system tablespace.");
 +		}
 +		err = DB_ERROR;
 +	} else {
 +		ut_a(!srv_read_only_mode);
 +
 +		/* We created the data file and now write it full of zeros */
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Data file %s did not exist: new to be created",
 +			name);
 +
 +		/* size is in pages; the message shows megabytes. The
 +		cast keeps the argument in step with the %lu format. */
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Setting file %s size to %lu MB",
 +			name, (ulong) (size >> (20 - UNIV_PAGE_SIZE_SHIFT)));
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Database physically writes the file full: wait...");
 +
 +		ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
 +
 +		if (!ret) {
 +			/* This is a failure that makes the function
 +			return DB_ERROR, so report it as an error. */
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Error in creating %s: probably out of "
 +				"disk space", name);
 +
 +			err = DB_ERROR;
 +		}
 +
 +		os_file_close(fh);
 +	}
 +
 +	return(err);
 +}
 +
 +/*********************************************************************//**
 +Open an undo tablespace: check the file mode, open the file to learn
 +its size, close it again and register the tablespace and its single
 +file node under the given space id.
 +@return	DB_SUCCESS or error code */
 +static
 +dberr_t
 +srv_undo_tablespace_open(
 +/*=====================*/
 +	const char*	name,		/*!< in: tablespace name */
 +	ulint		space)		/*!< in: tablespace id */
 +{
 +	os_file_t	fh;
 +	ibool		ret;
 +	ulint		flags;
 +	os_offset_t	size;
 +
 +	if (!srv_file_check_mode(name)) {
 +		/* NOTE(review): the wording looks inverted relative to
 +		srv_read_only_mode - confirm against
 +		srv_file_check_mode() before changing it. */
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"UNDO tablespaces must be %s!",
 +			srv_read_only_mode ? "writable" : "readable");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	fh = os_file_create(
 +		innodb_file_data_key, name,
 +		OS_FILE_OPEN_RETRY
 +		| OS_FILE_ON_ERROR_NO_EXIT
 +		| OS_FILE_ON_ERROR_SILENT,
 +		OS_FILE_NORMAL,
 +		OS_DATA_FILE,
 +		&ret);
 +
 +	/* Nothing to load if the file could not be opened. */
 +
 +	if (!ret) {
 +		return(DB_ERROR);
 +	}
 +
 +	size = os_file_get_size(fh);
 +	ut_a(size != (os_offset_t) -1);
 +
 +	ret = os_file_close(fh);
 +	ut_a(ret);
 +
 +	/* Load the tablespace into InnoDB's internal
 +	data structures. */
 +
 +	/* The undo tablespace id becomes the biggest known space id,
 +	because InnoDB hasn't opened any other tablespace apart
 +	from the system tablespace at this point. */
 +
 +	fil_set_max_space_id_if_bigger(space);
 +
 +	/* Set the compressed page size to 0 (non-compressed) */
 +	flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
 +	fil_space_create(name, space, flags, FIL_TABLESPACE);
 +
 +	ut_a(fil_validate());
 +
 +	os_offset_t	n_pages = size / UNIV_PAGE_SIZE;
 +
 +	/* On 64 bit Windows ulint can be 32 bit and os_offset_t
 +	is 64 bit. It is OK to cast the n_pages to ulint because
 +	the unit has been scaled to pages and they are always
 +	32 bit. */
 +	if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
 +		return(DB_SUCCESS);
 +	}
 +
 +	return(DB_ERROR);
 +}
 +
 +/********************************************************************
 +Opens the configured number of undo tablespaces.
 +When a new database is being created, the undo tablespace files
 +undo001..undoNNN are created first and their headers are initialized
 +at the end. In all cases the in-use undo tablespaces are opened (a
 +failure is fatal), followed by any further contiguous, unused ones
 +found on disk (stopping at the first that cannot be opened).
 +@return	DB_SUCCESS or error code */
 +static
 +dberr_t
 +srv_undo_tablespaces_init(
 +/*======================*/
 +	ibool		create_new_db,		/*!< in: TRUE if new db being
 +						created */
 +	const ulint	n_conf_tablespaces,	/*!< in: configured undo
 +						tablespaces */
 +	ulint*		n_opened)		/*!< out: number of UNDO
 +						tablespaces successfully
 +						discovered and opened */
 +{
 +	ulint		i;
 +	dberr_t		err = DB_SUCCESS;
 +	ulint		prev_space_id = 0;
 +	ulint		n_undo_tablespaces;
 +	/* One extra slot for the ULINT_UNDEFINED terminator. */
 +	ulint		undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
 +
 +	*n_opened = 0;
 +
 +	ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
 +
 +	memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
 +
 +	/* Create the undo spaces only if we are creating a new
 +	instance. We don't allow creating of new undo tablespaces
 +	in an existing instance (yet).  This restriction exists because
 +	we check in several places for SYSTEM tablespaces to be less than
 +	the min of user defined tablespace ids. Once we implement saving
 +	the location of the undo tablespaces and their space ids this
 +	restriction will/should be lifted. */
 +
 +	for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
 +		char	name[OS_FILE_MAX_PATH];
 +
 +		ut_snprintf(
 +			name, sizeof(name),
 +			"%s%cundo%03lu",
 +			srv_undo_dir, SRV_PATH_SEPARATOR, i + 1);
 +
 +		/* Undo space ids start from 1. */
 +		err = srv_undo_tablespace_create(
 +			name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
 +
 +		if (err != DB_SUCCESS) {
 +
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Could not create undo tablespace '%s'.",
 +				name);
 +
 +			return(err);
 +		}
 +	}
 +
 +	/* Get the tablespace ids of all the undo segments excluding
 +	the system tablespace (0). If we are creating a new instance then
 +	we build the undo_tablespace_ids ourselves since they don't
 +	already exist. */
 +
 +	if (!create_new_db) {
 +		n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
 +			undo_tablespace_ids);
 +	} else {
 +		n_undo_tablespaces = n_conf_tablespaces;
 +
 +		for (i = 1; i <= n_undo_tablespaces; ++i) {
 +			undo_tablespace_ids[i - 1] = i;
 +		}
 +
 +		/* Terminate the list directly after the last id. The
 +		loop above leaves i == n_undo_tablespaces + 1, so
 +		indexing with i would skip one slot and write past the
 +		end of the array when n_undo_tablespaces ==
 +		TRX_SYS_N_RSEGS. */
 +		undo_tablespace_ids[n_undo_tablespaces] = ULINT_UNDEFINED;
 +	}
 +
 +	/* Open all the undo tablespaces that are currently in use. If we
 +	fail to open any of these it is a fatal error. The tablespace ids
 +	should be contiguous. It is a fatal error because they are required
 +	for recovery and are referenced by the UNDO logs (a.k.a RBS). */
 +
 +	for (i = 0; i < n_undo_tablespaces; ++i) {
 +		char	name[OS_FILE_MAX_PATH];
 +
 +		ut_snprintf(
 +			name, sizeof(name),
 +			"%s%cundo%03lu",
 +			srv_undo_dir, SRV_PATH_SEPARATOR,
 +			undo_tablespace_ids[i]);
 +
 +		/* Should be no gaps in undo tablespace ids. */
 +		ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
 +
 +		/* The system space id should not be in this array. */
 +		ut_a(undo_tablespace_ids[i] != 0);
 +		ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
 +
 +		/* Undo space ids start from 1. */
 +
 +		err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
 +
 +		if (err != DB_SUCCESS) {
 +
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Unable to open undo tablespace '%s'.", name);
 +
 +			return(err);
 +		}
 +
 +		prev_space_id = undo_tablespace_ids[i];
 +
 +		++*n_opened;
 +	}
 +
 +	/* Open any extra unused undo tablespaces. These must be contiguous.
 +	We stop at the first failure. These are undo tablespaces that are
 +	not in use and therefore not required by recovery. We only check
 +	that there are no gaps. */
 +
 +	for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
 +		char	name[OS_FILE_MAX_PATH];
 +
 +		ut_snprintf(
 +			name, sizeof(name),
 +			"%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
 +
 +		/* Undo space ids start from 1. */
 +		err = srv_undo_tablespace_open(name, i);
 +
 +		if (err != DB_SUCCESS) {
 +			break;
 +		}
 +
 +		++n_undo_tablespaces;
 +
 +		++*n_opened;
 +	}
 +
 +	/* If the user says that there are fewer than what we find we
 +	tolerate that discrepancy but not the inverse. Because there could
 +	be unused undo tablespaces for future use. */
 +
 +	if (n_conf_tablespaces > n_undo_tablespaces) {
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: Expected to open %lu undo "
 +			"tablespaces but was able\n",
 +			n_conf_tablespaces);
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: to find only %lu undo "
 +			"tablespaces.\n", n_undo_tablespaces);
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: Set the "
 +			"innodb_undo_tablespaces parameter to "
 +			"the\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: correct value and retry. Suggested "
 +			"value is %lu\n", n_undo_tablespaces);
 +
 +		/* err holds the result of the last open attempt; make
 +		sure an error is returned even if that one succeeded. */
 +		return(err != DB_SUCCESS ? err : DB_ERROR);
 +
 +	} else  if (n_undo_tablespaces > 0) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
 +			n_undo_tablespaces);
 +
 +		if (n_conf_tablespaces == 0) {
 +			ib_logf(IB_LOG_LEVEL_WARN,
 +				"Using the system tablespace for all UNDO "
 +				"logging because innodb_undo_tablespaces=0");
 +		}
 +	}
 +
 +	if (create_new_db) {
 +		mtr_t	mtr;
 +
 +		mtr_start(&mtr);
 +
 +		/* The undo log tablespace */
 +		for (i = 1; i <= n_undo_tablespaces; ++i) {
 +
 +			fsp_header_init(
 +				i, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
 +		}
 +
 +		mtr_commit(&mtr);
 +	}
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/********************************************************************
 +Wait for the purge thread(s) to start up.
 +Polls trx_purge_state() every 50000 microseconds while the state is
 +still PURGE_STATE_INIT, unless shutdown has begun or
 +srv_force_recovery is SRV_FORCE_NO_BACKGROUND or higher. */
 +static
 +void
 +srv_start_wait_for_purge_to_start()
 +/*===============================*/
 +{
 +	/* Wait for the purge coordinator and master thread to startup. */
 +
 +	purge_state_t	state = trx_purge_state();
 +
 +	ut_a(state != PURGE_STATE_DISABLED);
 +
 +	for (;;) {
 +		if (srv_shutdown_state != SRV_SHUTDOWN_NONE
 +		    || srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
 +		    || state != PURGE_STATE_INIT) {
 +
 +			break;
 +		}
 +
 +		state = trx_purge_state();
 +
 +		switch (state) {
 +		case PURGE_STATE_INIT:
 +			/* Not started yet: report and poll again. */
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for purge to start");
 +
 +			os_thread_sleep(50000);
 +			break;
 +
 +		case PURGE_STATE_RUN:
 +		case PURGE_STATE_STOP:
 +			/* Started; the loop condition ends the wait. */
 +			break;
 +
 +		case PURGE_STATE_EXIT:
 +		case PURGE_STATE_DISABLED:
 +			ut_error;
 +		}
 +	}
 +}
 +
 +/********************************************************************
 +Starts InnoDB and creates a new database if database files
 +are not found and the user wants.
 + at return	DB_SUCCESS or error code */
 +UNIV_INTERN
 +dberr_t
 +innobase_start_or_create_for_mysql(void)
 +/*====================================*/
 +{
 +	ibool		create_new_db;
 +	lsn_t		min_flushed_lsn;
 +	lsn_t		max_flushed_lsn;
 +#ifdef UNIV_LOG_ARCHIVE
 +	ulint		min_arch_log_no;
 +	ulint		max_arch_log_no;
 +#endif /* UNIV_LOG_ARCHIVE */
 +	ulint		sum_of_new_sizes;
 +	dberr_t		err;
 +	unsigned	i;
 +	ulint		srv_n_log_files_found = srv_n_log_files;
 +	ulint		io_limit;
 +	mtr_t		mtr;
 +	ib_bh_t*	ib_bh;
 +	ulint		n_recovered_trx;
 +	char		logfilename[10000];
 +	char*		logfile0	= NULL;
 +	size_t		dirnamelen;
 +	bool		sys_datafiles_created = false;
 +
 +	/* Check that os_fast_mutexes work as expected */
 +	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
 +
 +	ut_a(0 == os_fast_mutex_trylock(&srv_os_test_mutex));
 +
 +	os_fast_mutex_unlock(&srv_os_test_mutex);
 +
 +	os_fast_mutex_lock(&srv_os_test_mutex);
 +
 +	os_fast_mutex_unlock(&srv_os_test_mutex);
 +
 +	os_fast_mutex_free(&srv_os_test_mutex);
 +
 +	high_level_read_only = srv_read_only_mode
 +		|| srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
 +
 +	if (srv_read_only_mode) {
 +		ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
 +	}
 +
 +#ifdef HAVE_DARWIN_THREADS
 +# ifdef F_FULLFSYNC
 +	/* This executable has been compiled on Mac OS X 10.3 or later.
 +	Assume that F_FULLFSYNC is available at run-time. */
 +	srv_have_fullfsync = TRUE;
 +# else /* F_FULLFSYNC */
 +	/* This executable has been compiled on Mac OS X 10.2
 +	or earlier.  Determine if the executable is running
 +	on Mac OS X 10.3 or later. */
 +	struct utsname utsname;
 +	if (uname(&utsname)) {
 +		ut_print_timestamp(stderr);
 +		fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
 +	} else {
 +		srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
 +	}
 +	if (!srv_have_fullfsync) {
 +		ut_print_timestamp(stderr);
 +		fputs(" InnoDB: On Mac OS X, fsync() may be "
 +		      "broken on internal drives,\n", stderr);
 +		ut_print_timestamp(stderr);
 +		fputs(" InnoDB: making transactions unsafe!\n", stderr);
 +	}
 +# endif /* F_FULLFSYNC */
 +#endif /* HAVE_DARWIN_THREADS */
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Using %s to ref count buffer pool pages",
 +#ifdef PAGE_ATOMIC_REF_COUNT
 +		"atomics"
 +#else
 +		"mutexes"
 +#endif /* PAGE_ATOMIC_REF_COUNT */
 +	);
 +
 +	compile_time_assert(sizeof(ulint) == sizeof(void*));
 +
 +#ifdef UNIV_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
 +#endif
 +
 +#ifdef UNIV_IBUF_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n");
 +# ifdef UNIV_IBUF_COUNT_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
 +		"!!!!!!!!!\n");
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n");
 +# endif
 +#endif
 +
 +#ifdef UNIV_BLOB_DEBUG
 +	fprintf(stderr,
 +		"InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
 +		"InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
 +#endif /* UNIV_BLOB_DEBUG */
 +
 +#ifdef UNIV_SYNC_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
 +#endif
 +
 +#ifdef UNIV_SEARCH_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
 +#endif
 +
 +#ifdef UNIV_LOG_LSN_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +#ifdef UNIV_MEM_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
 +#endif
 +
 +	if (srv_use_sys_malloc) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"The InnoDB memory heap is disabled");
 +	}
 +
 +#if defined(COMPILER_HINTS_ENABLED)
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		" InnoDB: Compiler hints enabled.");
 +#endif /* defined(COMPILER_HINTS_ENABLED) */
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"" IB_ATOMICS_STARTUP_MSG "");
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"" IB_MEMORY_BARRIER_STARTUP_MSG "");
 +
 +#ifndef HAVE_MEMORY_BARRIER
 +#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
 +#else
 +	ib_logf(IB_LOG_LEVEL_WARN,
 +		"MySQL was built without a memory barrier capability on this"
 +		" architecture, which might allow a mutex/rw_lock violation"
 +		" under high thread concurrency. This may cause a hang.");
 +#endif /* IA32 or AMD64 */
 +#endif /* HAVE_MEMORY_BARRIER */
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Compressed tables use zlib " ZLIB_VERSION
 +#ifdef UNIV_ZIP_DEBUG
 +	      " with validation"
 +#endif /* UNIV_ZIP_DEBUG */
 +	      );
 +#ifdef UNIV_ZIP_COPY
 +	ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
 +#endif /* UNIV_ZIP_COPY */
 +
 +
 +	/* Since InnoDB does not currently clean up all its internal data
 +	structures in MySQL Embedded Server Library server_end(), we
 +	print an error message if someone tries to start up InnoDB a
 +	second time during the process lifetime. */
 +
 +	if (srv_start_has_been_called) {
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: Error: startup called second time "
 +			"during the process\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
 +			"Server Library you\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: cannot call server_init() more "
 +			"than once during the\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: process lifetime.\n");
 +	}
 +
 +	srv_start_has_been_called = TRUE;
 +
 +#ifdef UNIV_DEBUG
 +	log_do_write = TRUE;
 +#endif /* UNIV_DEBUG */
 +	/*	yydebug = TRUE; */
 +
 +	srv_is_being_started = TRUE;
 +	srv_startup_is_before_trx_rollback_phase = TRUE;
 +
 +#ifdef __WIN__
 +	switch (os_get_os_version()) {
 +	case OS_WIN95:
 +	case OS_WIN31:
 +	case OS_WINNT:
 +		/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
 +		and NT use simulated aio. In NT Windows provides async i/o,
 +		but when run in conjunction with InnoDB Hot Backup, it seemed
 +		to corrupt the data files. */
 +
 +		srv_use_native_aio = FALSE;
 +		break;
 +
 +	case OS_WIN2000:
 +	case OS_WINXP:
 +		/* On 2000 and XP, async IO is available. */
 +		srv_use_native_aio = TRUE;
 +		break;
 +
 +	default:
 +		/* Vista and later have both async IO and condition variables */
 +		srv_use_native_aio = TRUE;
 +		srv_use_native_conditions = TRUE;
 +		break;
 +	}
 +
 +#elif defined(LINUX_NATIVE_AIO)
 +
 +	if (srv_use_native_aio) {
 +		ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
 +	}
 +#else
 +	/* Currently native AIO is supported only on windows and linux
 +	and that also when the support is compiled in. In all other
 +	cases, we ignore the setting of innodb_use_native_aio. */
 +	srv_use_native_aio = FALSE;
 +#endif /* __WIN__ */
 +
 +	if (srv_file_flush_method_str == NULL) {
 +		/* These are the default options */
 +
 +		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
 +
 +		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
 +#ifndef __WIN__
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
 +		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
 +		srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
 +		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
 +		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
 +		srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
 +		srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
 +#else
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
 +		srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
 +		srv_use_native_aio = FALSE;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
 +		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
 +		srv_use_native_aio = FALSE;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str,
 +				  "async_unbuffered")) {
 +		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
 +#endif /* __WIN__ */
 +	} else {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Unrecognized value %s for innodb_flush_method",
 +			srv_file_flush_method_str);
 +		return(DB_ERROR);
 +	}
 +
 +	/* Note that the call srv_boot() also changes the values of
 +	some variables to the units used by InnoDB internally */
 +
 +	/* Set the maximum number of threads which can wait for a semaphore
 +	inside InnoDB: this is the 'sync wait array' size, as well as the
 +	maximum number of threads that can wait in the 'srv_conc array' for
 +	their time to enter InnoDB. */
 +
 +#define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
 +	srv_max_n_threads = 1   /* io_ibuf_thread */
 +			    + 1 /* io_log_thread */
 +			    + 1 /* lock_wait_timeout_thread */
 +			    + 1 /* srv_error_monitor_thread */
 +			    + 1 /* srv_monitor_thread */
 +			    + 1 /* srv_master_thread */
 +			    + 1 /* srv_purge_coordinator_thread */
 +			    + 1 /* buf_dump_thread */
 +			    + 1 /* dict_stats_thread */
 +			    + 1 /* fts_optimize_thread */
 +			    + 1 /* recv_writer_thread */
 +			    + 1 /* buf_flush_page_cleaner_thread */
 +			    + 1 /* trx_rollback_or_clean_all_recovered */
 +			    + 128 /* added as margin, for use of
 +				  InnoDB Memcached etc. */
 +			    + max_connections
 +			    + srv_n_read_io_threads
 +			    + srv_n_write_io_threads
 +			    + srv_n_purge_threads
 +			    /* FTS Parallel Sort */
 +			    + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
 +			      * max_connections;
 +
 +	if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) {
 +		/* If buffer pool is less than 1 GB,
 +		use only one buffer pool instance */
 +		srv_buf_pool_instances = 1;
 +	}
 +
 +	srv_boot();
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"%s CPU crc32 instructions",
 +		ut_crc32_sse2_enabled ? "Using" : "Not using");
 +
 +	if (!srv_read_only_mode) {
 +
 +		mutex_create(srv_monitor_file_mutex_key,
 +			     &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
 +
 +		if (srv_innodb_status) {
 +
 +			srv_monitor_file_name = static_cast<char*>(
 +				mem_alloc(
 +					strlen(fil_path_to_mysql_datadir)
 +					+ 20 + sizeof "/innodb_status."));
 +
 +			sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
 +				fil_path_to_mysql_datadir,
 +				os_proc_get_number());
 +
 +			srv_monitor_file = fopen(srv_monitor_file_name, "w+");
 +
 +			if (!srv_monitor_file) {
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Unable to create %s: %s",
 +					srv_monitor_file_name,
 +					strerror(errno));
 +
 +				return(DB_ERROR);
 +			}
 +		} else {
 +			srv_monitor_file_name = NULL;
 +			srv_monitor_file = os_file_create_tmpfile(NULL);
 +
 +			if (!srv_monitor_file) {
 +				return(DB_ERROR);
 +			}
 +		}
 +
 +		mutex_create(srv_dict_tmpfile_mutex_key,
 +			     &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
 +
 +		srv_dict_tmpfile = os_file_create_tmpfile(NULL);
 +
 +		if (!srv_dict_tmpfile) {
 +			return(DB_ERROR);
 +		}
 +
 +		mutex_create(srv_misc_tmpfile_mutex_key,
 +			     &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
 +
 +		srv_misc_tmpfile = os_file_create_tmpfile(NULL);
 +
 +		if (!srv_misc_tmpfile) {
 +			return(DB_ERROR);
 +		}
 +	}
 +
 +	/* If user has set the value of innodb_file_io_threads then
 +	we'll emit a message telling the user that this parameter
 +	is now deprecated. */
 +	if (srv_n_file_io_threads != 4) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"innodb_file_io_threads is deprecated. Please use "
 +			"innodb_read_io_threads and innodb_write_io_threads "
 +			"instead");
 +	}
 +
 +	/* Now overwrite the value on srv_n_file_io_threads */
 +	srv_n_file_io_threads = srv_n_read_io_threads;
 +
 +	if (!srv_read_only_mode) {
 +		/* Add the log and ibuf IO threads. */
 +		srv_n_file_io_threads += 2;
 +		srv_n_file_io_threads += srv_n_write_io_threads;
 +	} else {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Disabling background IO write threads.");
 +
 +		srv_n_write_io_threads = 0;
 +	}
 +
 +	ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
 +
 +	io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
 +
 +	/* On Windows when using native aio the number of aio requests
 +	that a thread can handle at a given time is limited to 32
 +	i.e.: SRV_N_PENDING_IOS_PER_THREAD */
 +# ifdef __WIN__
 +	if (srv_use_native_aio) {
 +		io_limit = SRV_N_PENDING_IOS_PER_THREAD;
 +	}
 +# endif /* __WIN__ */
 +
 +	if (!os_aio_init(io_limit,
 +			 srv_n_read_io_threads,
 +			 srv_n_write_io_threads,
 +			 SRV_MAX_N_PENDING_SYNC_IOS)) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Fatal : Cannot initialize AIO sub-system");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
 +
 +	double	size;
 +	char	unit;
 +
 +	if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
 +		size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
 +		unit = 'G';
 +	} else {
 +		size = ((double) srv_buf_pool_size) / (1024 * 1024);
 +		unit = 'M';
 +	}
 +
 +	/* Print time to initialize the buffer pool */
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Initializing buffer pool, size = %.1f%c", size, unit);
 +
 +	err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
 +
 +	if (err != DB_SUCCESS) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Cannot allocate memory for the buffer pool");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Completed initialization of buffer pool");
 +
 +#ifdef UNIV_DEBUG
 +	/* We have observed deadlocks with a 5MB buffer pool but
 +	the actual lower limit could very well be a little higher. */
 +
 +	if (srv_buf_pool_size <= 5 * 1024 * 1024) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Small buffer pool size (%luM), the flst_validate() "
 +			"debug function can cause a deadlock if the "
 +			"buffer pool fills up.",
 +			srv_buf_pool_size / 1024 / 1024);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	fsp_init();
 +	log_init();
 +
 +	lock_sys_create(srv_lock_table_size);
 +
 +	/* Create i/o-handler threads: */
 +
 +	for (i = 0; i < srv_n_file_io_threads; ++i) {
 +
 +		n[i] = i;
 +
 +		thread_handles[i] = os_thread_create(io_handler_thread, n + i, thread_ids + i);
 +		thread_started[i] = true;
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (0 != ut_strcmp(srv_log_group_home_dir, srv_arch_dir)) {
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: Error: you must set the log group home dir in my.cnf\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: the same as log arch dir.\n");
 +
 +		return(DB_ERROR);
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
 +	    >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
 +		/* log_block_convert_lsn_to_no() limits the returned block
 +		number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
 +		bytes, then we have a limit of 512 GB. If that limit is to
 +		be raised, then log_block_convert_lsn_to_no() must be
 +		modified. */
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Combined size of log files must be < 512 GB");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
 +		/* fil_io() takes ulint as an argument and we are passing
 +		(next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
 +		So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
 +		So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
 +		means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
 +		is 64 TB on 32 bit systems. */
 +		fprintf(stderr,
 +			" InnoDB: Error: combined size of log files"
 +			" must be < %lu GB\n",
 +			ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
 +
 +		return(DB_ERROR);
 +	}
 +
 +	sum_of_new_sizes = 0;
 +
 +	for (i = 0; i < srv_n_data_files; i++) {
 +#ifndef __WIN__
 +		if (sizeof(off_t) < 5
 +		    && srv_data_file_sizes[i]
 +		    >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
 +			ut_print_timestamp(stderr);
 +			fprintf(stderr,
 +				" InnoDB: Error: file size must be < 4 GB"
 +				" with this MySQL binary\n");
 +			ut_print_timestamp(stderr);
 +			fprintf(stderr,
 +				" InnoDB: and operating system combination,"
 +				" in some OS's < 2 GB\n");
 +
 +			return(DB_ERROR);
 +		}
 +#endif
 +		sum_of_new_sizes += srv_data_file_sizes[i];
 +	}
 +
 +	if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Tablespace size must be at least 10 MB");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	recv_sys_create();
 +	recv_sys_init(buf_pool_get_curr_size());
 +
 +	err = open_or_create_data_files(&create_new_db,
 +#ifdef UNIV_LOG_ARCHIVE
 +					&min_arch_log_no, &max_arch_log_no,
 +#endif /* UNIV_LOG_ARCHIVE */
 +					&min_flushed_lsn, &max_flushed_lsn,
 +					&sum_of_new_sizes);
 +	if (err == DB_FAIL) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"The system tablespace must be writable!");
 +
 +		return(DB_ERROR);
 +
 +	} else if (err != DB_SUCCESS) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Could not open or create the system tablespace. If "
 +			"you tried to add new data files to the system "
 +			"tablespace, and it failed here, you should now "
 +			"edit innodb_data_file_path in my.cnf back to what "
 +			"it was, and remove the new ibdata files InnoDB "
 +			"created in this failed attempt. InnoDB only wrote "
 +			"those files full of zeros, but did not yet use "
 +			"them in any way. But be careful: do not remove "
 +			"old data files which contain your precious data!");
 +
 +		return(err);
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	srv_normalize_path_for_win(srv_arch_dir);
 +	srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	dirnamelen = strlen(srv_log_group_home_dir);
 +	ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
 +	memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
 +
 +	/* Add a path separator if needed. */
 +	if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
 +		logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
 +	}
 +
 +	srv_log_file_size_requested = srv_log_file_size;
 +
 +	if (create_new_db) {
 +		bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
 +		ut_a(success);
 +
 +		min_flushed_lsn = max_flushed_lsn = log_get_lsn();
 +
 +		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +		err = create_log_files(create_new_db, logfilename, dirnamelen,
 +				       max_flushed_lsn, logfile0);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +	} else {
 +		for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
 +			os_offset_t	size;
 +			os_file_stat_t	stat_info;
 +
 +			sprintf(logfilename + dirnamelen,
 +				"ib_logfile%u", i);
 +
 +			err = os_file_get_status(
 +				logfilename, &stat_info, false);
 +
 +			if (err == DB_NOT_FOUND) {
 +				if (i == 0) {
 +					if (max_flushed_lsn
 +					    != min_flushed_lsn) {
 +						ib_logf(IB_LOG_LEVEL_ERROR,
 +							"Cannot create"
 +							" log files because"
 +							" data files are"
 +							" corrupt or"
 +							" not in sync"
 +							" with each other");
 +						return(DB_ERROR);
 +					}
 +
 +					if (max_flushed_lsn < (lsn_t) 1000) {
 +						ib_logf(IB_LOG_LEVEL_ERROR,
 +							"Cannot create"
 +							" log files because"
 +							" data files are"
 +							" corrupt or the"
 +							" database was not"
 +							" shut down cleanly"
 +							" after creating"
 +							" the data files.");
 +						return(DB_ERROR);
 +					}
 +
 +					err = create_log_files(
 +						create_new_db, logfilename,
 +						dirnamelen, max_flushed_lsn,
 +						logfile0);
 +
 +					if (err == DB_SUCCESS) {
 +						err = create_log_files_rename(
 +							logfilename,
 +							dirnamelen,
 +							max_flushed_lsn,
 +							logfile0);
 +					}
 +
 +					if (err != DB_SUCCESS) {
 +						return(err);
 +					}
 +
 +					/* Suppress the message about
 +					crash recovery. */
 +					max_flushed_lsn = min_flushed_lsn
 +						= log_get_lsn();
 +					goto files_checked;
 +				} else if (i < 2) {
 +					/* must have at least 2 log files */
 +					ib_logf(IB_LOG_LEVEL_ERROR,
 +						"Only one log file found.");
 +					return(err);
 +				}
 +
 +				/* opened all files */
 +				break;
 +			}
 +
 +			if (!srv_file_check_mode(logfilename)) {
 +				return(DB_ERROR);
 +			}
 +
 +			err = open_log_file(&files[i], logfilename, &size);
 +
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +
 +			ut_a(size != (os_offset_t) -1);
 +
 +			if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Log file %s size "
 +					UINT64PF " is not a multiple of"
 +					" innodb_page_size",
 +					logfilename, size);
 +				return(DB_ERROR);
 +			}
 +
 +			size >>= UNIV_PAGE_SIZE_SHIFT;
 +
 +			if (i == 0) {
 +				srv_log_file_size = size;
 +			} else if (size != srv_log_file_size) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Log file %s is"
 +					" of different size " UINT64PF " bytes"
 +					" than other log"
 +					" files " UINT64PF " bytes!",
 +					logfilename,
 +					size << UNIV_PAGE_SIZE_SHIFT,
 +					(os_offset_t) srv_log_file_size
 +					<< UNIV_PAGE_SIZE_SHIFT);
 +				return(DB_ERROR);
 +			}
 +		}
 +
 +		srv_n_log_files_found = i;
 +
 +		/* Create the in-memory file space objects. */
 +
 +		sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
 +
 +		fil_space_create(logfilename,
 +				 SRV_LOG_SPACE_FIRST_ID,
 +				 fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
 +				 FIL_LOG);
 +
 +		ut_a(fil_validate());
 +
 +		/* srv_log_file_size is measured in pages; if page size is 16KB,
 +		then we have a limit of 64TB on 32 bit systems */
 +		ut_a(srv_log_file_size <= ULINT_MAX);
 +
 +		for (unsigned j = 0; j < i; j++) {
 +			sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
 +
 +			if (!fil_node_create(logfilename,
 +					     (ulint) srv_log_file_size,
 +					     SRV_LOG_SPACE_FIRST_ID, FALSE)) {
 +				return(DB_ERROR);
 +			}
 +		}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +		/* Create the file space object for archived logs. Under
 +		MySQL, no archiving ever done. */
 +		fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
 +				 0, FIL_LOG);
 +#endif /* UNIV_LOG_ARCHIVE */
 +		log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
 +			       SRV_LOG_SPACE_FIRST_ID,
 +			       SRV_LOG_SPACE_FIRST_ID + 1);
 +	}
 +
 +files_checked:
 +	/* Open all log files and data files in the system
 +	tablespace: we keep them open until database
 +	shutdown */
 +
 +	fil_open_log_and_system_tablespace_files();
 +
 +	err = srv_undo_tablespaces_init(
 +		create_new_db,
 +		srv_undo_tablespaces,
 +		&srv_undo_tablespaces_open);
 +
 +	/* If the force recovery is set very high then we carry on regardless
 +	of all errors. Basically this is fingers crossed mode. */
 +
 +	if (err != DB_SUCCESS
 +	    && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
 +
 +		return(err);
 +	}
 +
 +	/* Initialize objects used by dict stats gathering thread, which
 +	can also be used by recovery if it tries to drop some table */
 +	if (!srv_read_only_mode) {
 +		dict_stats_thread_init();
 +	}
 +
 +	trx_sys_file_format_init();
 +
 +	trx_sys_create();
 +
 +	if (create_new_db) {
 +		ut_a(!srv_read_only_mode);
 +
 +		mtr_start(&mtr);
 +
 +		fsp_header_init(0, sum_of_new_sizes, &mtr);
 +
 +		mtr_commit(&mtr);
 +
 +		/* To maintain backward compatibility we create only
 +		the first rollback segment before the double write buffer.
 +		All the remaining rollback segments will be created later,
 +		after the double write buffer has been created. */
 +		trx_sys_create_sys_pages();
 +
 +		ib_bh = trx_sys_init_at_db_start();
 +		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
 +
 +		/* The purge system needs to create the purge view and
 +		therefore requires that the trx_sys is inited. */
 +
 +		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
 +
 +		err = dict_create();
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +
 +		srv_startup_is_before_trx_rollback_phase = FALSE;
 +
 +		bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
 +		ut_a(success);
 +
 +		min_flushed_lsn = max_flushed_lsn = log_get_lsn();
 +
 +		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +		/* Stamp the LSN to the data files. */
 +		fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
 +
 +		fil_flush_file_spaces(FIL_TABLESPACE);
 +
 +		err = create_log_files_rename(logfilename, dirnamelen,
 +					      max_flushed_lsn, logfile0);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +#ifdef UNIV_LOG_ARCHIVE
 +	} else if (srv_archive_recovery) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			" Starting archive recovery from a backup...");
 +
 +		err = recv_recovery_from_archive_start(
 +			min_flushed_lsn, srv_archive_recovery_limit_lsn,
 +			min_arch_log_no);
 +		if (err != DB_SUCCESS) {
 +
 +			return(DB_ERROR);
 +		}
 +		/* Since ibuf init is in dict_boot, and ibuf is needed
 +		in any disk i/o, first call dict_boot */
 +
 +		err = dict_boot();
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +
 +		ib_bh = trx_sys_init_at_db_start();
 +		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
 +
 +		/* The purge system needs to create the purge view and
 +		therefore requires that the trx_sys is inited. */
 +
 +		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
 +
 +		srv_startup_is_before_trx_rollback_phase = FALSE;
 +
 +		recv_recovery_from_archive_finish();
 +#endif /* UNIV_LOG_ARCHIVE */
 +	} else {
 +
 +		/* Check if we support the max format that is stamped
 +		on the system tablespace.
 +		Note:  We are NOT allowed to make any modifications to
 +		the TRX_SYS_PAGE_NO page before recovery  because this
 +		page also contains the max_trx_id etc. important system
 +		variables that are required for recovery.  We need to
 +		ensure that we return the system to a state where normal
 +		recovery is guaranteed to work. We do this by
 +		invalidating the buffer cache, this will force the
 +		reread of the page and restoration to its last known
 +		consistent state, this is REQUIRED for the recovery
 +		process to work. */
 +		err = trx_sys_file_format_max_check(
 +			srv_max_file_format_at_startup);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +
 +		/* Invalidate the buffer pool to ensure that we reread
 +		the page that we read above, during recovery.
 +		Note that this is not as heavy weight as it seems. At
 +		this point there will be only ONE page in the buf_LRU
 +		and there must be no page in the buf_flush list. */
 +		buf_pool_invalidate();
 +
 +		/* We always try to do a recovery, even if the database had
 +		been shut down normally: this is the normal startup path */
 +
 +		err = recv_recovery_from_checkpoint_start(
 +			LOG_CHECKPOINT, LSN_MAX,
 +			min_flushed_lsn, max_flushed_lsn);
 +
 +		if (err == DB_SUCCESS) {
 +			/* Initialize the change buffer. */
 +			err = dict_boot();
 +		}
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +
 +		if (!srv_read_only_mode) {
 +			if (sum_of_new_sizes > 0) {
 +				/* New data file(s) were added */
 +				mtr_start(&mtr);
 +				fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
 +				mtr_commit(&mtr);
 +				/* Immediately write the log record about
 +				increased tablespace size to disk, so that it
 +				is durable even if mysqld would crash
 +				quickly */
 +				log_buffer_flush_to_disk();
 +			}
 +		}
 +
 +		const ulint	tablespace_size_in_header
 +			= fsp_header_get_tablespace_size();
 +
 +#ifdef UNIV_DEBUG
 +		/* buf_debug_prints = TRUE; */
 +#endif /* UNIV_DEBUG */
 +		ulint		sum_of_data_file_sizes = 0;
 +
 +		for (ulint d = 0; d < srv_n_data_files; d++) {
 +			sum_of_data_file_sizes += srv_data_file_sizes[d];
 +		}
 +
 +		/* Compare the system tablespace file size to what is
 +		stored in FSP_SIZE. In open_or_create_data_files()
 +		we already checked that the file sizes match the
 +		innodb_data_file_path specification. */
 +		if (srv_read_only_mode
 +		    || sum_of_data_file_sizes == tablespace_size_in_header) {
 +			/* Do not complain about the size. */
 +		} else if (!srv_auto_extend_last_data_file
 +			   || sum_of_data_file_sizes
 +			   < tablespace_size_in_header) {
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Tablespace size stored in header is " ULINTPF
 +				" pages, but the sum of data file sizes is "
 +				ULINTPF " pages",
 +				tablespace_size_in_header,
 +				sum_of_data_file_sizes);
 +
 +			if (srv_force_recovery == 0
 +			    && sum_of_data_file_sizes
 +			    < tablespace_size_in_header) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Cannot start InnoDB. The tail of"
 +					" the system tablespace is"
 +					" missing. Have you edited"
 +					" innodb_data_file_path in my.cnf"
 +					" in an inappropriate way, removing"
 +					" data files from there?"
 +					" You can set innodb_force_recovery=1"
 +					" in my.cnf to force"
 +					" a startup if you are trying to"
 +					" recover a badly corrupt database.");
 +
 +				return(DB_ERROR);
 +			}
 +		}
 +
- 		/* This must precede recv_apply_hashed_log_recs(TRUE). */
++		/* This must precede recv_apply_hashed_log_recs(true). */
 +		ib_bh = trx_sys_init_at_db_start();
 +
 +		if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 +			/* Apply the hashed log records to the
 +			respective file pages, for the last batch of
 +			recv_group_scan_log_recs(). */
 +
- 			recv_apply_hashed_log_recs(TRUE);
++			recv_apply_hashed_log_recs(true);
 +			DBUG_PRINT("ib_log", ("apply completed"));
 +		}
 +
 +		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
 +
 +		/* The purge system needs to create the purge view and
 +		therefore requires that the trx_sys is inited. */
 +
 +		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
 +
 +		/* recv_recovery_from_checkpoint_finish needs trx lists which
 +		are initialized in trx_sys_init_at_db_start(). */
 +
 +		recv_recovery_from_checkpoint_finish();
 +
 +		if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
 +			/* The following call is necessary for the insert
 +			buffer to work with multiple tablespaces. We must
 +			know the mapping between space id's and .ibd file
 +			names.
 +
 +			In a crash recovery, we check that the info in data
 +			dictionary is consistent with what we already know
 +			about space id's from the call of
 +			fil_load_single_table_tablespaces().
 +
 +			In a normal startup, we create the space objects for
 +			every table in the InnoDB data dictionary that has
 +			an .ibd file.
 +
 +			We also determine the maximum tablespace id used. */
 +			dict_check_t	dict_check;
 +
 +			if (recv_needed_recovery) {
 +				dict_check = DICT_CHECK_ALL_LOADED;
 +			} else if (n_recovered_trx) {
 +				dict_check = DICT_CHECK_SOME_LOADED;
 +			} else {
 +				dict_check = DICT_CHECK_NONE_LOADED;
 +			}
 +
 +			/* Create the SYS_TABLESPACES and SYS_DATAFILES system table */
 +			err = dict_create_or_check_sys_tablespace();
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +
 +			sys_datafiles_created = true;
 +
 +			/* This function assumes that SYS_DATAFILES exists */
 +			dict_check_tablespaces_and_store_max_id(dict_check);
 +		}
 +
 +		if (!srv_force_recovery
 +		    && !recv_sys->found_corrupt_log
 +		    && (srv_log_file_size_requested != srv_log_file_size
 +			|| srv_n_log_files_found != srv_n_log_files)) {
 +			/* Prepare to replace the redo log files. */
 +
 +			if (srv_read_only_mode) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Cannot resize log files "
 +					"in read-only mode.");
 +				return(DB_READ_ONLY);
 +			}
 +
 +			/* Clean the buffer pool. */
 +			bool success = buf_flush_list(
 +				ULINT_MAX, LSN_MAX, NULL);
 +			ut_a(success);
 +
 +			DBUG_EXECUTE_IF("innodb_log_abort_1",
 +					return(DB_ERROR););
 +
 +			min_flushed_lsn = max_flushed_lsn = log_get_lsn();
 +
 +			ib_logf(IB_LOG_LEVEL_WARN,
 +				"Resizing redo log from %u*%u to %u*%u pages"
 +				", LSN=" LSN_PF,
 +				(unsigned) i,
 +				(unsigned) srv_log_file_size,
 +				(unsigned) srv_n_log_files,
 +				(unsigned) srv_log_file_size_requested,
 +				max_flushed_lsn);
 +
 +			buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +			/* Flush the old log files. */
 +			log_buffer_flush_to_disk();
 +			/* If innodb_flush_method=O_DSYNC,
 +			we need to explicitly flush the log buffers. */
 +			fil_flush(SRV_LOG_SPACE_FIRST_ID);
 +
 +			ut_ad(max_flushed_lsn == log_get_lsn());
 +
 +			/* Prohibit redo log writes from any other
 +			threads until creating a log checkpoint at the
 +			end of create_log_files(). */
 +			ut_d(recv_no_log_write = TRUE);
 +			ut_ad(!buf_pool_check_no_pending_io());
 +
 +			DBUG_EXECUTE_IF("innodb_log_abort_3",
 +					return(DB_ERROR););
 +
 +			/* Stamp the LSN to the data files. */
 +			fil_write_flushed_lsn_to_data_files(
 +				max_flushed_lsn, 0);
 +
 +			DBUG_EXECUTE_IF("innodb_log_abort_4", err = DB_ERROR;);
 +
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +
 +			fil_flush_file_spaces(FIL_TABLESPACE);
 +
 +			/* Close and free the redo log files, so that
 +			we can replace them. */
 +			fil_close_log_files(true);
 +
 +			DBUG_EXECUTE_IF("innodb_log_abort_5",
 +					return(DB_ERROR););
 +
 +			/* Free the old log file space. */
 +			log_group_close_all();
 +
 +			ib_logf(IB_LOG_LEVEL_WARN,
 +				"Starting to delete and rewrite log files.");
 +
 +			srv_log_file_size = srv_log_file_size_requested;
 +
 +			err = create_log_files(create_new_db, logfilename,
 +					       dirnamelen, max_flushed_lsn,
 +					       logfile0);
 +
 +			if (err == DB_SUCCESS) {
 +				err = create_log_files_rename(
 +					logfilename, dirnamelen,
 +					max_flushed_lsn, logfile0);
 +			}
 +
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +		}
 +
 +		srv_startup_is_before_trx_rollback_phase = FALSE;
 +		recv_recovery_rollback_active();
 +
 +		/* It is possible that file_format tag has never
 +		been set. In this case we initialize it to minimum
 +		value.  Important to note that we can do it ONLY after
 +		we have finished the recovery process so that the
 +		image of TRX_SYS_PAGE_NO is not stale. */
 +		trx_sys_file_format_tag_init();
 +	}
 +
 +	ut_ad(err == DB_SUCCESS);
 +	ut_a(sum_of_new_sizes != ULINT_UNDEFINED);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	/* Archiving is always off under MySQL */
 +	if (!srv_log_archive_on) {
 +		ut_a(DB_SUCCESS == log_archive_noarchivelog());
 +	} else {
 +		mutex_enter(&(log_sys->mutex));
 +
 +		start_archive = FALSE;
 +
 +		if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +			start_archive = TRUE;
 +		}
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		if (start_archive) {
 +			ut_a(DB_SUCCESS == log_archive_archivelog());
 +		}
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	/* fprintf(stderr, "Max allowed record size %lu\n",
 +	page_get_free_space_of_empty() / 2); */
 +
 +	if (buf_dblwr == NULL) {
 +		/* Create the doublewrite buffer to a new tablespace */
 +
 +		buf_dblwr_create();
 +	}
 +
 +	/* Here the double write buffer has already been created and so
 +	any new rollback segments will be allocated after the double
 +	write buffer. The default segment should already exist.
 +	We create the new segments only if it's a new database or
 +	the database was shutdown cleanly. */
 +
 +	/* Note: When creating the extra rollback segments during an upgrade
 +	we violate the latching order, even if the change buffer is empty.
 +	We make an exception in sync0sync.cc and check srv_is_being_started
 +	for that violation. It cannot create a deadlock because we are still
 +	running in single threaded mode essentially. Only the IO threads
 +	should be running at this stage. */
 +
 +	ut_a(srv_undo_logs > 0);
 +	ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
 +
 +	/* The number of rsegs that exist in InnoDB is given by status
 +	variable srv_available_undo_logs. The number of rsegs to use can
 +	be set using the dynamic global variable srv_undo_logs. */
 +
 +	srv_available_undo_logs = trx_sys_create_rsegs(
 +		srv_undo_tablespaces, srv_undo_logs);
 +
 +	if (srv_available_undo_logs == ULINT_UNDEFINED) {
 +		/* Can only happen if server is read only. */
 +		ut_a(srv_read_only_mode);
 +		srv_undo_logs = ULONG_UNDEFINED;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		/* Create the thread which watches the timeouts
 +		for lock waits */
 +		thread_handles[2 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			lock_wait_timeout_thread,
 +			NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
 +		thread_started[2 + SRV_MAX_N_IO_THREADS] = true;
 +
 +		/* Create the thread which warns of long semaphore waits */
 +		thread_handles[3 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			srv_error_monitor_thread,
 +			NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
 +		thread_started[3 + SRV_MAX_N_IO_THREADS] = true;
 +
 +		/* Create the thread which prints InnoDB monitor info */
 +		thread_handles[4 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			srv_monitor_thread,
 +			NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
 +		thread_started[4 + SRV_MAX_N_IO_THREADS] = true;
 +	}
 +
 +	/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
 +	err = dict_create_or_check_foreign_constraint_tables();
 +	if (err != DB_SUCCESS) {
 +		return(err);
 +	}
 +
 +	/* Create the SYS_TABLESPACES and SYS_DATAFILES system tables if we
 +	have not done that already on crash recovery. */
 +	if (sys_datafiles_created == false) {
 +		err = dict_create_or_check_sys_tablespace();
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +	}
 +
 +	srv_is_being_started = FALSE;
 +
 +	ut_a(trx_purge_state() == PURGE_STATE_INIT);
 +
 +	/* Create the master thread which does purge and other utility
 +	operations */
 +
 +	if (!srv_read_only_mode) {
 +
 +		thread_handles[1 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			srv_master_thread,
 +			NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
 +		thread_started[1 + SRV_MAX_N_IO_THREADS] = true;
 +	}
 +
 +	if (!srv_read_only_mode
 +	    && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
 +
 +		thread_handles[5 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			srv_purge_coordinator_thread,
 +			NULL, thread_ids + 5 + SRV_MAX_N_IO_THREADS);
 +
 +		thread_started[5 + SRV_MAX_N_IO_THREADS] = true;
 +
 +		ut_a(UT_ARR_SIZE(thread_ids)
 +		     > 5 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
 +
 +		/* We've already created the purge coordinator thread above. */
 +		for (i = 1; i < srv_n_purge_threads; ++i) {
 +			thread_handles[5 + i + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +				srv_worker_thread, NULL,
 +				thread_ids + 5 + i + SRV_MAX_N_IO_THREADS);
 +			thread_started[5 + i + SRV_MAX_N_IO_THREADS] = true;
 +		}
 +
 +		srv_start_wait_for_purge_to_start();
 +
 +	} else {
 +		purge_sys->state = PURGE_STATE_DISABLED;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		buf_flush_page_cleaner_thread_handle = os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
 +		buf_flush_page_cleaner_thread_started = true;
 +	}
 +
 +	if (srv_print_verbose_log) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"%s started; log sequence number " LSN_PF "",
 +			INNODB_VERSION_STR, srv_start_lsn);
 +	}
 +
 +	if (srv_force_recovery > 0) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"!!! innodb_force_recovery is set to %lu !!!",
 +			(ulong) srv_force_recovery);
 +	}
 +
 +	if (srv_force_recovery == 0) {
 +		/* In the insert buffer we may have even bigger tablespace
 +		id's, because we may have dropped those tablespaces, but
 +		insert buffer merge has not had time to clean the records from
 +		the ibuf tree. */
 +
 +		ibuf_update_max_tablespace_id();
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		/* Create the buffer pool dump/load thread */
 +		buf_dump_thread_handle = os_thread_create(buf_dump_thread, NULL, NULL);
 +		buf_dump_thread_started = true;
 +
 +		/* Create the dict stats gathering thread */
 +		dict_stats_thread_handle = os_thread_create(dict_stats_thread, NULL, NULL);
 +		dict_stats_thread_started = true;
 +
 +		/* Create the thread that will optimize the FTS sub-system. */
 +		fts_optimize_init();
 +	}
 +
 +	srv_was_started = TRUE;
 +
 +	return(DB_SUCCESS);
 +}
 +
 +#if 0
 +/********************************************************************
 +Sync all FTS cache before shutdown.
 +This function is compiled out by the enclosing #if 0. It walks
 +dict_sys->table_LRU and then dict_sys->table_non_LRU and calls
 +fts_sync_table() on every table whose fts pointer is non-NULL. */
 +static
 +void
 +srv_fts_close(void)
 +/*===============*/
 +{
 +	dict_table_t*	table;	/* cursor over the two dictionary table lists */
 +
 +	for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
 +	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
 +		fts_t*          fts = table->fts;
 +
 +		if (fts != NULL) {
 +			fts_sync_table(table);
 +		}
 +	}
 +
 +	for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);	/* second pass: the non-LRU list */
 +	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {	/* NOTE(review): advances with the table_LRU node name; presumably both lists share that link field -- confirm */
 +		fts_t*          fts = table->fts;
 +
 +		if (fts != NULL) {
 +			fts_sync_table(table);
 +		}
 +	}
 +}
 +#endif
 +
 +/****************************************************************//**
 +Shuts down the InnoDB database.
 +If srv_was_started is not set, the function only logs a warning (when
 +srv_is_being_started is set) and returns. Otherwise it performs the real
 +shutdown through logs_empty_and_mark_files_at_shutdown(), asks the InnoDB
 +threads to exit while polling os_thread_count for at most 1000 rounds,
 +closes the monitor/temporary files, frees the subsystems in the order
 +listed below and finally clears srv_was_started and
 +srv_start_has_been_called.
 +@return	DB_SUCCESS or error code (every return statement in this
 +function returns DB_SUCCESS) */
 +UNIV_INTERN
 +dberr_t
 +innobase_shutdown_for_mysql(void)
 +/*=============================*/
 +{
 +	ulint	i;	/* counter for the wake-up rounds and, on Windows, the thread slots */
 +
 +	if (!srv_was_started) {
 +		if (srv_is_being_started) {
 +			ib_logf(IB_LOG_LEVEL_WARN,
 +				"Shutting down an improperly started, "
 +				"or created database!");
 +		}
 +
 +		return(DB_SUCCESS);
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		/* Shutdown the FTS optimize sub system. */
 +		fts_optimize_start_shutdown();
 +
 +		fts_optimize_end();
 +	}
 +
 +	/* 1. Flush the buffer pool to disk, write the current lsn to
 +	the tablespace header(s), and copy all log data to archive.
 +	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
 +	just free data structures after the shutdown. */
 +
 +	logs_empty_and_mark_files_at_shutdown();
 +
 +	if (srv_conc_get_active_threads() != 0) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"Query counter shows %ld queries still "
 +			"inside InnoDB at shutdown",
 +			srv_conc_get_active_threads());
 +	}
 +
 +	/* 2. Make all threads created by InnoDB to exit */
 +
 +	srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
 +
 +	/* All threads end up waiting for certain events. Put those events
 +	to the signaled state. Then the threads will exit themselves after
 +	os_event_wait(). */
 +
 +	for (i = 0; i < 1000; i++) {	/* at most 1000 rounds; every round ends with os_thread_sleep(100000) */
 +		/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
 +		HERE OR EARLIER */
 +
 +		if (!srv_read_only_mode) {
 +			/* a. Let the lock timeout thread exit */
 +			os_event_set(lock_sys->timeout_event);
 +
 +			/* b. srv error monitor thread exits automatically,
 +			no need to do anything here */
 +
 +			/* c. We wake the master thread so that it exits */
 +			srv_wake_master_thread();
 +
 +			/* d. Wakeup purge threads. */
 +			srv_purge_wakeup();
 +		}
 +
 +		/* e. Exit the i/o threads */
 +
 +		os_aio_wake_all_threads_at_shutdown();
 +
 +		/* f. dict_stats_thread is signaled from
 +		logs_empty_and_mark_files_at_shutdown() and should have
 +		already quit or is quitting right now. */
 +
 +		os_mutex_enter(os_sync_mutex);	/* os_thread_count is read under os_sync_mutex */
 +
 +		if (os_thread_count == 0) {
 +			/* All the threads have exited or are just exiting;
 +			NOTE that the threads may not have completed their
 +			exit yet. Should we use pthread_join() to make sure
 +			they have exited? If we did, we would have to
 +			remove the pthread_detach() from
 +			os_thread_exit().  Now we just sleep 0.1
 +			seconds and hope that is enough! */
 +
 +			os_mutex_exit(os_sync_mutex);
 +
 +			os_thread_sleep(100000);
 +
 +			break;
 +		}
 +
 +		os_mutex_exit(os_sync_mutex);
 +
 +		os_thread_sleep(100000);	/* pause before the next wake-up round */
 +	}
 +
 +	if (i == 1000) {	/* the loop ran out without observing os_thread_count == 0 */
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"%lu threads created by InnoDB"
 +			" had not exited at shutdown!",
 +			(ulong) os_thread_count);	/* read here without os_sync_mutex, unlike inside the loop */
 +	}
 +
 +	if (srv_monitor_file) {
 +		fclose(srv_monitor_file);
 +		srv_monitor_file = 0;
 +		if (srv_monitor_file_name) {
 +			unlink(srv_monitor_file_name);
 +			mem_free(srv_monitor_file_name);
 +		}
 +	}
 +
 +	if (srv_dict_tmpfile) {
 +		fclose(srv_dict_tmpfile);
 +		srv_dict_tmpfile = 0;
 +	}
 +
 +	if (srv_misc_tmpfile) {
 +		fclose(srv_misc_tmpfile);
 +		srv_misc_tmpfile = 0;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		dict_stats_thread_deinit();
 +	}
 +
 +#ifdef __WIN__
 +	/* MDEV-361: ha_innodb.dll leaks handles on Windows
 +	MDEV-7403: should not pass recv_writer_thread_handle to
 +	CloseHandle().
 +
 +	On Windows we should call CloseHandle() for all
 +	open thread handles. */
 +	if (os_thread_count == 0) {
 +		for (i = 0; i < SRV_MAX_N_IO_THREADS + 6 + 32; ++i) {	/* NOTE(review): bound presumably equals the size of thread_handles[]/thread_started[] -- confirm */
 +			if (thread_started[i]) {
 +				CloseHandle(thread_handles[i]);
 +			}
 +		}
 +
 +		if (buf_flush_page_cleaner_thread_started) {
 +			CloseHandle(buf_flush_page_cleaner_thread_handle);
 +		}
 +
 +		if (buf_dump_thread_started) {
 +			CloseHandle(buf_dump_thread_handle);
 +		}
 +
 +		if (dict_stats_thread_started) {
 +			CloseHandle(dict_stats_thread_handle);
 +		}
 +	}
 +#endif /* __WIN__ */
 +
 +	/* This must be disabled before closing the buffer pool
 +	and closing the data dictionary.  */
 +	btr_search_disable();
 +
 +	ibuf_close();
 +	log_shutdown();
 +	trx_sys_file_format_close();
 +	trx_sys_close();
 +	lock_sys_close();
 +
 +	/* We don't create these mutexes in RO mode because we don't create
 +	the temp files that they cover. */
 +	if (!srv_read_only_mode) {
 +		mutex_free(&srv_monitor_file_mutex);
 +		mutex_free(&srv_dict_tmpfile_mutex);
 +		mutex_free(&srv_misc_tmpfile_mutex);
 +	}
 +
 +	dict_close();
 +	btr_search_sys_free();
 +
 +	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
 +	them */
 +	os_aio_free();
 +	que_close();
 +	row_mysql_close();
 +	srv_mon_free();
 +	sync_close();
 +	srv_free();
 +	fil_close();
 +
 +	/* 4. Free the os_conc_mutex and all os_events and os_mutexes */
 +
 +	os_sync_free();
 +
 +	/* 5. Free all allocated memory */
 +
 +	pars_lexer_close();
 +	log_mem_free();
 +	buf_pool_free(srv_buf_pool_instances);
 +	mem_close();
 +
 +	/* ut_free_all_mem() frees all allocated memory not freed yet
 +	in shutdown, and it will also free the ut_list_mutex, so it
 +	should be the last one for all operation */
 +	ut_free_all_mem();
 +
 +	if (os_thread_count != 0
 +	    || os_event_count != 0
 +	    || os_mutex_count != 0
 +	    || os_fast_mutex_count != 0) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"Some resources were not cleaned up in shutdown: "
 +			"threads %lu, events %lu, os_mutexes %lu, "
 +			"os_fast_mutexes %lu",
 +			(ulong) os_thread_count, (ulong) os_event_count,
 +			(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
 +	}
 +
 +	if (dict_foreign_err_file) {
 +		fclose(dict_foreign_err_file);	/* the pointer is not reset afterwards, unlike the srv_*_file handles above */
 +	}
 +
 +	if (srv_print_verbose_log) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Shutdown completed; log sequence number " LSN_PF "",
 +			srv_shutdown_lsn);
 +	}
 +
 +	srv_was_started = FALSE;	/* a later call takes the early-return path at the top */
 +	srv_start_has_been_called = FALSE;
 +
 +	return(DB_SUCCESS);
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +
 +/********************************************************************
 +Signal all per-table background threads to shutdown, and wait for them to do
 +so.
 +The function makes two passes over dict_sys->table_LRU. The first pass runs
 +while holding dict_sys->mutex and calls fts_start_shutdown() for every table
 +with a non-NULL fts pointer. The second pass runs after the mutex has been
 +released and calls fts_shutdown() for those tables. ut_a() assertions verify
 +that the list head is the same in both passes and that the element remembered
 +as last still has no successor. */
 +UNIV_INTERN
 +void
 +srv_shutdown_table_bg_threads(void)
 +/*===============================*/
 +{
 +	dict_table_t*	table;	/* cursor over dict_sys->table_LRU */
 +	dict_table_t*	first;	/* list head seen in the first pass */
 +	dict_table_t*	last = NULL;	/* list tail seen in the first pass; stays NULL for an empty list */
 +
 +	mutex_enter(&dict_sys->mutex);
 +
 +	/* Signal all threads that they should stop. */
 +	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
 +	first = table;
 +	while (table) {
 +		dict_table_t*	next;
 +		fts_t*		fts = table->fts;
 +
 +		if (fts != NULL) {
 +			fts_start_shutdown(table, fts);
 +		}
 +
 +		next = UT_LIST_GET_NEXT(table_LRU, table);
 +
 +		if (!next) {
 +			last = table;	/* remember the tail for the check in the second pass */
 +		}
 +
 +		table = next;
 +	}
 +
 +	/* We must release dict_sys->mutex here; if we hold on to it in the
 +	loop below, we will deadlock if any of the background threads try to
 +	acquire it (for example, the FTS thread by calling que_eval_sql).
 +
 +	Releasing it here and going through dict_sys->table_LRU without
 +	holding it is safe because:
 +
 +	 a) MySQL only starts the shutdown procedure after all client
 +	 threads have been disconnected and no new ones are accepted, so no
 +	 new tables are added or old ones dropped.
 +
 +	 b) Despite its name, the list is not LRU, and the order stays
 +	 fixed.
 +
 +	To safeguard against the above assumptions ever changing, we store
 +	the first and last items in the list above, and then check that
 +	they've stayed the same below. */
 +
 +	mutex_exit(&dict_sys->mutex);
 +
 +	/* Wait for the threads of each table to stop. This is not inside
 +	the above loop, because by signaling all the threads first we can
 +	overlap their shutting down delays. */
 +	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
 +	ut_a(first == table);	/* the list head must not have changed */
 +	while (table) {
 +		dict_table_t*	next;
 +		fts_t*		fts = table->fts;
 +
 +		if (fts != NULL) {
 +			fts_shutdown(table, fts);
 +		}
 +
 +		next = UT_LIST_GET_NEXT(table_LRU, table);
 +
 +		if (table == last) {
 +			ut_a(!next);	/* nothing may have been appended after the remembered tail */
 +		}
 +
 +		table = next;
 +	}
 +}
 +
 +/*****************************************************************//**
 +Get the meta-data filename from the table name.
 +The path is built by os_file_make_remote_pathname() with the "cfg" suffix
 +when DICT_TF_HAS_DATA_DIR(table->flags) holds, and by fil_make_ibd_name()
 +otherwise. If the last four characters of that path are ".cfg" it is copied
 +unchanged; otherwise those four characters are replaced by ".cfg". The
 +temporary path is freed and the result is passed through
 +srv_normalize_path_for_win(). */
 +UNIV_INTERN
 +void
 +srv_get_meta_data_filename(
 +/*=======================*/
 +	dict_table_t*	table,		/*!< in: table */
 +	char*			filename,	/*!< out: buffer that receives the .cfg file name */
 +	ulint			max_len)	/*!< in: filename max length */
 +{
 +	ulint			len;	/* strlen of the generated path */
 +	char*			path;	/* allocated path; released with mem_free() below */
 +	char*			suffix;	/* points at the last suffix_len characters */
 +	static const ulint	suffix_len = strlen(".cfg");
 +
 +	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
 +		dict_get_and_save_data_dir_path(table, false);
 +		ut_a(table->data_dir_path);
 +
 +		path = os_file_make_remote_pathname(
 +			table->data_dir_path, table->name, "cfg");
 +	} else {
 +		path = fil_make_ibd_name(table->name, false);
 +	}
 +
 +	ut_a(path);
 +	len = ut_strlen(path);
 +	ut_a(max_len >= len);	/* NOTE(review): the copies below also store a terminating NUL (len + 1 bytes); confirm whether max_len accounts for it */
 +
 +	suffix = path + (len - suffix_len);	/* NOTE(review): assumes len >= suffix_len -- confirm */
 +	if (strncmp(suffix, ".cfg", suffix_len) == 0) {
 +		strcpy(filename, path);
 +	} else {
 +		ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
 +
 +		strncpy(filename, path, len - suffix_len);	/* copies the part before the old suffix; no NUL is stored yet */
 +		suffix = filename + (len - suffix_len);
 +		strcpy(suffix, ".cfg");	/* stores the new suffix and the terminating NUL */
 +	}
 +
 +	mem_free(path);
 +
 +	srv_normalize_path_for_win(filename);
 +}
diff --cc storage/maria/ha_maria.cc
index 3e0872b9a89,44f7b466a2a..01197ddae6d
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@@ -1348,11 -1335,11 +1348,11 @@@ int ha_maria::check(THD * thd, HA_CHECK
                                   my_default_record_cache_size, READ_CACHE,
                                   share->pack.header_length, 1, MYF(MY_WME))))
        {
-         error= maria_chk_data_link(&param, file,
-                                    MY_TEST(param.testflag & T_EXTEND));
-         end_io_cache(&(param.read_cache));
+         error= maria_chk_data_link(param, file,
 -                                   test(param->testflag & T_EXTEND));
++                                   MY_TEST(param->testflag & T_EXTEND));
+         end_io_cache(&param->read_cache);
        }
-       param.testflag= old_testflag;
+       param->testflag= old_testflag;
      }
    }
    if (!error)
@@@ -1472,12 -1459,10 +1472,12 @@@ int ha_maria::repair(THD * thd, HA_CHEC
                                table->s->path.str);
        continue;
      }
-     param.testflag &= ~T_QUICK;
-     if ((param.testflag & T_REP_BY_SORT))
+     param->testflag &= ~T_QUICK;
+     if (param->testflag & T_REP_BY_SORT)
      {
-       param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP;
+       param->testflag= (param->testflag & ~T_REP_BY_SORT) | T_REP;
 +      if (thd->vio_ok())
-         _ma_check_print_info(&param, "Retrying repair with keycache");
++        _ma_check_print_info(param, "Retrying repair with keycache");
        sql_print_information("Retrying repair of: '%s' with keycache",
                              table->s->path.str);
        continue;
@@@ -1992,11 -1972,8 +1992,11 @@@ int ha_maria::enable_indexes(uint mode
        Normally table should be locked.  This test is mostly for safety.
      */
      if (likely(file->lock_type != F_UNLCK))
-       param.testflag|= T_NO_LOCKS;
+       param->testflag|= T_NO_LOCKS;
  
 +    if (file->create_unique_index_by_sort)
-       param.testflag|= T_CREATE_UNIQUE_BY_SORT;
++      param->testflag|= T_CREATE_UNIQUE_BY_SORT;
 +
      if (bulk_insert_single_undo == BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)
      {
        bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_REPAIR;
diff --cc storage/myisam/ha_myisam.cc
index 2b70518c8fd,f63b9c85372..2cfaa5ebdcc
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@@ -921,10 -905,10 +921,10 @@@ int ha_myisam::check(THD* thd, HA_CHECK
                                   my_default_record_cache_size, READ_CACHE,
                                   share->pack.header_length, 1, MYF(MY_WME))))
        {
-         error= chk_data_link(&param, file, MY_TEST(param.testflag & T_EXTEND));
-         end_io_cache(&(param.read_cache));
 -        error= chk_data_link(param, file, test(param->testflag & T_EXTEND));
 -        end_io_cache(&(param->read_cache));
++        error= chk_data_link(param, file, MY_TEST(param->testflag & T_EXTEND));
++        end_io_cache(&param->read_cache);
        }
-       param.testflag= old_testflag;
+       param->testflag= old_testflag;
      }
    }
    if (!error)
@@@ -1471,19 -1450,16 +1471,19 @@@ int ha_myisam::enable_indexes(uint mode
        Normally table should be locked.  This test is mostly for safety.
      */
      if (likely(file->lock_type != F_UNLCK))
-       param.testflag|= T_NO_LOCKS;
-     
+       param->testflag|= T_NO_LOCKS;
 -    
++
 +    if (file->create_unique_index_by_sort)
-       param.testflag|= T_CREATE_UNIQUE_BY_SORT;
++      param->testflag|= T_CREATE_UNIQUE_BY_SORT;
 +
-     param.myf_rw&= ~MY_WAIT_IF_FULL;
-     param.sort_buffer_length=  THDVAR(thd, sort_buffer_size);
-     param.stats_method= (enum_handler_stats_method)THDVAR(thd, stats_method);
-     param.tmpdir=&mysql_tmpdir_list;
-     if ((error= (repair(thd,param,0) != HA_ADMIN_OK)) && param.retry_repair)
+     param->myf_rw&= ~MY_WAIT_IF_FULL;
+     param->sort_buffer_length=  THDVAR(thd, sort_buffer_size);
+     param->stats_method= (enum_handler_stats_method)THDVAR(thd, stats_method);
+     param->tmpdir=&mysql_tmpdir_list;
+     if ((error= (repair(thd,*param,0) != HA_ADMIN_OK)) && param->retry_repair)
      {
        sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, retrying",
-                         my_errno, param.db_name, param.table_name);
+                         my_errno, param->db_name, param->table_name);
        /*
          Repairing by sort failed. Now try standard repair method.
          Still we want to fix only index file. If data file corruption
diff --cc storage/xtradb/include/log0recv.h
index 6955491bac8,afac0d4f4ab..9e6001caf71
--- a/storage/xtradb/include/log0recv.h
+++ b/storage/xtradb/include/log0recv.h
@@@ -1,6 -1,7 +1,7 @@@
  /*****************************************************************************
  
 -Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
 +Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+ Copyright (c) 2017, MariaDB Corporation.
  
  This program is free software; you can redistribute it and/or modify it under
  the terms of the GNU General Public License as published by the Free Software
@@@ -299,20 -310,20 +300,12 @@@ voi
  recv_sys_var_init(void);
  /*===================*/
  #endif /* !UNIV_HOTBACKUP */
--/*******************************************************************//**
--Empties the hash table of stored log records, applying them to appropriate
--pages. */
++/** Apply the hash table of stored log records to persistent data pages.
++@param[in]	last_batch	whether the change buffer merge will be
++				performed as part of the operation */
  UNIV_INTERN
  void
--recv_apply_hashed_log_recs(
--/*=======================*/
--	ibool	allow_ibuf);	/*!< in: if TRUE, also ibuf operations are
--				allowed during the application; if FALSE,
--				no ibuf operations are allowed, and after
--				the application all file pages are flushed to
--				disk and invalidated in buffer pool: this
--				alternative means that no new log records
--				can be generated during the application */
++recv_apply_hashed_log_recs(bool last_batch);
  #ifdef UNIV_HOTBACKUP
  /*******************************************************************//**
  Applies log records in the hash table to a backup. */
@@@ -449,7 -479,38 +444,21 @@@ struct recv_sys_t
  	ulint		n_addrs;/*!< number of not processed hashed file
  				addresses in the hash table */
  
 -/* If you modified the following defines at original file,
 -   You should also modify them. */
 -/* defined in os0file.c */
 -#define OS_AIO_MERGE_N_CONSECUTIVE	64
 -/* defined in log0recv.c */
 -#define RECV_READ_AHEAD_AREA	32
 -	time_t		stats_recv_start_time;
 -	ulint		stats_recv_turns;
 -
 -	ulint		stats_read_requested_pages;
 -	ulint		stats_read_in_area[RECV_READ_AHEAD_AREA];
 -
 -	ulint		stats_read_io_pages;
 -	ulint		stats_read_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE];
 -	ulint		stats_write_io_pages;
 -	ulint		stats_write_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE];
 -
 -	ulint		stats_doublewrite_check_pages;
 -	ulint		stats_doublewrite_overwrite_pages;
 -
 -	ulint		stats_recover_pages_with_read;
 -	ulint		stats_recover_pages_without_read;
 -
 -	ulint		stats_log_recs;
 -	ulint		stats_log_len_sum;
 +	recv_dblwr_t	dblwr;
+ 
 -	ulint		stats_applied_log_recs;
 -	ulint		stats_applied_log_len_sum;
 -	ulint		stats_pages_already_new;
++	/** Determine whether redo log recovery progress should be reported.
++	@param[in]	time	the current time
++	@return	whether progress should be reported
++		(the last report was at least 15 seconds ago) */
++	bool report(ib_time_t time)
++	{
++		if (time - progress_time < 15) {
++			return false;
++		}
+ 
 -	ib_uint64_t	stats_oldest_modified_lsn;
 -	ib_uint64_t	stats_newest_modified_lsn;
++		progress_time = time;
++		return true;
++	}
  };
  
  /** The recovery system */
diff --cc storage/xtradb/log/log0log.cc
index b39a8ed1829,00000000000..3627d85da63
mode 100644,000000..100644
--- a/storage/xtradb/log/log0log.cc
+++ b/storage/xtradb/log/log0log.cc
@@@ -1,4074 -1,0 +1,4079 @@@
 +/*****************************************************************************
 +
 +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
 +Copyright (c) 2009, Google Inc.
- Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
++Copyright (c) 2017, MariaDB Corporation.
 +
 +Portions of this file contain modifications contributed and copyrighted by
 +Google, Inc. Those modifications are gratefully acknowledged and are described
 +briefly in the InnoDB documentation. The contributions by Google are
 +incorporated with their permission, and subject to the conditions contained in
 +the file COPYING.Google.
 +
 +This program is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free Software
 +Foundation; version 2 of the License.
 +
 +This program is distributed in the hope that it will be useful, but WITHOUT
 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License along with
 +this program; if not, write to the Free Software Foundation, Inc.,
 +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
 +
 +*****************************************************************************/
 +
 +/**************************************************//**
 +@file log/log0log.cc
 +Database log
 +
 +Created 12/9/1995 Heikki Tuuri
 +*******************************************************/
 +
 +#include "config.h"
 +#ifdef HAVE_ALLOCA_H
 +#include "alloca.h"
 +#elif defined(HAVE_MALLOC_H) 
 +#include "malloc.h"
 +#endif
 +
 +#include "log0log.h"
 +
 +#ifdef UNIV_NONINL
 +#include "log0log.ic"
 +#endif
 +
 +#ifndef UNIV_HOTBACKUP
 +#include "mem0mem.h"
 +#include "buf0buf.h"
 +#include "buf0flu.h"
 +#include "srv0srv.h"
 +#include "log0recv.h"
 +#include "fil0fil.h"
 +#include "dict0boot.h"
 +#include "srv0srv.h"
 +#include "srv0start.h"
 +#include "trx0sys.h"
 +#include "trx0trx.h"
 +#include "trx0roll.h"
 +#include "srv0mon.h"
 +
 +/*
 +General philosophy of InnoDB redo-logs:
 +
 +1) Every change to a contents of a data page must be done
 +through mtr, which in mtr_commit() writes log records
 +to the InnoDB redo log.
 +
 +2) Normally these changes are performed using a mlog_write_ulint()
 +or similar function.
 +
 +3) In some page level operations only a code number of a
 +c-function and its parameters are written to the log to
 +reduce the size of the log.
 +
 +  3a) You should not add parameters to these kind of functions
 +  (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
 +
 +  3b) You should not add such functionality which either change
 +  working when compared with the old or are dependent on data
 +  outside of the page. These kind of functions should implement
 +  self-contained page transformation and it should be unchanged
 +  if you don't have very essential reasons to change log
 +  semantics or format.
 +
 +*/
 +
 +/* Global log system variable */
 +UNIV_INTERN log_t*	log_sys	= NULL;
 +
 +/** Pointer to the log checksum calculation function */
 +UNIV_INTERN log_checksum_func_t log_checksum_algorithm_ptr	=
 +	log_block_calc_checksum_innodb;
 +
 +#ifdef UNIV_PFS_RWLOCK
 +UNIV_INTERN mysql_pfs_key_t	checkpoint_lock_key;
 +# ifdef UNIV_LOG_ARCHIVE
 +UNIV_INTERN mysql_pfs_key_t	archive_lock_key;
 +# endif
 +#endif /* UNIV_PFS_RWLOCK */
 +
 +#ifdef UNIV_PFS_MUTEX
 +UNIV_INTERN mysql_pfs_key_t	log_sys_mutex_key;
 +UNIV_INTERN mysql_pfs_key_t	log_flush_order_mutex_key;
 +#endif /* UNIV_PFS_MUTEX */
 +
 +#ifdef UNIV_DEBUG
 +UNIV_INTERN ibool	log_do_write = TRUE;
 +#endif /* UNIV_DEBUG */
 +
 +/* These control how often we print warnings if the last checkpoint is too
 +old */
 +UNIV_INTERN ibool	log_has_printed_chkp_warning = FALSE;
 +UNIV_INTERN time_t	log_last_warning_time;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +/* Pointer to this variable is used as the i/o-message when we do i/o to an
 +archive */
 +UNIV_INTERN byte	log_archive_io;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +UNIV_INTERN ulint       log_disable_checkpoint_active= 0;
 +
 +/* A margin for free space in the log buffer before a log entry is catenated */
 +#define LOG_BUF_WRITE_MARGIN	(4 * OS_FILE_LOG_BLOCK_SIZE)
 +
 +/* Margins for free space in the log buffer after a log entry is catenated */
 +#define LOG_BUF_FLUSH_RATIO	2
 +#define LOG_BUF_FLUSH_MARGIN	(LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
 +
 +/* Margin for the free space in the smallest log group, before a new query
 +step which modifies the database, is started */
 +
 +#define LOG_CHECKPOINT_FREE_PER_THREAD	(4 * UNIV_PAGE_SIZE)
 +#define LOG_CHECKPOINT_EXTRA_FREE	(8 * UNIV_PAGE_SIZE)
 +
 +/* This parameter controls asynchronous making of a new checkpoint; the value
 +should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
 +
 +#define LOG_POOL_CHECKPOINT_RATIO_ASYNC	32
 +
 +/* This parameter controls synchronous preflushing of modified buffer pages */
 +#define LOG_POOL_PREFLUSH_RATIO_SYNC	16
 +
 +/* The same ratio for asynchronous preflushing; this value should be less than
 +the previous */
 +#define LOG_POOL_PREFLUSH_RATIO_ASYNC	8
 +
 +/* Extra margin, in addition to one log file, used in archiving */
 +#define LOG_ARCHIVE_EXTRA_MARGIN	(4 * UNIV_PAGE_SIZE)
 +
 +/* This parameter controls asynchronous writing to the archive */
 +#define LOG_ARCHIVE_RATIO_ASYNC		16
 +
 +/* Codes used in unlocking flush latches */
 +#define LOG_UNLOCK_NONE_FLUSHED_LOCK	1
 +#define LOG_UNLOCK_FLUSH_LOCK		2
 +
 +/* States of an archiving operation */
 +#define	LOG_ARCHIVE_READ	1
 +#define	LOG_ARCHIVE_WRITE	2
 +
 +/******************************************************//**
 +Completes a checkpoint write i/o to a log file. */
 +static
 +void
 +log_io_complete_checkpoint(void);
 +/*============================*/
 +#ifdef UNIV_LOG_ARCHIVE
 +/******************************************************//**
 +Completes an archiving i/o. */
 +static
 +void
 +log_io_complete_archive(void);
 +/*=========================*/
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +/****************************************************************//**
 +Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
 +exists.
 +The value comes from buf_pool_get_oldest_modification(); a zero result is
 +replaced by log_sys->lsn. A debug assertion (ut_ad) expects the caller to
 +own log_sys->mutex.
 +@return	LSN of oldest modification */
 +static
 +lsn_t
 +log_buf_pool_get_oldest_modification(void)
 +/*======================================*/
 +{
 +	lsn_t	lsn;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	lsn = buf_pool_get_oldest_modification();
 +
 +	if (!lsn) {	/* zero: fall back to the current log_sys->lsn */
 +
 +		lsn = log_sys->lsn;
 +	}
 +
 +	return(lsn);
 +}
 +
 +/****************************************************************//**
 +Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
 +exists.
 +This variant obtains the value from buf_pool_get_oldest_modification_peek()
 +and, unlike log_buf_pool_get_oldest_modification(), contains no assertion
 +that log_sys->mutex is owned. A zero result is replaced by log_sys->lsn.
 +@return	LSN of oldest modification */
 +static
 +lsn_t
 +log_buf_pool_get_oldest_modification_peek(void)
 +/*===========================================*/
 +{
 +	lsn_t	lsn;
 +
 +	lsn = buf_pool_get_oldest_modification_peek();
 +
 +	if (!lsn) {	/* zero: fall back to the current log_sys->lsn */
 +
 +		lsn = log_sys->lsn;
 +	}
 +
 +	return(lsn);
 +}
 +
 +/****************************************************************//**
 +Checks if the log groups have a big enough margin of free space
 +so that a new log entry can be written without overwriting log data
 +that is not read by the changed page bitmap thread.
 +Returns FALSE immediately when srv_track_changed_pages is not set. Otherwise
 +a debug assertion (ut_ad) expects the caller to own log_sys->mutex, and the
 +distance between log_sys->lsn and log_get_tracked_lsn(), plus lsn_advance,
 +is compared against log_sys->max_checkpoint_age.
 +@return TRUE if there is not enough free space. */
 +static
 +ibool
 +log_check_tracking_margin(
 +	ulint	lsn_advance)	/*!< in: an upper limit on how much log data we
 +				plan to write.  If zero, the margin will be
 +				checked for the already-written log. */
 +{
 +	lsn_t	tracked_lsn;	/* value returned by log_get_tracked_lsn() */
 +	lsn_t	tracked_lsn_age;	/* log_sys->lsn minus tracked_lsn */
 +
 +	if (!srv_track_changed_pages) {
 +		return FALSE;
 +	}
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	tracked_lsn = log_get_tracked_lsn();
 +	tracked_lsn_age = log_sys->lsn - tracked_lsn;
 +
 +	/* The overwrite would happen when log_sys->log_group_capacity is
 +	exceeded, but we use max_checkpoint_age for an extra safety margin. */
 +	return tracked_lsn_age + lsn_advance > log_sys->max_checkpoint_age;
 +}
 +
 +/** Extends the log buffer.
 +@param[in] len	requested minimum size in bytes */
 +static
 +void
 +log_buffer_extend(
 +	ulint	len)
 +{
 +	ulint	move_start;
 +	ulint	move_end;
 +	byte*	tmp_buf = reinterpret_cast<byte *>(alloca(OS_FILE_LOG_BLOCK_SIZE));
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	while (log_sys->is_extending) {
 +		/* Another thread is trying to extend already.
 +		Needs to wait for. */
 +		mutex_exit(&(log_sys->mutex));
 +
 +		log_buffer_flush_to_disk();
 +
 +		mutex_enter(&(log_sys->mutex));
 +
 +		if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) {
 +			/* Already extended enough by the others */
 +			mutex_exit(&(log_sys->mutex));
 +			return;
 +		}
 +	}
 +
 +	log_sys->is_extending = true;
 +
 +	while (log_sys->n_pending_writes != 0
 +	       || ut_calc_align_down(log_sys->buf_free,
 +				     OS_FILE_LOG_BLOCK_SIZE)
 +		  != ut_calc_align_down(log_sys->buf_next_to_write,
 +					OS_FILE_LOG_BLOCK_SIZE)) {
 +		/* Buffer might have >1 blocks to write still. */
 +		mutex_exit(&(log_sys->mutex));
 +
 +		log_buffer_flush_to_disk();
 +
 +		mutex_enter(&(log_sys->mutex));
 +	}
 +
 +	move_start = ut_calc_align_down(
 +		log_sys->buf_free,
 +		OS_FILE_LOG_BLOCK_SIZE);
 +	move_end = log_sys->buf_free;
 +
 +	/* store the last log block in buffer */
 +	ut_memcpy(tmp_buf, log_sys->buf + move_start,
 +		  move_end - move_start);
 +
 +	log_sys->buf_free -= move_start;
 +	log_sys->buf_next_to_write -= move_start;
 +
 +	/* reallocate log buffer */
 +	srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
 +	mem_free(log_sys->buf_ptr);
 +	log_sys->buf_ptr = static_cast<byte*>(
 +		mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +	log_sys->buf = static_cast<byte*>(
 +		ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
 +	log_sys->buf_size = LOG_BUFFER_SIZE;
 +	log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
 +		- LOG_BUF_FLUSH_MARGIN;
 +
 +	/* restore the last log block */
 +	ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start);
 +
 +	ut_ad(log_sys->is_extending);
 +	log_sys->is_extending = false;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"innodb_log_buffer_size was extended to %lu.",
 +		LOG_BUFFER_SIZE);
 +}
 +
 +/************************************************************//**
 +Opens the log for log_write_low. The log must be closed with log_close.
 +The function is entered and left with the log mutex held. Whenever it has
 +to wait (for a log buffer extension, a log buffer flush, an archive write
 +batch or the changed page tracker) it releases the mutex for the duration
 +of the wait, re-acquires it and re-evaluates all conditions from the top.
 +@return	start lsn of the log record */
 +UNIV_INTERN
 +lsn_t
 +log_open(
 +/*=====*/
 +	ulint	len)	/*!< in: length of data to be catenated */
 +{
 +	log_t*	log			= log_sys;
 +	ulint	len_upper_limit;
 +#ifdef UNIV_LOG_ARCHIVE
 +	ulint	archived_lsn_age;
 +	ulint	dummy;
 +#endif /* UNIV_LOG_ARCHIVE */
 +	ulint	count			= 0;
 +	ulint	tcount			= 0;
 +
 +	if (len >= log->buf_size / 2) {
 +		DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
 +				DBUG_SUICIDE(););
 +
 +		/* log_buffer is too small. try to extend instead of crash. */
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"The transaction log size is too large"
 +			" for innodb_log_buffer_size (%lu >= %lu / 2). "
 +			"Trying to extend it.",
 +			len, LOG_BUFFER_SIZE);
 +
 +		/* log_buffer_extend() acquires the log mutex itself and
 +		returns with it released. Give the mutex up around the call
 +		so that we neither block on a mutex we already own nor
 +		continue below without owning it. */
 +		mutex_exit(&(log->mutex));
 +
 +		log_buffer_extend((len + 1) * 2);
 +
 +		mutex_enter(&(log->mutex));
 +	}
 +loop:
 +	ut_ad(!recv_no_log_write);
 +
 +	if (log->is_extending) {
 +
 +		mutex_exit(&(log->mutex));
 +
 +		/* Log buffer size is extending. Writing up to the next block
 +		should wait for the extending finished. */
 +
 +		os_thread_sleep(100000);
 +
 +		ut_ad(++count < 50);
 +
 +		/* Every pass through the loop expects the mutex to be held,
 +		exactly as in the other retry branches below. */
 +		mutex_enter(&(log->mutex));
 +
 +		goto loop;
 +	}
 +
 +	/* Calculate an upper limit for the space the string may take in the
 +	log buffer */
 +
 +	len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
 +
 +	if (log->buf_free + len_upper_limit > log->buf_size) {
 +
 +		mutex_exit(&(log->mutex));
 +
 +		/* Not enough free space, do a synchronous flush of the log
 +		buffer */
 +
 +		log_buffer_flush_to_disk();
 +
 +		srv_stats.log_waits.inc();
 +
 +		ut_ad(++count < 50);
 +
 +		mutex_enter(&(log->mutex));
 +
 +		goto loop;
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (log->archiving_state != LOG_ARCH_OFF) {
 +
 +		archived_lsn_age = log->lsn - log->archived_lsn;
 +		if (archived_lsn_age + len_upper_limit
 +		    > log->max_archived_lsn_age) {
 +			/* Not enough free archived space in log groups: do a
 +			synchronous archive write batch: */
 +
 +			mutex_exit(&(log->mutex));
 +
 +			ut_ad(len_upper_limit <= log->max_archived_lsn_age);
 +
 +			log_archive_do(TRUE, &dummy);
 +
 +			ut_ad(++count < 50);
 +
 +			mutex_enter(&(log->mutex));
 +
 +			goto loop;
 +		}
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if (log_check_tracking_margin(len_upper_limit) &&
 +		(++tcount + count < 50)) {
 +
 +		/* This log write would violate the untracked LSN free space
 +		margin.  Limit this to 50 retries as there might be situations
 +		where we have no choice but to proceed anyway, i.e. if the log
 +		is about to be overflown, log tracking or not. */
 +		mutex_exit(&(log->mutex));
 +
 +		os_thread_sleep(10000);
 +
 +		mutex_enter(&(log->mutex));
 +
 +		goto loop;
 +	}
 +
 +#ifdef UNIV_LOG_DEBUG
 +	/* Remember where this log write starts; log_close() checks the
 +	records written since then */
 +	log->old_buf_free = log->buf_free;
 +	log->old_lsn = log->lsn;
 +#endif
 +	return(log->lsn);
 +}
 +
 +/************************************************************//**
 +Appends the given string to the log buffer, splitting it over as many log
 +blocks as needed. Each time a block becomes full its header is finalized
 +and the header of the following block is initialized. log->lsn and
 +log->buf_free are advanced accordingly. It is assumed that the caller holds
 +the log mutex. */
 +UNIV_INTERN
 +void
 +log_write_low(
 +/*==========*/
 +	byte*	str,		/*!< in: string */
 +	ulint	str_len)	/*!< in: string length */
 +{
 +	log_t*	log	= log_sys;
 +
 +	ut_ad(mutex_own(&(log->mutex)));
 +
 +	do {
 +		ut_ad(!recv_no_log_write);
 +
 +		/* Offset within a block at which the payload area ends */
 +		const ulint	payload_end = OS_FILE_LOG_BLOCK_SIZE
 +			- LOG_BLOCK_TRL_SIZE;
 +		const ulint	block_offset = log->buf_free
 +			% OS_FILE_LOG_BLOCK_SIZE;
 +
 +		/* Assume the rest of the string fits within the current
 +		log block; otherwise copy only up to the block trailer */
 +		ulint	data_len = block_offset + str_len;
 +		ulint	chunk = str_len;
 +
 +		if (data_len > payload_end) {
 +			data_len = payload_end;
 +			chunk = payload_end - block_offset;
 +		}
 +
 +		byte*	block = static_cast<byte*>(
 +			ut_align_down(
 +				log->buf + log->buf_free,
 +				OS_FILE_LOG_BLOCK_SIZE));
 +
 +		ut_memcpy(log->buf + log->buf_free, str, chunk);
 +
 +		str += chunk;
 +		str_len -= chunk;
 +
 +		log_block_set_data_len(block, data_len);
 +
 +		ulint	advance = chunk;
 +
 +		if (data_len == payload_end) {
 +			/* This block became full */
 +			log_block_set_data_len(block, OS_FILE_LOG_BLOCK_SIZE);
 +			log_block_set_checkpoint_no(
 +				block, log_sys->next_checkpoint_no);
 +
 +			/* Skip this block's trailer and the next block's
 +			header as well */
 +			advance += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
 +
 +			log->lsn += advance;
 +
 +			/* Initialize the next block header */
 +			log_block_init(block + OS_FILE_LOG_BLOCK_SIZE,
 +				       log->lsn);
 +		} else {
 +			log->lsn += advance;
 +		}
 +
 +		log->buf_free += advance;
 +
 +		ut_ad(log->buf_free <= log->buf_size);
 +
 +	} while (str_len > 0);
 +
 +	srv_stats.log_write_requests.inc();
 +}
 +
 +/************************************************************//**
 +Closes the log. Sets the first record group offset of the current log block
 +if it is still unset, requests a flush or checkpoint check when the buffer
 +or the checkpoint age grows too large, and disables changed page tracking
 +when the tracker has fallen behind by a whole log group capacity. The
 +caller must own the log mutex.
 +@return	lsn */
 +UNIV_INTERN
 +lsn_t
 +log_close(void)
 +/*===========*/
 +{
 +	log_t*		log	= log_sys;
 +
 +	ut_ad(mutex_own(&(log->mutex)));
 +	ut_ad(!recv_no_log_write);
 +
 +	const lsn_t	lsn = log->lsn;
 +
 +	byte*	log_block = static_cast<byte*>(
 +		ut_align_down(
 +			log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	if (log_block_get_first_rec_group(log_block) == 0) {
 +		/* We initialized a new log block which was not written
 +		full by the current mtr: the next mtr log record group
 +		will start within this block at the offset data_len */
 +
 +		log_block_set_first_rec_group(
 +			log_block, log_block_get_data_len(log_block));
 +	}
 +
 +	if (log->buf_free > log->max_buf_free) {
 +
 +		log->check_flush_or_checkpoint = TRUE;
 +	}
 +
 +	if (srv_track_changed_pages) {
 +
 +		const lsn_t	tracked_lsn = log_get_tracked_lsn();
 +
 +		if (lsn - tracked_lsn >= log->log_group_capacity) {
 +
 +			fprintf(stderr, "InnoDB: Error: the age of the "
 +				"oldest untracked record exceeds the log "
 +				"group capacity!\n");
 +			fprintf(stderr, "InnoDB: Error: stopping the log "
 +				"tracking thread at LSN " LSN_PF "\n",
 +				tracked_lsn);
 +			srv_track_changed_pages = FALSE;
 +		}
 +	}
 +
 +	const lsn_t	checkpoint_age = lsn - log->last_checkpoint_lsn;
 +
 +	/* TODO: split btr_store_big_rec_extern_fields() into small
 +	steps so that we can release all latches in the middle, and
 +	call log_free_check() to ensure we never write over log written
 +	after the latest checkpoint. In principle, we should split all
 +	big_rec operations, but other operations are smaller. */
 +
 +	/* Warn when the checkpoint age reaches the log group capacity, but
 +	not again within 15 seconds of the previous warning */
 +	if (checkpoint_age >= log->log_group_capacity
 +	    && (!log_has_printed_chkp_warning
 +		|| difftime(time(NULL), log_last_warning_time) > 15)) {
 +
 +		log_has_printed_chkp_warning = TRUE;
 +		log_last_warning_time = time(NULL);
 +
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: ERROR: the age of the last"
 +			" checkpoint is " LSN_PF ",\n"
 +			"InnoDB: which exceeds the log group"
 +			" capacity " LSN_PF ".\n"
 +			"InnoDB: If you are using big"
 +			" BLOB or TEXT rows, you must set the\n"
 +			"InnoDB: combined size of log files"
 +			" at least 10 times bigger than the\n"
 +			"InnoDB: largest such row.\n",
 +			checkpoint_age,
 +			log->log_group_capacity);
 +	}
 +
 +	if (checkpoint_age > log->max_modified_age_sync) {
 +
 +		const lsn_t	oldest_lsn
 +			= buf_pool_get_oldest_modification();
 +
 +		if (!oldest_lsn
 +		    || lsn - oldest_lsn > log->max_modified_age_sync
 +		    || checkpoint_age > log->max_checkpoint_age_async) {
 +
 +			log->check_flush_or_checkpoint = TRUE;
 +		}
 +	}
 +
 +#ifdef UNIV_LOG_DEBUG
 +	log_check_log_recs(log->buf + log->old_buf_free,
 +			   log->buf_free - log->old_buf_free, log->old_lsn);
 +#endif
 +
 +	return(lsn);
 +}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +/******************************************************//**
 +Fills the rest of the current log block with one-byte MLOG_DUMMY_RECORD
 +records, unless the block contains nothing but its header. Used in producing
 +consistent archived log files. */
 +static
 +void
 +log_pad_current_log_block(void)
 +/*===========================*/
 +{
 +	byte		dummy_rec	= MLOG_DUMMY_RECORD;
 +	ulint		offset_in_block;
 +	ulint		n_pad;
 +	lsn_t		lsn;
 +
 +	/* We retrieve lsn only because otherwise gcc crashed on HP-UX */
 +	lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
 +
 +	offset_in_block = log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE;
 +
 +	if (offset_in_block == LOG_BLOCK_HDR_SIZE) {
 +		/* Only the block header has been written: nothing to pad */
 +		n_pad = 0;
 +	} else {
 +		n_pad = OS_FILE_LOG_BLOCK_SIZE
 +			- offset_in_block
 +			- LOG_BLOCK_TRL_SIZE;
 +	}
 +
 +	while (n_pad > 0) {
 +		log_write_low(&dummy_rec, 1);
 +		n_pad--;
 +	}
 +
 +	lsn = log_sys->lsn;
 +
 +	log_close();
 +	log_release();
 +
 +	/* The lsn must now point right after a block header */
 +	ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
 +}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +/******************************************************//**
 +Calculates the data capacity of a log group, when the log file headers are not
 +included: each of the group->n_files files contributes its file_size minus
 +LOG_FILE_HDR_SIZE bytes. The caller must own log_sys->mutex.
 +@return	capacity in bytes */
 +UNIV_INTERN
 +lsn_t
 +log_group_get_capacity(
 +/*===================*/
 +	const log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
 +}
 +
 +/******************************************************//**
 +Calculates the offset within a log group, when the log file headers are not
 +included. One LOG_FILE_HDR_SIZE is subtracted for every log file up to and
 +including the one that contains the given real offset. The caller must own
 +log_sys->mutex.
 +@return	size offset (<= offset) */
 +UNIV_INLINE
 +lsn_t
 +log_group_calc_size_offset(
 +/*=======================*/
 +	lsn_t			offset,	/*!< in: real offset within the
 +					log group */
 +	const log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	/* offset / file_size is the index of the file holding the offset */
 +	return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
 +}
 +
 +/******************************************************//**
 +Calculates the offset within a log group, when the log file headers are
 +included. One LOG_FILE_HDR_SIZE is added for every log file up to and
 +including the one that contains the given size offset. The caller must own
 +log_sys->mutex.
 +@return	real offset (>= offset) */
 +UNIV_INLINE
 +lsn_t
 +log_group_calc_real_offset(
 +/*=======================*/
 +	lsn_t			offset,	/*!< in: size offset within the
 +					log group */
 +	const log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	/* Each file carries (file_size - LOG_FILE_HDR_SIZE) bytes of data,
 +	so the quotient below is the index of the file holding the offset */
 +	return(offset + LOG_FILE_HDR_SIZE
 +	       * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
 +}
 +
 +/******************************************************//**
 +Calculates the offset of an lsn within a log group. The position is derived
 +from the (lsn, lsn_offset) pair stored in the group and wraps around at the
 +data capacity of the group. The caller must own log_sys->mutex.
 +@return	offset within the log group */
 +static
 +lsn_t
 +log_group_calc_lsn_offset(
 +/*======================*/
 +	lsn_t			lsn,	/*!< in: lsn */
 +	const log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	const lsn_t	gr_lsn = group->lsn;
 +
 +	/* Work with offsets that do not count the file headers */
 +	const lsn_t	base_size_offset = log_group_calc_size_offset(
 +		group->lsn_offset, group);
 +
 +	const lsn_t	capacity = log_group_get_capacity(group);
 +
 +	lsn_t		delta;
 +
 +	if (lsn < gr_lsn) {
 +		/* The lsn lies before the reference lsn of the group:
 +		measure the distance backwards, modulo the capacity */
 +		delta = capacity - (gr_lsn - lsn) % capacity;
 +	} else {
 +		delta = lsn - gr_lsn;
 +	}
 +
 +	const lsn_t	size_offset = (base_size_offset + delta) % capacity;
 +
 +	/* Translate back to an offset that includes the file headers */
 +	return(log_group_calc_real_offset(size_offset, group));
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +#ifdef UNIV_DEBUG
 +/** Debug builds only: when TRUE, the log write and flush routines print
 +progress messages to stderr */
 +UNIV_INTERN ibool	log_debug_writes = FALSE;
 +#endif /* UNIV_DEBUG */
 +
 +/*******************************************************************//**
 +Calculates where in log files we find a specified lsn. The log files are
 +treated as a ring whose first file starts at first_header_lsn; an lsn that
 +is smaller than first_header_lsn is first moved forward by whole rounds of
 +the ring.
 +@return	log file number */
 +UNIV_INTERN
 +ulint
 +log_calc_where_lsn_is(
 +/*==================*/
 +	ib_int64_t*	log_file_offset,	/*!< out: offset in that file
 +						(including the header) */
 +	ib_uint64_t	first_header_lsn,	/*!< in: first log file start
 +						lsn */
 +	ib_uint64_t	lsn,			/*!< in: lsn whose position to
 +						determine */
 +	ulint		n_log_files,		/*!< in: total number of log
 +						files */
 +	ib_int64_t	log_file_size)		/*!< in: log file size
 +						(including the header) */
 +{
 +	/* Bytes of log data in one file, i.e. without the file header */
 +	const ib_int64_t	capacity = log_file_size - LOG_FILE_HDR_SIZE;
 +
 +	if (lsn < first_header_lsn) {
 +		const ib_int64_t	ring_capacity
 +			= capacity * (ib_int64_t) n_log_files;
 +		const ib_int64_t	n_rounds
 +			= 1 + (first_header_lsn - lsn) / ring_capacity;
 +
 +		lsn += n_rounds * ring_capacity;
 +	}
 +
 +	ut_a(lsn >= first_header_lsn);
 +
 +	const ib_uint64_t	distance = lsn - first_header_lsn;
 +
 +	*log_file_offset = (distance % capacity) + LOG_FILE_HDR_SIZE;
 +
 +	return(((ulint)(distance / capacity)) % n_log_files);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/********************************************************//**
 +Makes group->lsn and group->lsn_offset correspond to the given lsn. For this
 +function to work, the two fields must already be correctly initialized to
 +correspond to some lsn, for instance, a checkpoint lsn, because the new
 +offset is computed from the old pair. */
 +UNIV_INTERN
 +void
 +log_group_set_fields(
 +/*=================*/
 +	log_group_t*	group,	/*!< in/out: group */
 +	lsn_t		lsn)	/*!< in: lsn for which the values should be
 +				set */
 +{
 +	/* Compute the offset first: the calculation reads the old values
 +	of group->lsn and group->lsn_offset */
 +	const lsn_t	new_offset = log_group_calc_lsn_offset(lsn, group);
 +
 +	group->lsn_offset = new_offset;
 +	group->lsn = lsn;
 +}
 +
 +/*****************************************************************//**
 +Calculates the recommended highest values for lsn - last_checkpoint_lsn,
 +lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age, and
 +stores them in log_sys. If the smallest log group is too small to
 +accommodate the number of OS threads in the database server, an error is
 +printed and the process is terminated with exit(1).
 +@return error value FALSE if the smallest log group is too small to
 +accommodate the number of OS threads in the database server */
 +static
 +ibool
 +log_calc_max_ages(void)
 +/*===================*/
 +{
 +	log_group_t*	group;
 +	lsn_t		smallest_capacity	= LSN_MAX;
 +	lsn_t		smallest_archive_margin	= LSN_MAX;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	ut_ad(group);
 +
 +	/* Find the minimum capacity and archive margin over all groups */
 +	for (; group; group = UT_LIST_GET_NEXT(log_groups, group)) {
 +
 +		const lsn_t	capacity = log_group_get_capacity(group);
 +		const lsn_t	archive_margin = capacity
 +			- (group->file_size - LOG_FILE_HDR_SIZE)
 +			- LOG_ARCHIVE_EXTRA_MARGIN;
 +
 +		if (capacity < smallest_capacity) {
 +
 +			smallest_capacity = capacity;
 +		}
 +
 +		if (archive_margin < smallest_archive_margin) {
 +
 +			smallest_archive_margin = archive_margin;
 +		}
 +	}
 +
 +	/* Add extra safety */
 +	smallest_capacity -= smallest_capacity / 10;
 +
 +	/* For each OS thread we must reserve so much free space in the
 +	smallest log group that it can accommodate the log entries produced
 +	by single query steps: running out of free log space is a serious
 +	system error which requires rebooting the database. */
 +
 +	const ulint	reserved = LOG_CHECKPOINT_FREE_PER_THREAD
 +		* (10 + srv_thread_concurrency)
 +		+ LOG_CHECKPOINT_EXTRA_FREE;
 +
 +	if (reserved >= smallest_capacity / 2) {
 +		/* The log files are too small: give up */
 +		mutex_exit(&(log_sys->mutex));
 +
 +		fprintf(stderr,
 +			"InnoDB: Error: ib_logfiles are too small"
 +			" for innodb_thread_concurrency %lu.\n"
 +			"InnoDB: The combined size of ib_logfiles"
 +			" should be bigger than\n"
 +			"InnoDB: 200 kB * innodb_thread_concurrency.\n"
 +			"InnoDB: To get mysqld to start up, set"
 +			" innodb_thread_concurrency in my.cnf\n"
 +			"InnoDB: to a lower value, for example, to 8."
 +			" After an ERROR-FREE shutdown\n"
 +			"InnoDB: of mysqld you can adjust the size of"
 +			" ib_logfiles, as explained in\n"
 +			"InnoDB: " REFMAN "adding-and-removing.html\n"
 +			"InnoDB: Cannot continue operation."
 +			" Calling exit(1).\n",
 +			(ulong) srv_thread_concurrency);
 +
 +		exit(1);
 +	}
 +
 +	lsn_t	margin = smallest_capacity - reserved;
 +
 +	margin -= margin / 10;	/* Add still some extra safety */
 +
 +	log_sys->log_group_capacity = smallest_capacity;
 +
 +	log_sys->max_modified_age_async = margin
 +		- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
 +	log_sys->max_modified_age_sync = margin
 +		- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
 +
 +	log_sys->max_checkpoint_age_async = margin - margin
 +		/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
 +	log_sys->max_checkpoint_age = margin;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_sys->max_archived_lsn_age = smallest_archive_margin;
 +
 +	log_sys->max_archived_lsn_age_async = smallest_archive_margin
 +		- smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	return(TRUE);
 +}
 +
 +/******************************************************//**
 +Initializes the log. Allocates the global log_sys object, creates its
 +mutexes, events and locks, allocates the block-aligned log buffer and
 +checkpoint buffer, and initializes the first log block so that the first
 +log record starts at LOG_START_LSN + LOG_BLOCK_HDR_SIZE. */
 +UNIV_INTERN
 +void
 +log_init(void)
 +/*==========*/
 +{
 +	/* NOTE: mem_alloc() is used here, so only the fields assigned
 +	below have defined values when this function returns */
 +	log_sys = static_cast<log_t*>(mem_alloc(sizeof(log_t)));
 +
 +	mutex_create(log_sys_mutex_key, &log_sys->mutex, SYNC_LOG);
 +
 +	mutex_create(log_flush_order_mutex_key,
 +		     &log_sys->log_flush_order_mutex,
 +		     SYNC_LOG_FLUSH_ORDER);
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	/* Start the lsn from one log block from zero: this way every
 +	log record has a start lsn != zero, a fact which we will use */
 +
 +	log_sys->lsn = LOG_START_LSN;
 +
 +	ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
 +	ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
 +
 +	/* One extra block is allocated so that buf can be aligned to
 +	OS_FILE_LOG_BLOCK_SIZE within buf_ptr */
 +	log_sys->buf_ptr = static_cast<byte*>(
 +		mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_sys->buf = static_cast<byte*>(
 +		ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_sys->buf_size = LOG_BUFFER_SIZE;
 +	log_sys->is_extending = false;
 +
 +	log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
 +		- LOG_BUF_FLUSH_MARGIN;
 +	log_sys->check_flush_or_checkpoint = TRUE;
 +	UT_LIST_INIT(log_sys->log_groups);
 +
 +	log_sys->n_log_ios = 0;
 +
 +	log_sys->n_log_ios_old = log_sys->n_log_ios;
 +	log_sys->last_printout_time = time(NULL);
 +	/*----------------------------*/
 +	/* Fields describing log writes and flushes */
 +
 +	log_sys->buf_next_to_write = 0;
 +
 +	log_sys->write_lsn = 0;
 +	log_sys->current_flush_lsn = 0;
 +	log_sys->flushed_to_disk_lsn = 0;
 +
 +	log_sys->written_to_some_lsn = log_sys->lsn;
 +	log_sys->written_to_all_lsn = log_sys->lsn;
 +
 +	log_sys->n_pending_writes = 0;
 +
 +	/* Both flush events are created and immediately set */
 +	log_sys->no_flush_event = os_event_create();
 +
 +	os_event_set(log_sys->no_flush_event);
 +
 +	log_sys->one_flushed_event = os_event_create();
 +
 +	os_event_set(log_sys->one_flushed_event);
 +
 +	/*----------------------------*/
 +	/* Fields describing checkpoints */
 +
 +	log_sys->next_checkpoint_no = 0;
 +	log_sys->last_checkpoint_lsn = log_sys->lsn;
 +	log_sys->next_checkpoint_lsn = log_sys->lsn;
 +	log_sys->n_pending_checkpoint_writes = 0;
 +
 +
 +	rw_lock_create(checkpoint_lock_key, &log_sys->checkpoint_lock,
 +		       SYNC_NO_ORDER_CHECK);
 +
 +	/* Two blocks are allocated so that a block-aligned
 +	checkpoint_buf of one block fits within checkpoint_buf_ptr */
 +	log_sys->checkpoint_buf_ptr = static_cast<byte*>(
 +		mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_sys->checkpoint_buf = static_cast<byte*>(
 +		ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	/*----------------------------*/
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	/* Under MySQL, log archiving is always off */
 +	log_sys->archiving_state = LOG_ARCH_OFF;
 +	log_sys->archived_lsn = log_sys->lsn;
 +	log_sys->next_archived_lsn = 0;
 +
 +	log_sys->n_pending_archive_ios = 0;
 +
 +	rw_lock_create(archive_lock_key, &log_sys->archive_lock,
 +		       SYNC_NO_ORDER_CHECK);
 +
 +	log_sys->archive_buf_ptr = static_cast<byte*>(
 +		mem_zalloc(LOG_ARCHIVE_BUF_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_sys->archive_buf = static_cast<byte*>(
 +		ut_align(log_sys->archive_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE;
 +
 +	log_sys->archiving_on = os_event_create();
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	log_sys->tracked_lsn = 0;
 +
 +	/*----------------------------*/
 +	/* Initialize the first log block: the first record group starts
 +	right after the block header */
 +
 +	log_block_init(log_sys->buf, log_sys->lsn);
 +	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 +
 +	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
 +	log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
 +
 +	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 +		    log_sys->lsn - log_sys->last_checkpoint_lsn);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +#ifdef UNIV_LOG_DEBUG
 +	/* With UNIV_LOG_DEBUG the recovery system is set up as well, with
 +	all of its lsn positions starting at the current log_sys->lsn */
 +	recv_sys_create();
 +	recv_sys_init(buf_pool_get_curr_size());
 +
 +	recv_sys->parse_start_lsn = log_sys->lsn;
 +	recv_sys->scanned_lsn = log_sys->lsn;
 +	recv_sys->scanned_checkpoint_no = 0;
 +	recv_sys->recovered_lsn = log_sys->lsn;
 +	recv_sys->limit_lsn = LSN_MAX;
 +#endif
 +}
 +
 +/******************************************************************//**
 +Inits a log group to the log system. Allocates the group object together
 +with block-aligned header buffers for each of its files and a checkpoint
 +buffer, appends the group to log_sys->log_groups, and recalculates the
 +maximum ages with log_calc_max_ages(). */
 +UNIV_INTERN
 +void
 +log_group_init(
 +/*===========*/
 +	ulint	id,			/*!< in: group id */
 +	ulint	n_files,		/*!< in: number of log files */
 +	lsn_t	file_size,		/*!< in: log file size in bytes */
 +	ulint	space_id,		/*!< in: space id of the file space
 +					which contains the log files of this
 +					group */
 +	ulint	archive_space_id)	/*!< in: space id of the file space
 +					which contains some archived log
 +					files for this group; currently, only
 +					for the first log group this is
 +					used */
 +{
 +	ulint	i;
 +
 +	log_group_t*	group;
 +
 +	group = static_cast<log_group_t*>(mem_alloc(sizeof(log_group_t)));
 +
 +	group->id = id;
 +	group->n_files = n_files;
 +	group->file_size = file_size;
 +	group->space_id = space_id;
 +	group->state = LOG_GROUP_OK;
 +	group->lsn = LOG_START_LSN;
 +	group->lsn_offset = LOG_FILE_HDR_SIZE;
 +	group->n_pending_writes = 0;
 +
 +	/* For every header buffer two pointers are kept: the raw
 +	allocation (*_ptr) and the block-aligned address within it */
 +	group->file_header_bufs_ptr = static_cast<byte**>(
 +		mem_zalloc(sizeof(byte*) * n_files));
 +
 +	/* The elements of this array are byte*, so that is the element
 +	size to allocate, as for the sibling arrays */
 +	group->file_header_bufs = static_cast<byte**>(
 +		mem_zalloc(sizeof(byte*) * n_files));
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	group->archive_file_header_bufs_ptr = static_cast<byte**>(
 +		mem_zalloc( sizeof(byte*) * n_files));
 +
 +	group->archive_file_header_bufs = static_cast<byte**>(
 +		mem_zalloc(sizeof(byte*) * n_files));
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	for (i = 0; i < n_files; i++) {
 +		/* One extra block is allocated so that the header buffer
 +		can be aligned to OS_FILE_LOG_BLOCK_SIZE */
 +		group->file_header_bufs_ptr[i] = static_cast<byte*>(
 +			mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +
 +		group->file_header_bufs[i] = static_cast<byte*>(
 +			ut_align(group->file_header_bufs_ptr[i],
 +				 OS_FILE_LOG_BLOCK_SIZE));
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +		group->archive_file_header_bufs_ptr[i] = static_cast<byte*>(
 +			mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +
 +		group->archive_file_header_bufs[i] = static_cast<byte*>(
 +			ut_align(group->archive_file_header_bufs_ptr[i],
 +				 OS_FILE_LOG_BLOCK_SIZE));
 +#endif /* UNIV_LOG_ARCHIVE */
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	group->archive_space_id = archive_space_id;
 +
 +	group->archived_file_no = LOG_START_LSN;
 +	group->archived_offset = 0;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	group->checkpoint_buf_ptr = static_cast<byte*>(
 +		mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE));
 +
 +	group->checkpoint_buf = static_cast<byte*>(
 +		ut_align(group->checkpoint_buf_ptr,OS_FILE_LOG_BLOCK_SIZE));
 +
 +	UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
 +
 +	/* The limits depend on the smallest group, so recompute them now
 +	that a group was added */
 +	ut_a(log_calc_max_ages());
 +}
 +
 +/******************************************************************//**
 +Does the unlockings needed in flush i/o completion: sets the events that
 +correspond to the flags given in code. The caller must own the log mutex. */
 +UNIV_INLINE
 +void
 +log_flush_do_unlocks(
 +/*=================*/
 +	ulint	code)	/*!< in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
 +			and LOG_UNLOCK_NONE_FLUSHED_LOCK */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	/* NOTE: the log mutex must be owned while the events are set.
 +	Transactions wait for these events, and at the moment an event is
 +	set the log flush they were waiting for must have ended. Without
 +	the mutex, the i/o-thread calling this function might be preempted
 +	for a while; by the time it resumed, a new flush might have been
 +	started, and this function would erroneously signal the NEW flush
 +	as completed. Hence the state of these events changes atomically
 +	together with the state of log_sys->n_pending_writes etc. */
 +
 +	const bool	signal_one_flushed
 +		= (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) != 0;
 +	const bool	signal_no_flush
 +		= (code & LOG_UNLOCK_FLUSH_LOCK) != 0;
 +
 +	if (signal_one_flushed) {
 +		os_event_set(log_sys->one_flushed_event);
 +	}
 +
 +	if (signal_no_flush) {
 +		os_event_set(log_sys->no_flush_event);
 +	}
 +}
 +
 +/******************************************************************//**
 +Checks if a flush is completed for a log group and does the completion
 +routine if yes: the first group found with no pending writes records
 +log_sys->write_lsn as written_to_some_lsn and sets log_sys->one_flushed.
 +The caller must own the log mutex.
 +@return	LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
 +UNIV_INLINE
 +ulint
 +log_group_check_flush_completion(
 +/*=============================*/
 +	log_group_t*	group)	/*!< in: log group */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	const bool	group_idle = (group->n_pending_writes == 0);
 +
 +	if (group_idle && !log_sys->one_flushed) {
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +			fprintf(stderr,
 +				"Log flushed first to group %lu\n",
 +				(ulong) group->id);
 +		}
 +#endif /* UNIV_DEBUG */
 +		log_sys->written_to_some_lsn = log_sys->write_lsn;
 +		log_sys->one_flushed = TRUE;
 +
 +		return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes && group_idle) {
 +
 +		fprintf(stderr, "Log flushed to group %lu\n",
 +			(ulong) group->id);
 +	}
 +#endif /* UNIV_DEBUG */
 +	return(0);
 +}
 +
 +/******************************************************//**
 +Checks if a flush is completed and does the completion routine if yes:
 +records the written lsn and, when enough of the buffer has been written,
 +moves the remaining content to the start of the log buffer. The caller must
 +own the log mutex.
 +@return	LOG_UNLOCK_FLUSH_LOCK or 0 */
 +static
 +ulint
 +log_sys_check_flush_completion(void)
 +/*================================*/
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (log_sys->n_pending_writes != 0) {
 +		/* Writes are still in progress */
 +		return(0);
 +	}
 +
 +	log_sys->written_to_all_lsn = log_sys->write_lsn;
 +	log_sys->buf_next_to_write = log_sys->write_end_offset;
 +
 +	if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
 +		/* Move the log buffer content to the start of the
 +		buffer */
 +
 +		const ulint	shift = ut_calc_align_down(
 +			log_sys->write_end_offset,
 +			OS_FILE_LOG_BLOCK_SIZE);
 +		const ulint	content_end = ut_calc_align(
 +			log_sys->buf_free,
 +			OS_FILE_LOG_BLOCK_SIZE);
 +
 +		ut_memmove(log_sys->buf, log_sys->buf + shift,
 +			   content_end - shift);
 +
 +		log_sys->buf_free -= shift;
 +		log_sys->buf_next_to_write -= shift;
 +	}
 +
 +	return(LOG_UNLOCK_FLUSH_LOCK);
 +}
 +
 +/******************************************************//**
 +Completes an i/o to a log file. The argument encodes which kind of i/o
 +finished: the address of log_archive_io denotes an archive write, a pointer
 +with its lowest bit set denotes a checkpoint write to the group at
 +(pointer - 1), and anything else would be an ordinary log write, which is
 +currently done synchronously and must never arrive here. */
 +UNIV_INTERN
 +void
 +log_io_complete(
 +/*============*/
 +	log_group_t*	group)	/*!< in: log group or a dummy pointer */
 +{
 +	ulint	unlock;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if ((byte*) group == &log_archive_io) {
 +		/* It was an archive write */
 +
 +		log_io_complete_archive();
 +
 +		return;
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if ((ulint) group & 0x1UL) {
 +		/* It was a checkpoint write */
 +		/* Clear the tag bit to recover the real group pointer */
 +		group = (log_group_t*)((ulint) group - 1);
 +
 +		if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
 +		    && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
 +		    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
 +
 +			fil_flush(group->space_id);
 +		}
 +
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +			/* group->id is cast to match the %lu conversion,
 +			as in the other debug printouts of this file */
 +			fprintf(stderr,
 +				"Checkpoint info written to group %lu\n",
 +				(ulong) group->id);
 +		}
 +#endif /* UNIV_DEBUG */
 +		log_io_complete_checkpoint();
 +
 +		return;
 +	}
 +
 +	ut_error;	/*!< We currently use synchronous writing of the
 +			logs and cannot end up here! */
 +
 +	/* The code below is kept for asynchronous log writes; it is not
 +	reached at present because of the ut_error above */
 +
 +	if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
 +	    && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
 +	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
 +	    && thd_flush_log_at_trx_commit(NULL) != 2) {
 +
 +		fil_flush(group->space_id);
 +	}
 +
 +	mutex_enter(&(log_sys->mutex));
 +	ut_ad(!recv_no_log_write);
 +
 +	ut_a(group->n_pending_writes > 0);
 +	ut_a(log_sys->n_pending_writes > 0);
 +
 +	group->n_pending_writes--;
 +	log_sys->n_pending_writes--;
 +	MONITOR_DEC(MONITOR_PENDING_LOG_WRITE);
 +
 +	unlock = log_group_check_flush_completion(group);
 +	unlock = unlock | log_sys_check_flush_completion();
 +
 +	log_flush_do_unlocks(unlock);
 +
 +	mutex_exit(&(log_sys->mutex));
 +}
 +
 +/******************************************************//**
 +Writes a log file header to a log file space. The header is assembled in
 +the group's header buffer for the given file and, if log_do_write is set,
 +one OS_FILE_LOG_BLOCK_SIZE of it is written at the start of that file. The
 +caller must own the log mutex. */
 +static
 +void
 +log_group_file_header_flush(
 +/*========================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	ulint		nth_file,	/*!< in: header to the nth file in the
 +					log file space */
 +	lsn_t		start_lsn)	/*!< in: log file data starts at this
 +					lsn */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_ad(!recv_no_log_write);
 +	ut_a(nth_file < group->n_files);
 +
 +	byte*	header = group->file_header_bufs[nth_file];
 +
 +	mach_write_to_4(header + LOG_GROUP_ID, group->id);
 +	mach_write_to_8(header + LOG_FILE_START_LSN, start_lsn);
 +
 +	/* Wipe over possible label of mysqlbackup --restore */
 +	memcpy(header + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "    ", 4);
 +
 +	mach_write_to_4(header + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE,
 +			srv_log_block_size);
 +
 +	/* Offset of the start of the nth file within the log file space */
 +	const lsn_t	dest_offset = nth_file * group->file_size;
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Writing log file header to group %lu file %lu\n",
 +			(ulong) group->id, (ulong) nth_file);
 +	}
 +#endif /* UNIV_DEBUG */
 +	if (!log_do_write) {
 +
 +		return;
 +	}
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	srv_stats.os_log_pending_writes.inc();
 +
 +	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
 +	       (ulint) (dest_offset / UNIV_PAGE_SIZE),
 +	       (ulint) (dest_offset % UNIV_PAGE_SIZE),
 +	       OS_FILE_LOG_BLOCK_SIZE,
 +	       header, group);
 +
 +	srv_stats.os_log_pending_writes.dec();
 +}
 +
 +/******************************************************//**
 +Stores a 4-byte checksum to the trailer checksum field of a log block
 +before writing it to a log file. This checksum is used in recovery to
 +check the consistency of a log block. The value is computed with
 +log_block_calc_checksum() and stored with log_block_set_checksum(). */
 +static
 +void
 +log_block_store_checksum(
 +/*=====================*/
 +	byte*	block)	/*!< in/out: pointer to a log block */
 +{
 +	log_block_set_checksum(block, log_block_calc_checksum(block));
 +}
 +
 +/******************************************************//**
 +Writes a buffer to a log file group.
 +The buffer is written in pieces so that no single write crosses the end of
 +a log file: a piece that would overrun the current file is cut at the file
 +end and the remainder is handled on the next pass through 'loop'. When a
 +piece starts at offset LOG_FILE_HDR_SIZE within a file and write_header is
 +set, the header of that file is written first. The checksum of every log
 +block of a piece is stored into the block before the piece is written.
 +The data itself is only written when log_do_write is set.
 +Debug builds assert that the caller owns log_sys->mutex. */
 +UNIV_INTERN
 +void
 +log_group_write_buf(
 +/*================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	byte*		buf,		/*!< in: buffer */
 +	ulint		len,		/*!< in: buffer len; must be divisible
 +					by OS_FILE_LOG_BLOCK_SIZE */
 +	lsn_t		start_lsn,	/*!< in: start lsn of the buffer; must
 +					be divisible by
 +					OS_FILE_LOG_BLOCK_SIZE */
 +	ulint		new_data_offset)/*!< in: start offset of new data in
 +					buf: this parameter is used to decide
 +					if we have to write a new log file
 +					header */
 +{
 +	ulint		write_len;
 +	ibool		write_header;
 +	lsn_t		next_offset;
 +	ulint		i;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_ad(!recv_no_log_write);
 +	ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 +	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 +
 +	if (new_data_offset == 0) {
 +		write_header = TRUE;
 +	} else {
 +		write_header = FALSE;
 +	}
 +loop:
 +	if (len == 0) {
 +
 +		return;
 +	}
 +
 +	next_offset = log_group_calc_lsn_offset(start_lsn, group);
 +
 +	if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
 +	    && write_header) {
 +		/* We start to write a new log file instance in the group */
 +
 +		ut_a(next_offset / group->file_size <= ULINT_MAX);
 +
 +		log_group_file_header_flush(group, (ulint)
 +					    (next_offset / group->file_size),
 +					    start_lsn);
 +		srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
 +
 +		srv_stats.log_writes.inc();
 +	}
 +
 +	if ((next_offset % group->file_size) + len > group->file_size) {
 +
 +		/* if the above condition holds, then the below expression
 +		is < len which is ulint, so the typecast is ok */
 +		write_len = (ulint)
 +			(group->file_size - (next_offset % group->file_size));
 +	} else {
 +		write_len = len;
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +
 +		fprintf(stderr,
 +			"Writing log file segment to group %lu"
 +			" offset " LSN_PF " len %lu\n"
 +			"start lsn " LSN_PF "\n"
 +			"First block n:o %lu last block n:o %lu\n",
 +			(ulong) group->id, next_offset,
 +			write_len,
 +			start_lsn,
 +			(ulong) log_block_get_hdr_no(buf),
 +			(ulong) log_block_get_hdr_no(
 +				buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
 +		ut_a(log_block_get_hdr_no(buf)
 +		     == log_block_convert_lsn_to_no(start_lsn));
 +
 +		for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
 +
 +			ut_a(log_block_get_hdr_no(buf) + i
 +			     == log_block_get_hdr_no(
 +				     buf + i * OS_FILE_LOG_BLOCK_SIZE));
 +		}
 +	}
 +#endif /* UNIV_DEBUG */
 +	/* Calculate the checksums for each log block and write them to
 +	the trailer fields of the log blocks. This is done even when
 +	log_do_write is not set. */
 +
 +	for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
 +		log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
 +	}
 +
 +	if (log_do_write) {
 +		log_sys->n_log_ios++;
 +
 +		MONITOR_INC(MONITOR_LOG_IO);
 +
 +		srv_stats.os_log_pending_writes.inc();
 +
 +		ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
 +
 +		fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
 +		       (ulint) (next_offset / UNIV_PAGE_SIZE),
 +		       (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
 +		       group);
 +
 +		srv_stats.os_log_pending_writes.dec();
 +
 +		srv_stats.os_log_written.add(write_len);
 +		srv_stats.log_writes.inc();
 +	}
 +
 +	if (write_len < len) {
 +		start_lsn += write_len;
 +		len -= write_len;
 +		buf += write_len;
 +
 +		write_header = TRUE;	/* the piece was cut at a file end */
 +
 +		goto loop;
 +	}
 +}
 +
 +/******************************************************//**
 +This function is called, e.g., when a transaction wants to commit. It checks
 +that the log has been written to the log file up to the last log entry written
 +by the transaction. If there is a flush running, it waits and checks if the
 +flush flushed enough. If not, starts a new flush.
 +Returns immediately, doing nothing, while recv_no_ibuf_operations is set.
 +When this call itself performs the write, it writes everything from
 +log_sys->buf_next_to_write up to log_sys->buf_free (not only up to 'lsn')
 +and finally reports the written and flushed lsn through
 +innobase_mysql_log_notify(). When a sufficient write is already pending,
 +the call only waits for it as requested by 'wait'. */
 +UNIV_INTERN
 +void
 +log_write_up_to(
 +/*============*/
 +	lsn_t	lsn,	/*!< in: log sequence number up to which
 +			the log should be written,
 +			LSN_MAX if not specified */
 +	ulint	wait,	/*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
 +			or LOG_WAIT_ALL_GROUPS */
 +	ibool	flush_to_disk)
 +			/*!< in: TRUE if we want the written log
 +			also to be flushed to disk */
 +{
 +	log_group_t*	group;
 +	ulint		start_offset;
 +	ulint		end_offset;
 +	ulint		area_start;
 +	ulint		area_end;
 +#ifdef UNIV_DEBUG
 +	ulint		loop_count	= 0;
 +#endif /* UNIV_DEBUG */
 +	ulint		unlock;
 +	ib_uint64_t	write_lsn;
 +	ib_uint64_t	flush_lsn;
 +
 +	ut_ad(!srv_read_only_mode);
 +
 +	if (recv_no_ibuf_operations) {
 +		/* Recovery is running and no operations on the log files are
 +		allowed yet (the variable name .._no_ibuf_.. is misleading) */
 +
 +		return;
 +	}
 +
 +loop:
 +	ut_ad(++loop_count < 100);
 +
 +	mutex_enter(&(log_sys->mutex));
 +	ut_ad(!recv_no_log_write);
 +
 +	if (flush_to_disk
 +	    && log_sys->flushed_to_disk_lsn >= lsn) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return;
 +	}
 +
 +	if (!flush_to_disk
 +	    && (log_sys->written_to_all_lsn >= lsn
 +		|| (log_sys->written_to_some_lsn >= lsn
 +		    && wait != LOG_WAIT_ALL_GROUPS))) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return;
 +	}
 +
 +	if (log_sys->n_pending_writes > 0) {
 +		/* A write (+ possibly flush to disk) is running */
 +
 +		if (flush_to_disk
 +		    && log_sys->current_flush_lsn >= lsn) {
 +			/* The write + flush will write enough: wait for it to
 +			complete */
 +
 +			goto do_waits;
 +		}
 +
 +		if (!flush_to_disk
 +		    && log_sys->write_lsn >= lsn) {
 +			/* The write will write enough: wait for it to
 +			complete */
 +
 +			goto do_waits;
 +		}
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		/* Wait for the write to complete and try to start a new
 +		write */
 +
 +		os_event_wait(log_sys->no_flush_event);
 +
 +		goto loop;
 +	}
 +
 +	if (!flush_to_disk
 +	    && log_sys->buf_free == log_sys->buf_next_to_write) {
 +		/* Nothing to write and no flush to disk requested */
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return;
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Writing log from " LSN_PF " up to lsn " LSN_PF "\n",
 +			log_sys->written_to_all_lsn,
 +			log_sys->lsn);
 +	}
 +#endif /* UNIV_DEBUG */
 +	log_sys->n_pending_writes++;
 +	MONITOR_INC(MONITOR_PENDING_LOG_WRITE);
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +	group->n_pending_writes++;	/*!< We assume here that we have only
 +					one log group! */
 +
 +	os_event_reset(log_sys->no_flush_event);
 +	os_event_reset(log_sys->one_flushed_event);
 +
 +	start_offset = log_sys->buf_next_to_write;
 +	end_offset = log_sys->buf_free;
 +
 +	area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
 +	area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	ut_ad(area_end - area_start > 0);
 +
 +	log_sys->write_lsn = log_sys->lsn;
 +
 +	if (flush_to_disk) {
 +		log_sys->current_flush_lsn = log_sys->lsn;
 +	}
 +
 +	log_sys->one_flushed = FALSE;
 +
 +	log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
 +	log_block_set_checkpoint_no(
 +		log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
 +		log_sys->next_checkpoint_no);
 +
 +	/* Copy the last, incompletely written, log block a log block length
 +	up, so that when the flush operation writes from the log buffer, the
 +	segment to write will not be changed by writers to the log */
 +
 +	ut_memcpy(log_sys->buf + area_end,
 +		  log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
 +		  OS_FILE_LOG_BLOCK_SIZE);
 +
 +	log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
 +	log_sys->write_end_offset = log_sys->buf_free;
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	/* Do the write to the log files */
 +
 +	while (group) {
 +		log_group_write_buf(
 +			group, log_sys->buf + area_start,
 +			area_end - area_start,
 +			ut_uint64_align_down(log_sys->written_to_all_lsn,
 +					     OS_FILE_LOG_BLOCK_SIZE),
 +			start_offset - area_start);
 +
 +		log_group_set_fields(group, log_sys->write_lsn);
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
 +	    || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
 +		/* O_DSYNC or ALL_O_DIRECT means the OS did not buffer the log
 +		file at all: so we have also flushed to disk what we have
 +		written.
 +		NOTE(review): flushed_to_disk_lsn is assigned here and in the
 +		branch below after log_sys->mutex was released; presumably
 +		n_pending_writes > 0 keeps other threads from assigning it at
 +		the same time - confirm. */
 +
 +		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
 +
 +	} else if (flush_to_disk) {
 +
 +		group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +		fil_flush(group->space_id);
 +		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
 +	}
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	ut_a(group->n_pending_writes == 1);
 +	ut_a(log_sys->n_pending_writes == 1);
 +
 +	group->n_pending_writes--;
 +	log_sys->n_pending_writes--;
 +	MONITOR_DEC(MONITOR_PENDING_LOG_WRITE);
 +
 +	unlock = log_group_check_flush_completion(group);
 +	unlock = unlock | log_sys_check_flush_completion();
 +
 +	log_flush_do_unlocks(unlock);
 +
 +	write_lsn = log_sys->write_lsn;
 +	flush_lsn = log_sys->flushed_to_disk_lsn;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	innobase_mysql_log_notify(write_lsn, flush_lsn);
 +
 +	return;
 +
 +do_waits:
 +	mutex_exit(&(log_sys->mutex));
 +
 +	switch (wait) {
 +	case LOG_WAIT_ONE_GROUP:
 +		os_event_wait(log_sys->one_flushed_event);
 +		break;
 +	case LOG_WAIT_ALL_GROUPS:
 +		os_event_wait(log_sys->no_flush_event);
 +		break;
 +#ifdef UNIV_DEBUG
 +	case LOG_NO_WAIT:
 +		break;
 +	default:
 +		ut_error;
 +#endif /* UNIV_DEBUG */
 +	}
 +}
 +
 +/****************************************************************//**
 +Does a synchronous flush of the log buffer to disk.
 +Reads log_sys->lsn under log_sys->mutex and then calls log_write_up_to()
 +for that lsn with LOG_WAIT_ALL_GROUPS and flush_to_disk = TRUE. */
 +UNIV_INTERN
 +void
 +log_buffer_flush_to_disk(void)
 +/*==========================*/
 +{
 +	lsn_t	lsn;
 +
 +	ut_ad(!srv_read_only_mode);
 +	mutex_enter(&(log_sys->mutex));
 +
 +	lsn = log_sys->lsn;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 +}
 +
 +/****************************************************************//**
 +This function writes the log buffer to the log file and if 'flush'
 +is set it forces a flush of the log file as well. This is meant to be
 +called from background master thread only as it does not wait for
 +the write (+ possible flush) to finish.
 +The target lsn is log_sys->lsn as read under log_sys->mutex; the write
 +is requested through log_write_up_to() with LOG_NO_WAIT. */
 +UNIV_INTERN
 +void
 +log_buffer_sync_in_background(
 +/*==========================*/
 +	ibool	flush)	/*!< in: flush the logs to disk */
 +{
 +	lsn_t	lsn;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	lsn = log_sys->lsn;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	log_write_up_to(lsn, LOG_NO_WAIT, flush);
 +}
 +
 +/********************************************************************
 +
 +Tries to establish a big enough margin of free space in the log buffer, such
 +that a new log entry can be catenated without an immediate need for a flush.
 +If log->buf_free exceeds log->max_buf_free and no log write is pending,
 +starts a write of the log up to the current lsn with LOG_NO_WAIT and without
 +a flush to disk. Otherwise does nothing. */
 +static
 +void
 +log_flush_margin(void)
 +/*==================*/
 +{
 +	log_t*	log	= log_sys;
 +	lsn_t	lsn	= 0;
 +
 +	mutex_enter(&(log->mutex));
 +
 +	if (log->buf_free > log->max_buf_free) {
 +
 +		if (log->n_pending_writes > 0) {
 +			/* A flush is running: hope that it will provide enough
 +			free space */
 +		} else {
 +			lsn = log->lsn;
 +		}
 +	}
 +
 +	mutex_exit(&(log->mutex));
 +
 +	if (lsn) {
 +		log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
 +	}
 +}
 +
 +/****************************************************************//**
 +Advances the smallest lsn for which there are unflushed dirty blocks in the
 +buffer pool. NOTE: this function may only be called if the calling thread owns
 +no synchronization objects!
 +If the page cleaner is not active, SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH is
 +configured, or new_oldest is LSN_MAX, this thread starts a flush list batch
 +itself and waits for it to end. Otherwise it waits for flush list batches
 +started elsewhere, sleeping for ut_rnd_interval(0, 1 << i) between polls,
 +until buf_pool_get_oldest_modification() is at least new_oldest or zero.
 +@return false if there was a flush batch of the same type running,
 +which means that we could not start this flush batch; the backoff loop
 +only ends when its goal is reached, so that path returns true */
 +static
 +bool
 +log_preflush_pool_modified_pages(
 +/*=============================*/
 +	lsn_t	new_oldest)	/*!< in: try to advance oldest_modified_lsn
 +				at least to this lsn */
 +{
 +	lsn_t	current_oldest;
 +	ulint	i;
 +
 +	if (recv_recovery_on) {
 +		/* If the recovery is running, we must first apply all
 +		log records to their respective file pages to get the
 +		right modify lsn values to these pages: otherwise, there
 +		might be pages on disk which are not yet recovered to the
 +		current lsn, and even after calling this function, we could
 +		not know how up-to-date the disk version of the database is,
 +		and we could not make a new checkpoint on the basis of the
 +		info on the buffer pool only. */
 +
- 		recv_apply_hashed_log_recs(TRUE);
++		recv_apply_hashed_log_recs(true);
 +	}
 +
 +	if (!buf_page_cleaner_is_active
 +	    || (srv_foreground_preflush
 +		== SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH)
 +	    || (new_oldest == LSN_MAX)) {
 +
 +		ulint n_pages;
 +
 +		bool success = buf_flush_list(ULINT_MAX, new_oldest, &n_pages);
 +
 +		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +		if (!success) {
 +			MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
 +		}
 +
 +		MONITOR_INC_VALUE_CUMULATIVE(
 +			MONITOR_FLUSH_SYNC_TOTAL_PAGE,
 +			MONITOR_FLUSH_SYNC_COUNT,
 +			MONITOR_FLUSH_SYNC_PAGES,
 +			n_pages);
 +
 +		return(success);
 +	}
 +
 +	ut_ad(srv_foreground_preflush == SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF);
 +
 +	current_oldest = buf_pool_get_oldest_modification();
 +	i = 0;
 +
 +	while (current_oldest < new_oldest && current_oldest) {
 +
 +		while (!buf_flush_flush_list_in_progress()) {
 +
 +			/* If a flush list flush by the cleaner thread is not
 +			running, backoff until one is started. The upper bound
 +			of the sleep interval doubles on each pass and wraps
 +			around after 1 << 15. */
 +			os_thread_sleep(ut_rnd_interval(0, 1 << i));
 +			i++;
 +			i %= 16;
 +		}
 +		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +		current_oldest = buf_pool_get_oldest_modification();
 +	}
 +
 +	return(current_oldest >= new_oldest || !current_oldest);
 +}
 +
 +/******************************************************//**
 +Completes a checkpoint.
 +Increments log_sys->next_checkpoint_no, makes next_checkpoint_lsn the new
 +last_checkpoint_lsn, updates the checkpoint age monitor and releases the
 +x-lock on log_sys->checkpoint_lock. Debug builds assert that the caller
 +owns log_sys->mutex and that no checkpoint writes are pending. */
 +static
 +void
 +log_complete_checkpoint(void)
 +/*=========================*/
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_ad(log_sys->n_pending_checkpoint_writes == 0);
 +
 +	log_sys->next_checkpoint_no++;
 +
 +	ut_ad(log_sys->next_checkpoint_lsn >= log_sys->last_checkpoint_lsn);
 +	log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
 +	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 +		    log_sys->lsn - log_sys->last_checkpoint_lsn);
 +
 +	rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
 +}
 +
 +/******************************************************//**
 +Completes an asynchronous checkpoint info write i/o to a log file.
 +Decrements log_sys->n_pending_checkpoint_writes under log_sys->mutex and
 +calls log_complete_checkpoint() when the count reaches zero. */
 +static
 +void
 +log_io_complete_checkpoint(void)
 +/*============================*/
 +{
 +	mutex_enter(&(log_sys->mutex));
 +
 +	ut_ad(log_sys->n_pending_checkpoint_writes > 0);
 +
 +	log_sys->n_pending_checkpoint_writes--;
 +	MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
 +
 +	if (log_sys->n_pending_checkpoint_writes == 0) {
 +		log_complete_checkpoint();
 +	}
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Wake the redo log watching thread to parse the log up to this
 +	checkpoint. This is done after log_sys->mutex has been released and
 +	only if srv_track_changed_pages is set. */
 +	if (srv_track_changed_pages) {
 +		os_event_reset(srv_redo_log_tracked_event);
 +		os_event_set(srv_checkpoint_completed_event);
 +	}
 +}
 +
 +/*******************************************************************//**
 +Writes info to a checkpoint about a log group.
 +The file number is stored as 8 bytes at offset LOG_CHECKPOINT_GROUP_ARRAY
 ++ 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO of buf. */
 +static
 +void
 +log_checkpoint_set_nth_group_info(
 +/*==============================*/
 +	byte*	buf,	/*!< in: buffer for checkpoint info */
 +	ulint	n,	/*!< in: nth slot; must be less than
 +			LOG_MAX_N_GROUPS */
 +	lsn_t	file_no)/*!< in: archived file number */
 +{
 +	ut_ad(n < LOG_MAX_N_GROUPS);
 +
 +	mach_write_to_8(buf + LOG_CHECKPOINT_GROUP_ARRAY +
 +			8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO,
 +			file_no);
 +}
 +
 +/*******************************************************************//**
 +Gets info from a checkpoint about a log group.
 +Reads the 8 bytes at offset LOG_CHECKPOINT_GROUP_ARRAY + 8 * n
 ++ LOG_CHECKPOINT_ARCHIVED_FILE_NO of buf, i.e. the slot written by
 +log_checkpoint_set_nth_group_info(). */
 +UNIV_INTERN
 +void
 +log_checkpoint_get_nth_group_info(
 +/*==============================*/
 +	const byte*	buf,	/*!< in: buffer containing checkpoint info */
 +	ulint		n,	/*!< in: nth slot; must be less than
 +				LOG_MAX_N_GROUPS */
 +	lsn_t*		file_no)/*!< out: archived file number */
 +{
 +	ut_ad(n < LOG_MAX_N_GROUPS);
 +
 +	*file_no = mach_read_from_8(buf + LOG_CHECKPOINT_GROUP_ARRAY +
 +				8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
 +}
 +
 +/******************************************************//**
 +Writes the checkpoint info to a log group header.
 +The info (checkpoint number and lsn, the lsn offset within the group, the
 +log buffer size, the archived lsn, the per-group archived file numbers and
 +two checksums) is assembled in group->checkpoint_buf. If log_do_write is
 +set, it is written with an asynchronous fil_io() call to LOG_CHECKPOINT_1
 +or LOG_CHECKPOINT_2, depending on the parity of
 +log_sys->next_checkpoint_no; the first pending checkpoint write x-locks
 +log_sys->checkpoint_lock.
 +Debug builds assert that the caller owns log_sys->mutex. */
 +static
 +void
 +log_group_checkpoint(
 +/*=================*/
 +	log_group_t*	group)	/*!< in: log group */
 +{
 +	log_group_t*	group2;
 +#ifdef UNIV_LOG_ARCHIVE
 +	ib_uint64_t	archived_lsn;
 +#endif /* UNIV_LOG_ARCHIVE */
 +	lsn_t		lsn_offset;
 +	ulint		write_offset;
 +	ulint		fold;
 +	byte*		buf;
 +	ulint		i;
 +
 +	ut_ad(!srv_read_only_mode);
 +	ut_ad(srv_shutdown_state != SRV_SHUTDOWN_LAST_PHASE);
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
 +
 +	buf = group->checkpoint_buf;
 +
 +#ifdef UNIV_DEBUG
 +	lsn_t		old_next_checkpoint_lsn
 +		= mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
 +	ut_ad(old_next_checkpoint_lsn <= log_sys->next_checkpoint_lsn);
 +#endif /* UNIV_DEBUG */
 +	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
 +	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
 +
 +	lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
 +					       group);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
 +			lsn_offset & 0xFFFFFFFFUL);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32,
 +			lsn_offset >> 32);
 +
 +	mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +		archived_lsn = LSN_MAX;
 +	} else {
 +		archived_lsn = log_sys->archived_lsn;
 +	}
 +
 +	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
 +#else /* UNIV_LOG_ARCHIVE */
 +	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
 +		log_checkpoint_set_nth_group_info(buf, i, 0);
 +	}
 +
 +	group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	while (group2) {
 +		log_checkpoint_set_nth_group_info(buf, group2->id,
 +#ifdef UNIV_LOG_ARCHIVE
 +						  group2->archived_file_no
 +#else /* UNIV_LOG_ARCHIVE */
 +						  0
 +#endif /* UNIV_LOG_ARCHIVE */
 +						  );
 +
 +		group2 = UT_LIST_GET_NEXT(log_groups, group2);
 +	}
 +
 +	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
 +
 +	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 +			      LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
 +
 +	/* We alternate the physical place of the checkpoint info in the first
 +	log file: even checkpoint numbers go to LOG_CHECKPOINT_1, odd ones
 +	to LOG_CHECKPOINT_2 */
 +
 +	if ((log_sys->next_checkpoint_no & 1) == 0) {
 +		write_offset = LOG_CHECKPOINT_1;
 +	} else {
 +		write_offset = LOG_CHECKPOINT_2;
 +	}
 +
 +	if (log_do_write) {
 +		if (log_sys->n_pending_checkpoint_writes == 0) {
 +
 +			rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
 +					   LOG_CHECKPOINT);
 +		}
 +
 +		log_sys->n_pending_checkpoint_writes++;
 +		MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);
 +
 +		log_sys->n_log_ios++;
 +
 +		MONITOR_INC(MONITOR_LOG_IO);
 +
 +		/* We send as the last parameter the group machine address
 +		added with 1, as we want to distinguish between a normal log
 +		file write and a checkpoint field write. The assertion after
 +		the call checks that the group address itself is even. */
 +
 +		fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->space_id, 0,
 +		       write_offset / UNIV_PAGE_SIZE,
 +		       write_offset % UNIV_PAGE_SIZE,
 +		       OS_FILE_LOG_BLOCK_SIZE,
 +		       buf, ((byte*) group + 1));
 +
 +		ut_ad(((ulint) group & 0x1UL) == 0);
 +	}
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +#ifdef UNIV_HOTBACKUP
 +/******************************************************//**
 +Writes info to a buffer of a log group when log files are created in
 +backup restoration.
 +Fills in the file header fields (group id 0, start lsn, the "ibbackup "
 +label followed by a timestamp) and a checkpoint number 0 at
 +LOG_CHECKPOINT_1 whose lsn is start + LOG_BLOCK_HDR_SIZE. Only the buffer
 +is modified; nothing is written to a file here. */
 +UNIV_INTERN
 +void
 +log_reset_first_header_and_checkpoint(
 +/*==================================*/
 +	byte*		hdr_buf,/*!< in: buffer which will be written to the
 +				start of the first log file */
 +	ib_uint64_t	start)	/*!< in: lsn of the start of the first log file;
 +				we pretend that there is a checkpoint at
 +				start + LOG_BLOCK_HDR_SIZE */
 +{
 +	ulint		fold;
 +	byte*		buf;
 +	ib_uint64_t	lsn;
 +
 +	mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
 +	mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
 +
 +	lsn = start + LOG_BLOCK_HDR_SIZE;
 +
 +	/* Write the label of mysqlbackup --restore.
 +	NOTE(review): strcpy() and ut_sprintf_timestamp() write without a
 +	length limit; presumably hdr_buf has room for the label and the
 +	timestamp at this offset - confirm against the header layout. */
 +	strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 +	       "ibbackup ");
 +	ut_sprintf_timestamp((char*) hdr_buf
 +			     + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
 +				+ (sizeof "ibbackup ") - 1));
 +	buf = hdr_buf + LOG_CHECKPOINT_1;
 +
 +	mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0);
 +	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
 +
 +	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
 +			LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, 0);
 +
 +	mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
 +
 +	mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
 +
 +	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
 +
 +	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 +			      LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 +	mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
 +
 +	/* Starting from InnoDB-3.23.50, we should also write info on
 +	allocated size in the tablespace, but unfortunately we do not
 +	know it here */
 +}
 +#endif /* UNIV_HOTBACKUP */
 +
 +#ifndef UNIV_HOTBACKUP
 +/******************************************************//**
 +Reads a checkpoint info from a log group header to log_sys->checkpoint_buf.
 +Issues a synchronous fil_io() read of OS_FILE_LOG_BLOCK_SIZE bytes at byte
 +offset 'field' of the group's space.
 +Debug builds assert that the caller owns log_sys->mutex. */
 +UNIV_INTERN
 +void
 +log_group_read_checkpoint_info(
 +/*===========================*/
 +	log_group_t*	group,	/*!< in: log group */
 +	ulint		field)	/*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
 +	       field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
 +	       OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
 +}
 +
 +/******************************************************//**
 +Writes checkpoint info to groups.
 +Calls log_group_checkpoint() for every group in log_sys->log_groups.
 +Does nothing when srv_read_only_mode is set.
 +Debug builds assert that the caller owns log_sys->mutex. */
 +UNIV_INTERN
 +void
 +log_groups_write_checkpoint_info(void)
 +/*==================================*/
 +{
 +	log_group_t*	group;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (!srv_read_only_mode) {
 +		for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +		     group;
 +		     group = UT_LIST_GET_NEXT(log_groups, group)) {
 +
 +			log_group_checkpoint(group);
 +		}
 +	}
 +}
 +
 +/******************************************************//**
 +Makes a checkpoint. Note that this function does not flush dirty
 +blocks from the buffer pool: it only checks what is lsn of the oldest
 +modification in the pool, and writes information about the lsn in
 +log files. Use log_make_checkpoint_at to flush also the pool.
 +@return	TRUE if success (also when no checkpoint write was needed, or
 +when the checkpoint was skipped because checkpoints are disabled and
 +safe_to_ignore is set), FALSE if a checkpoint write was already running */
 +UNIV_INTERN
 +ibool
 +log_checkpoint(
 +/*===========*/
 +	ibool	sync,		/*!< in: TRUE if synchronous operation is
 +				desired */
 +	ibool	write_always,	/*!< in: the function normally checks if
 +				the new checkpoint would have a greater
 +				lsn than the previous one: if not, then no
 +				physical write is done; by setting this
 +				parameter TRUE, a physical write will always be
 +				made to log files */
 +        ibool   safe_to_ignore) /*!< in: TRUE if checkpoint can be ignored in
 +                                  the case checkpoints are disabled */
 +{
 +	lsn_t	oldest_lsn;
 +
 +	ut_ad(!srv_read_only_mode);
 +
 +	if (recv_recovery_is_on()) {
- 		recv_apply_hashed_log_recs(TRUE);
++		recv_apply_hashed_log_recs(true);
 +	}
 +
 +	if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC &&
 +	    srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT) {
 +		fil_flush_file_spaces(FIL_TABLESPACE);
 +	}
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	ut_ad(!recv_no_log_write);
 +	oldest_lsn = log_buf_pool_get_oldest_modification();
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Because log also contains headers and dummy log records,
 +	if the buffer pool contains no dirty buffers, oldest_lsn
 +	gets the value log_sys->lsn from the previous function,
 +	and we must make sure that the log is flushed up to that
 +	lsn. If there are dirty buffers in the buffer pool, then our
 +	write-ahead-logging algorithm ensures that the log has been flushed
 +	up to oldest_lsn. */
 +
 +	log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +        /* Return if this is not a forced checkpoint and either there is no
 +           need for a checkpoint or if checkpoints are disabled */
 +	if (!write_always
 +	    && (log_sys->last_checkpoint_lsn >= oldest_lsn ||
 +                (safe_to_ignore && log_disable_checkpoint_active)))
 +        {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return(TRUE);
 +	}
 +
 +        if (log_disable_checkpoint_active)
 +        {
 +          	/* Wait until we are allowed to do a checkpoint: the s-lock
 +		is only granted once log_sys->checkpoint_lock is no longer
 +		x-locked, and it is released again at once */
 +		mutex_exit(&(log_sys->mutex));
 +		rw_lock_s_lock(&(log_sys->checkpoint_lock));
 +		rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 +                mutex_enter(&(log_sys->mutex));
 +        }
 +
 +	ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
 +
 +	if (log_sys->n_pending_checkpoint_writes > 0) {
 +		/* A checkpoint write is running */
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		if (sync) {
 +			/* Wait for the checkpoint write to complete */
 +			rw_lock_s_lock(&(log_sys->checkpoint_lock));
 +			rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 +		}
 +
 +		return(FALSE);
 +	}
 +
 +	ut_ad(oldest_lsn >= log_sys->next_checkpoint_lsn);
 +	log_sys->next_checkpoint_lsn = oldest_lsn;
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr, "Making checkpoint no "
 +			LSN_PF " at lsn " LSN_PF "\n",
 +			log_sys->next_checkpoint_no,
 +			oldest_lsn);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	log_groups_write_checkpoint_info();
 +
 +	MONITOR_INC(MONITOR_NUM_CHECKPOINT);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	if (sync) {
 +		/* Wait for the checkpoint write to complete */
 +		rw_lock_s_lock(&(log_sys->checkpoint_lock));
 +		rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 +	}
 +
 +	return(TRUE);
 +}
 +
 +/****************************************************************//**
 +Makes a checkpoint at a given lsn or later.
 +First preflushes the buffer pool up to 'lsn', then makes a synchronous
 +checkpoint; each of the two steps is repeated until it reports success. */
 +UNIV_INTERN
 +void
 +log_make_checkpoint_at(
 +/*===================*/
 +	lsn_t	lsn,		/*!< in: make a checkpoint at this or a
 +				later lsn, if LSN_MAX, makes
 +				a checkpoint at the latest lsn */
 +	ibool	write_always)	/*!< in: the function normally checks if
 +				the new checkpoint would have a
 +				greater lsn than the previous one: if
 +				not, then no physical write is done;
 +				by setting this parameter TRUE, a
 +				physical write will always be made to
 +				log files */
 +{
 +	/* Preflush pages synchronously */
 +
 +	while (!log_preflush_pool_modified_pages(lsn)) {
 +		/* Flush as much as we can */
 +	}
 +
 +	while (!log_checkpoint(TRUE, write_always, FALSE)) {
 +		/* Force a checkpoint */
 +	}
 +}
 +
 +/****************************************************************//**
 +Disable checkpoints. This is used when doing a volume snapshot
 +to ensure that we don't get a checkpoint between snapshotting two
 +different volumes.
 +Waits for pending checkpoint writes to finish, then x-locks
 +log_sys->checkpoint_lock and sets log_disable_checkpoint_active.
 +@return 0 if checkpoints were disabled by this call, 1 if they were
 +already disabled */
 +
 +UNIV_INTERN
 +ibool log_disable_checkpoint()
 +{
 +  mutex_enter(&(log_sys->mutex));
 +
 +  /*
 +    Wait if a checkpoint write is running.
 +    This is the same code that is used in log_checkpoint() to ensure
 +    that two checkpoints are not happening at the same time.
 +  */
 +  while (log_sys->n_pending_checkpoint_writes > 0)
 +  {
 +    mutex_exit(&(log_sys->mutex));
 +    rw_lock_s_lock(&(log_sys->checkpoint_lock));
 +    rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 +    mutex_enter(&(log_sys->mutex));
 +  }
 +  /*
 +    The following should never be true; it is here just in case of
 +    wrong usage of this function. (Better safe than sorry).
 +  */
 +
 +  if (log_disable_checkpoint_active)
 +  {
 +    mutex_exit(&(log_sys->mutex));
 +    return 1;                                   /* Already disabled */
 +  }
 +  /*
 +    Take the checkpoint lock to ensure we will not get any checkpoints
 +    running
 +  */
 +  rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
 +  log_disable_checkpoint_active= 1;
 +  mutex_exit(&(log_sys->mutex));
 +  return 0;
 +}
 +
 +
 +/****************************************************************//**
 +Enable checkpoints that were disabled with log_disable_checkpoint().
 +This function is called by MariaDB, and only after an earlier call
 +to log_disable_checkpoint().
 +
 +Note: We can't take the log_sys->mutex here, as a running log_checkpoint()
 +that is waiting for log_sys->checkpoint_lock may already hold the mutex
 +(NOTE(review): this reading of the original note should be confirmed).
 +This is however safe to do without a mutex as log_disable_checkpoint
 +is protected by log_sys->checkpoint_lock.
 +*/
 +
 +UNIV_INTERN
 +void log_enable_checkpoint()
 +{
 +  ut_ad(log_disable_checkpoint_active);
 +  /* Test variable, mostly to protect against wrong usage */
 +  if (log_disable_checkpoint_active)
 +  {
 +    log_disable_checkpoint_active= 0;
 +    rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
 +  }
 +}
 +
 +/****************************************************************//**
 +Tries to establish a big enough margin of free space in the log groups, such
 +that a new log entry can be catenated without an immediate need for a
 +checkpoint. NOTE: this function may only be called if the calling thread
 +owns no synchronization objects!
 +Does nothing unless log->check_flush_or_checkpoint is set. If the age of
 +the oldest modification exceeds log->max_modified_age_sync, the buffer
 +pool is preflushed by twice the excess. If the checkpoint age exceeds
 +log->max_checkpoint_age, a synchronous checkpoint is made; if it only
 +exceeds log->max_checkpoint_age_async, an asynchronous one. The whole
 +check is repeated after a failed preflush and after a synchronous
 +checkpoint. */
 +static
 +void
 +log_checkpoint_margin(void)
 +/*=======================*/
 +{
 +	log_t*		log		= log_sys;
 +	lsn_t		age;
 +	lsn_t		checkpoint_age;
 +	ib_uint64_t	advance;
 +	lsn_t		oldest_lsn;
 +	ibool		checkpoint_sync;
 +	ibool		do_checkpoint;
 +	bool		success;
 +loop:
 +	checkpoint_sync = FALSE;
 +	do_checkpoint = FALSE;
 +	advance = 0;
 +
 +	mutex_enter(&(log->mutex));
 +	ut_ad(!recv_no_log_write);
 +
 +	if (log->check_flush_or_checkpoint == FALSE) {
 +		mutex_exit(&(log->mutex));
 +
 +		return;
 +	}
 +
 +	oldest_lsn = log_buf_pool_get_oldest_modification();
 +
 +	age = log->lsn - oldest_lsn;
 +
 +	if (age > log->max_modified_age_sync) {
 +
 +		/* A flush is urgent: we have to do a synchronous preflush */
 +		advance = 2 * (age - log->max_modified_age_sync);
 +	}
 +
 +	checkpoint_age = log->lsn - log->last_checkpoint_lsn;
 +
 +	if (checkpoint_age > log->max_checkpoint_age) {
 +		/* A checkpoint is urgent: we do it synchronously. The flag
 +		check_flush_or_checkpoint is left set in this case, so the
 +		check is made again after the checkpoint. */
 +
 +		checkpoint_sync = TRUE;
 +
 +		do_checkpoint = TRUE;
 +
 +	} else if (checkpoint_age > log->max_checkpoint_age_async) {
 +		/* A checkpoint is not urgent: do it asynchronously */
 +
 +		do_checkpoint = TRUE;
 +
 +		log->check_flush_or_checkpoint = FALSE;
 +	} else {
 +		log->check_flush_or_checkpoint = FALSE;
 +	}
 +
 +	mutex_exit(&(log->mutex));
 +
 +	if (advance) {
 +		lsn_t	new_oldest = oldest_lsn + advance;
 +
 +		success = log_preflush_pool_modified_pages(new_oldest);
 +
 +		/* If the flush succeeded, this thread has done its part
 +		and can proceed. If it did not succeed, there was another
 +		thread doing a flush at the same time. */
 +		if (!success) {
 +			mutex_enter(&(log->mutex));
 +
 +			log->check_flush_or_checkpoint = TRUE;
 +
 +			mutex_exit(&(log->mutex));
 +			goto loop;
 +		}
 +	}
 +
 +	if (do_checkpoint) {
 +                log_checkpoint(checkpoint_sync, FALSE, FALSE);
 +
 +		if (checkpoint_sync) {
 +
 +			goto loop;
 +		}
 +	}
 +}
 +
 +/******************************************************//**
 +Reads a specified log segment to a buffer.  Optionally releases the log mutex
 +before the I/O.  A single fil_io() call never reads past the end of the
 +current log file of the group: a longer segment is read in several passes of
 +the loop.  When release_mutex is set, the mutex is re-acquired only before
 +another pass, so the function returns without holding it.  After each pass,
 +progress is logged if recv_sys->report() says so. */
 +UNIV_INTERN
 +void
 +log_group_read_log_seg(
 +/*===================*/
 +	ulint		type,		/*!< in: LOG_ARCHIVE or LOG_RECOVER */
 +	byte*		buf,		/*!< in: buffer where to read */
 +	log_group_t*	group,		/*!< in: log group */
 +	lsn_t		start_lsn,	/*!< in: read area start */
 +	lsn_t		end_lsn,	/*!< in: read area end */
 +	ibool		release_mutex)	/*!< in: whether the log_sys->mutex
 +					should be released before the read */
 +{
 +	ulint	len;
 +	lsn_t	source_offset;
 +	bool	sync;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	sync = (type == LOG_RECOVER);
 +loop:
 +	source_offset = log_group_calc_lsn_offset(start_lsn, group);
 +
 +	ut_a(end_lsn - start_lsn <= ULINT_MAX);
 +	len = (ulint) (end_lsn - start_lsn);
 +
 +	ut_ad(len != 0);
 +
 +	if ((source_offset % group->file_size) + len > group->file_size) {
 +
 +		/* If the above condition is true then len (which is ulint)
 +		is > the expression below, so the typecast is ok. The read
 +		is shortened to end at the end of the current file. */
 +		len = (ulint) (group->file_size -
 +			(source_offset % group->file_size));
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (type == LOG_ARCHIVE) {
 +
 +		log_sys->n_pending_archive_ios++;
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
 +
 +	if (release_mutex) {
 +		mutex_exit(&(log_sys->mutex));
 +	}
 +
 +	fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
 +	       (ulint) (source_offset / UNIV_PAGE_SIZE),
 +	       (ulint) (source_offset % UNIV_PAGE_SIZE),
 +	       len, buf, (type == LOG_ARCHIVE) ? &log_archive_io : NULL);
 +
 +	start_lsn += len;
 +	buf += len;
 +
++	if (recv_sys->report(ut_time())) {
++		ib_logf(IB_LOG_LEVEL_INFO, "Read redo log up to LSN=" LSN_PF,
++			start_lsn);
++	}
++
 +	if (start_lsn != end_lsn) {
 +
 +		if (release_mutex) {
 +			mutex_enter(&(log_sys->mutex));
 +		}
 +		goto loop;
 +	}
 +}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +/******************************************************//**
 +Generates an archived log file name: srv_arch_dir, followed by a path
 +separator unless the directory name already ends in one, followed by
 +IB_ARCHIVED_LOGS_PREFIX and file_no printed as a zero-padded decimal number
 +of at least IB_ARCHIVED_LOGS_SERIAL_LEN digits.
 +NOTE(review): buf[dirnamelen-1] is read even if srv_arch_dir is an empty
 +string; confirm that the directory name can never be empty here. */
 +UNIV_INTERN
 +void
 +log_archived_file_name_gen(
 +/*=======================*/
 +	char*	buf,	/*!< out: buffer where to write */
 +	ulint	buf_len,/*!< in: buffer length */
 +	ulint	id MY_ATTRIBUTE((unused)),
 +			/*!< in: group id;
 +			currently we only archive the first group */
 +	lsn_t	file_no)/*!< in: file number */
 +{
 +	ulint	dirnamelen;
 +
 +	dirnamelen = strlen(srv_arch_dir);
 +
 +	ut_a(buf_len > dirnamelen +
 +		       IB_ARCHIVED_LOGS_SERIAL_LEN +
 +		       IB_ARCHIVED_LOGS_PREFIX_LEN + 2);
 +
 +	strcpy(buf, srv_arch_dir);
 +
 +	if (buf[dirnamelen-1] != SRV_PATH_SEPARATOR) {
 +		buf[dirnamelen++] = SRV_PATH_SEPARATOR;
 +	}
 +	sprintf(buf + dirnamelen, IB_ARCHIVED_LOGS_PREFIX 
 +		"%0" IB_TO_STR(IB_ARCHIVED_LOGS_SERIAL_LEN) "llu",
 +		(unsigned long long)file_no);
 +}
 +
 +/******************************************************//**
 +Get offset within archived log file to continue to write
 +with. The offset is 0 if the archived file does not exist, or if
 +archived_lsn is LSN_MAX (archiving was OFF prior to startup); otherwise it
 +is archived_lsn - file_no + LOG_FILE_HDR_SIZE. */
 +UNIV_INTERN
 +void
 +log_archived_get_offset(
 +/*=====================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	lsn_t		file_no,	/*!< in: archive log file number */
 +	lsn_t		archived_lsn,	/*!< in: last archived LSN */
 +	lsn_t*		offset)		/*!< out: offset within archived file */
 +{
 +	char		file_name[OS_FILE_MAX_PATH];
 +	ibool		exists;
 +	os_file_type_t	type;
 +
 +	log_archived_file_name_gen(file_name,
 +		sizeof(file_name), group->id, file_no);
 +
 +	ut_a(os_file_status(file_name, &exists,	&type));
 +
 +	if (!exists) {
 +		*offset = 0;
 +		return;
 +	}
 +
 +	if (archived_lsn != LSN_MAX) {
 +		*offset = archived_lsn - file_no + LOG_FILE_HDR_SIZE;
 +	} else {
 +		/* Archiving was OFF prior startup */
 +		*offset = 0;
 +	}
 +
 +	ut_a(group->file_size >= *offset + LOG_FILE_HDR_SIZE);
 +}
 +
 +/******************************************************//**
 +Writes a log file header to a log file space.  The group id, start lsn and
 +file number are stored in the header buffer that the group keeps for
 +nth_file, the LOG_FILE_ARCH_COMPLETED flag is cleared, and
 +2 * OS_FILE_LOG_BLOCK_SIZE bytes of the buffer are written at the start of
 +the nth file of the archive space.
 +NOTE(review): file_no is an lsn_t but is stored with mach_write_to_4();
 +confirm that the value always fits in 4 bytes. */
 +static
 +void
 +log_group_archive_file_header_write(
 +/*================================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	ulint		nth_file,	/*!< in: header to the nth file in the
 +					archive log file space */
 +	lsn_t		file_no,	/*!< in: archived file number */
 +	ib_uint64_t	start_lsn)	/*!< in: log file data starts at this
 +					lsn */
 +{
 +	byte*	buf;
 +	ulint	dest_offset;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	ut_a(nth_file < group->n_files);
 +
 +	buf = *(group->archive_file_header_bufs + nth_file);
 +
 +	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
 +	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
 +	mach_write_to_4(buf + LOG_FILE_NO, file_no);
 +
 +	mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
 +
 +	dest_offset = nth_file * group->file_size;
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
 +	       0,
 +	       dest_offset / UNIV_PAGE_SIZE,
 +	       dest_offset % UNIV_PAGE_SIZE,
 +	       2 * OS_FILE_LOG_BLOCK_SIZE,
 +	       buf, &log_archive_io);
 +}
 +
 +/******************************************************//**
 +Writes a log file header to a completed archived log file: sets the
 +LOG_FILE_ARCH_COMPLETED flag and the end lsn in the header buffer that the
 +group keeps for nth_file, and writes OS_FILE_LOG_BLOCK_SIZE bytes of that
 +buffer, starting at the completed flag, to the matching position of the
 +nth file in the archive space. */
 +static
 +void
 +log_group_archive_completed_header_write(
 +/*=====================================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	ulint		nth_file,	/*!< in: header to the nth file in the
 +					archive log file space */
 +	ib_uint64_t	end_lsn)	/*!< in: end lsn of the file */
 +{
 +	byte*	buf;
 +	ulint	dest_offset;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_a(nth_file < group->n_files);
 +
 +	buf = *(group->archive_file_header_bufs + nth_file);
 +
 +	mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
 +	mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
 +
 +	dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
 +	       0,
 +	       dest_offset / UNIV_PAGE_SIZE,
 +	       dest_offset % UNIV_PAGE_SIZE,
 +	       OS_FILE_LOG_BLOCK_SIZE,
 +	       buf + LOG_FILE_ARCH_COMPLETED,
 +	       &log_archive_io);
 +}
 +
 +/******************************************************//**
 +Does the archive writes for a single log group.  The contents of
 +log_sys->archive_buf, covering the lsn range from log_sys->archived_lsn to
 +log_sys->next_archived_lsn, are written to the archive file space of the
 +group, starting at group->archived_offset.  Archive files are created or
 +opened and added to the space as needed, and a write never crosses an
 +archive file boundary.  On completion the resulting position is stored in
 +group->next_archived_file_no and group->next_archived_offset.  The process
 +exits if an archive file can neither be created nor opened. */
 +static
 +void
 +log_group_archive(
 +/*==============*/
 +	log_group_t*	group)	/*!< in: log group */
 +{
 +	os_file_t	file_handle;
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +	char		name[OS_FILE_MAX_PATH];
 +	byte*		buf;
 +	ulint		len;
 +	ibool		ret;
 +	lsn_t		next_offset;
 +	ulint		n_files;
 +	ulint		open_mode;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	start_lsn = log_sys->archived_lsn;
 +
 +	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 +
 +	end_lsn = log_sys->next_archived_lsn;
 +
 +	ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 +
 +	buf = log_sys->archive_buf;
 +
 +	n_files = 0;	/* counts the archive files filled up by this call */
 +
 +	next_offset = group->archived_offset;
 +loop:
 +	if ((next_offset % group->file_size == 0)
 +	    || (fil_space_get_size(group->archive_space_id) == 0)) {
 +
 +		/* Add the file to the archive file space; create or open the
 +		file. A file is created when the write position is at a file
 +		boundary; otherwise an existing file is opened. */
 +
 +		if (next_offset % group->file_size == 0) {
 +			open_mode = OS_FILE_CREATE;
 +			if (n_files == 0) {
 +				/* Adjust archived_file_no to match start_lsn
 +				   which is written in file header as well */
 +				group->archived_file_no = start_lsn;
 +			}
 +		} else {
 +			open_mode = OS_FILE_OPEN;
 +		}
 +
 +		log_archived_file_name_gen(name, sizeof(name), group->id,
 +					   group->archived_file_no +
 +					   n_files * (group->file_size -
 +					   LOG_FILE_HDR_SIZE));
 +
 +		file_handle = os_file_create(innodb_file_log_key,
 +					     name, open_mode,
 +					     OS_FILE_AIO,
 +					     OS_DATA_FILE, &ret);
 +
 +		if (!ret && (open_mode == OS_FILE_CREATE)) {
 +			file_handle = os_file_create(	/* retry as open */
 +				innodb_file_log_key, name, OS_FILE_OPEN,
 +				OS_FILE_AIO, OS_DATA_FILE, &ret);
 +		}
 +
 +		if (!ret) {
 +			fprintf(stderr,
 +				"InnoDB: Cannot create or open"
 +				" archive log file %s.\n"
 +				"InnoDB: Cannot continue operation.\n"
 +				"InnoDB: Check that the log archive"
 +				" directory exists,\n"
 +				"InnoDB: you have access rights to it, and\n"
 +				"InnoDB: there is space available.\n", name);
 +			exit(1);
 +		}
 +
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +			fprintf(stderr, "Created archive file %s\n", name);
 +		}
 +#endif /* UNIV_DEBUG */
 +
 +		ret = os_file_close(file_handle);
 +
 +		ut_a(ret);
 +
 +		/* Add the archive file as a node to the space */
 +
 +		ut_a(fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
 +				     group->archive_space_id, FALSE));
 +
 +		if (next_offset % group->file_size == 0) {
 +			log_group_archive_file_header_write(
 +				group, n_files,
 +				group->archived_file_no +
 +				n_files * (group->file_size - LOG_FILE_HDR_SIZE),
 +				start_lsn);
 +
 +			next_offset += LOG_FILE_HDR_SIZE;
 +		}
 +	}
 +
 +	len = end_lsn - start_lsn;
 +
 +	if (group->file_size < (next_offset % group->file_size) + len) {
 +
 +		len = group->file_size - (next_offset % group->file_size);
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Archiving starting at lsn " LSN_PF ", len %lu"
 +			" to group %lu\n",
 +			start_lsn,
 +			(ulong) len, (ulong) group->id);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	log_sys->n_pending_archive_ios++;
 +
 +	log_sys->n_log_ios++;
 +
 +	MONITOR_INC(MONITOR_LOG_IO);
 +
 +	fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id,
 +	       0,
 +	       (ulint) (next_offset / UNIV_PAGE_SIZE),
 +	       (ulint) (next_offset % UNIV_PAGE_SIZE),
 +	       ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
 +	       &log_archive_io);
 +
 +	start_lsn += len;
 +	next_offset += len;
 +	buf += len;
 +
 +	if (next_offset % group->file_size == 0) {
 +		n_files++;
 +	}
 +
 +	if (end_lsn != start_lsn) {
 +
 +		goto loop;
 +	}
 +
 +	group->next_archived_file_no = group->archived_file_no +
 +			n_files * (group->file_size - LOG_FILE_HDR_SIZE);
 +	group->next_archived_offset = next_offset % group->file_size;
 +
 +	ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
 +}
 +
 +/*****************************************************//**
 +(Writes to the archive of each log group.) Currently, only the first
 +group is archived: the function calls log_group_archive() on the first
 +element of log_sys->log_groups. The caller must own log_sys->mutex. */
 +static
 +void
 +log_archive_groups(void)
 +/*====================*/
 +{
 +	log_group_t*	group;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	log_group_archive(group);
 +}
 +
 +/*****************************************************//**
 +Completes the archiving write phase for (each log group), currently,
 +the first log group.  The pending position (next_archived_file_no,
 +next_archived_offset) becomes the current one, every archive file that has
 +been written full gets a completed header, and those files are truncated
 +from the start of the archive file space. */
 +static
 +void
 +log_archive_write_complete_groups(void)
 +/*===================================*/
 +{
 +	log_group_t*	group;
 +	lsn_t		end_offset;
 +	ulint		trunc_files;
 +	ulint		n_files;
 +	ib_uint64_t	start_lsn;
 +	ib_uint64_t	end_lsn;
 +	ulint		i;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	group->archived_file_no = group->next_archived_file_no;
 +	group->archived_offset = group->next_archived_offset;
 +
 +	/* Truncate from the archive file space all but the last
 +	file, or if it has been written full, all files */
 +
 +	n_files = (UNIV_PAGE_SIZE
 +		   * fil_space_get_size(group->archive_space_id))
 +		/ group->file_size;
 +	ut_ad(n_files > 0);
 +
 +	end_offset = group->archived_offset;
 +
 +	if (end_offset % group->file_size == 0) {
 +
 +		trunc_files = n_files;
 +	} else {
 +		trunc_files = n_files - 1;
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes && trunc_files) {
 +		fprintf(stderr,
 +			"Complete file(s) archived to group %lu\n",
 +			(ulong) group->id);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	/* Calculate the archive file space start lsn: go back from
 +	next_archived_lsn by the payload of the last file and of the
 +	files to be truncated (file headers excluded) */
 +	start_lsn = log_sys->next_archived_lsn
 +		- (end_offset - LOG_FILE_HDR_SIZE + trunc_files
 +		   * (group->file_size - LOG_FILE_HDR_SIZE));
 +	end_lsn = start_lsn;
 +
 +	for (i = 0; i < trunc_files; i++) {
 +
 +		end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
 +
 +		/* Write a notice to the headers of archived log
 +		files that the file write has been completed */
 +
 +		log_group_archive_completed_header_write(group, i, end_lsn);
 +	}
 +
 +	fil_space_truncate_start(group->archive_space_id,
 +				 trunc_files * group->file_size);
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fputs("Archiving writes completed\n", stderr);
 +	}
 +#endif /* UNIV_DEBUG */
 +}
 +
 +/******************************************************//**
 +Completes an archiving i/o.  Does nothing while archive I/O is still
 +pending.  When none is pending and the phase is LOG_ARCHIVE_READ, switches
 +to LOG_ARCHIVE_WRITE and starts the archive writes.  When none is pending
 +and the phase is LOG_ARCHIVE_WRITE, finishes the write phase, advances
 +log_sys->archived_lsn to next_archived_lsn and releases the x-lock on
 +log_sys->archive_lock.  The caller must own log_sys->mutex. */
 +static
 +void
 +log_archive_check_completion_low(void)
 +/*==================================*/
 +{
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (log_sys->n_pending_archive_ios == 0
 +	    && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
 +
 +#ifdef UNIV_DEBUG
 +		if (log_debug_writes) {
 +			fputs("Archiving read completed\n", stderr);
 +		}
 +#endif /* UNIV_DEBUG */
 +
 +		/* Archive buffer has now been read in: start archive writes */
 +
 +		log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
 +
 +		log_archive_groups();
 +	}
 +
 +	if (log_sys->n_pending_archive_ios == 0
 +	    && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
 +
 +		log_archive_write_complete_groups();
 +
 +		log_sys->archived_lsn = log_sys->next_archived_lsn;
 +
 +		rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
 +	}
 +}
 +
 +/******************************************************//**
 +Completes an archiving i/o: flushes the archive file space of the first log
 +group, decrements log_sys->n_pending_archive_ios and lets
 +log_archive_check_completion_low() advance the archiving phase.  The log
 +mutex is released around the fil_flush() call. */
 +static
 +void
 +log_io_complete_archive(void)
 +/*=========================*/
 +{
 +	log_group_t*	group;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	fil_flush(group->archive_space_id);
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	ut_ad(log_sys->n_pending_archive_ios > 0);
 +
 +	log_sys->n_pending_archive_ios--;
 +
 +	log_archive_check_completion_low();
 +
 +	mutex_exit(&(log_sys->mutex));
 +}
 +
 +/********************************************************************//**
 +Starts an archiving operation: reads the log from log_sys->archived_lsn up
 +to a limit lsn into log_sys->archive_buf.  The limit is at most
 +archive_buf_size bytes ahead and is aligned down to a log block boundary if
 +it would reach log_sys->lsn.  If archiving is LOG_ARCH_STOPPED or
 +LOG_ARCH_STOPPING2, the function waits for the archiving_on event and
 +retries.  If archiving is off or there is nothing to archive, *n_bytes is
 +set to 0 and TRUE is returned.
 +@return	TRUE if succeed, FALSE if an archiving operation was already running */
 +UNIV_INTERN
 +ibool
 +log_archive_do(
 +/*===========*/
 +	ibool	sync,	/*!< in: TRUE if synchronous operation is desired */
 +	ulint*	n_bytes)/*!< out: archive log buffer size, 0 if nothing to
 +			archive */
 +{
 +	ibool   calc_new_limit;
 +	lsn_t	start_lsn;
 +	lsn_t	limit_lsn	= LSN_MAX;
 +
 +	calc_new_limit = TRUE;
 +loop:
 +	mutex_enter(&(log_sys->mutex));
 +
 +	switch (log_sys->archiving_state) {
 +	case LOG_ARCH_OFF:
 +arch_none:
 +		mutex_exit(&(log_sys->mutex));
 +
 +		*n_bytes = 0;
 +
 +		return(TRUE);
 +	case LOG_ARCH_STOPPED:
 +	case LOG_ARCH_STOPPING2:
 +		mutex_exit(&(log_sys->mutex));
 +
 +		os_event_wait(log_sys->archiving_on);
 +
 +		goto loop;
 +	}
 +
 +	start_lsn = log_sys->archived_lsn;
 +
 +	if (calc_new_limit) {
 +		ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
 +		limit_lsn = start_lsn + log_sys->archive_buf_size;
 +
 +		*n_bytes = log_sys->archive_buf_size;
 +
 +		if (limit_lsn >= log_sys->lsn) {
 +
 +			limit_lsn = ut_uint64_align_down(
 +				log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
 +		}
 +	}
 +
 +	if (log_sys->archived_lsn >= limit_lsn) {
 +
 +		goto arch_none;
 +	}
 +
 +	if (log_sys->written_to_all_lsn < limit_lsn) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
 +
 +		calc_new_limit = FALSE;	/* keep this limit on the retry */
 +
 +		goto loop;
 +	}
 +
 +	if (log_sys->n_pending_archive_ios > 0) {
 +		/* An archiving operation is running. In the synchronous
 +		case, wait for it by taking and releasing an s-lock on
 +		the archive lock. */
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		if (sync) {
 +			rw_lock_s_lock(&(log_sys->archive_lock));
 +			rw_lock_s_unlock(&(log_sys->archive_lock));
 +		}
 +
 +		*n_bytes = log_sys->archive_buf_size;
 +
 +		return(FALSE);
 +	}
 +
 +	rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
 +
 +	log_sys->archiving_phase = LOG_ARCHIVE_READ;
 +
 +	log_sys->next_archived_lsn = limit_lsn;
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"Archiving from lsn " LSN_PF " to lsn " LSN_PF "\n",
 +			log_sys->archived_lsn, limit_lsn);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	/* Read the log segment to the archive buffer */
 +
 +	log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
 +			       UT_LIST_GET_FIRST(log_sys->log_groups),
 +			       start_lsn, limit_lsn, FALSE);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	if (sync) {
 +		rw_lock_s_lock(&(log_sys->archive_lock));
 +		rw_lock_s_unlock(&(log_sys->archive_lock));
 +	}
 +
 +	*n_bytes = log_sys->archive_buf_size;
 +
 +	return(TRUE);
 +}
 +
 +/****************************************************************//**
 +Writes the log contents to the archive at least up to the lsn when this
 +function was called.  Returns at once if archiving is off.  Otherwise the
 +current log block is padded and log_archive_do() is called synchronously
 +until log_sys->archived_lsn has reached that lsn or log_archive_do()
 +reports that there was nothing to archive. */
 +static
 +void
 +log_archive_all(void)
 +/*=================*/
 +{
 +	lsn_t	present_lsn;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return;
 +	}
 +
 +	present_lsn = log_sys->lsn;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	log_pad_current_log_block();
 +
 +	for (;;) {
 +
 +		ulint	archived_bytes;
 +
 +		mutex_enter(&(log_sys->mutex));
 +
 +		if (present_lsn <= log_sys->archived_lsn) {
 +
 +			mutex_exit(&(log_sys->mutex));
 +
 +			return;
 +		}
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		log_archive_do(TRUE, &archived_bytes);
 +
 +		if (archived_bytes == 0)
 +			return;
 +	}
 +}
 +
 +/*****************************************************//**
 +Closes the possible open archive log file (for each group) the first group,
 +and if it was open, increments the group file count by 2, if desired.
 +Does nothing if archiving is off or the archive file space is empty.
 +Otherwise the completed header is written to the first archive file with
 +log_sys->archived_lsn as its end lsn, and the file is truncated from the
 +archive file space.
 +NOTE(review): the visible code resets group->archived_offset to 0 when
 +increment_file_count is set and does not increment any file count; confirm
 +whether the sentence about incrementing by 2 is stale. */
 +static
 +void
 +log_archive_close_groups(
 +/*=====================*/
 +	ibool	increment_file_count)	/*!< in: TRUE if we want to increment
 +					the file count */
 +{
 +	log_group_t*	group;
 +	ulint		trunc_len;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +
 +		return;
 +	}
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	trunc_len = UNIV_PAGE_SIZE
 +		* fil_space_get_size(group->archive_space_id);
 +	if (trunc_len > 0) {
 +		ut_a(trunc_len == group->file_size);
 +
 +		/* Write a notice to the headers of archived log
 +		files that the file write has been completed */
 +
 +		log_group_archive_completed_header_write(
 +			group, 0, log_sys->archived_lsn);
 +
 +		fil_space_truncate_start(group->archive_space_id,
 +					 trunc_len);
 +		if (increment_file_count) {
 +			group->archived_offset = 0;
 +		}
 +
 +	}
 +}
 +
 +/****************************************************************//**
 +Writes the log contents to the archive up to the lsn when this function was
 +called, and stops the archiving. When archiving is started again, the archived
 +log file numbers start from 2 higher, so that the archiving will not write
 +again to the archived log files which exist when this function returns.
 +The archiving state goes from LOG_ARCH_ON through LOG_ARCH_STOPPING (while
 +the log is archived) and LOG_ARCH_STOPPING2 (while a running archiving
 +operation is waited for, the archive file is closed and a checkpoint is
 +made) to LOG_ARCH_STOPPED. */
 +static
 +void
 +log_archive_stop(void)
 +/*==================*/
 +{
 +	ibool	success;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	ut_ad(log_sys->archiving_state == LOG_ARCH_ON);
 +	log_sys->archiving_state = LOG_ARCH_STOPPING;
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	log_archive_all();
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	log_sys->archiving_state = LOG_ARCH_STOPPING2;
 +	os_event_reset(log_sys->archiving_on);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Wait for a possible archiving operation to end */
 +
 +	rw_lock_s_lock(&(log_sys->archive_lock));
 +	rw_lock_s_unlock(&(log_sys->archive_lock));
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	/* Close all archived log files, incrementing the file count by 2,
 +	if appropriate */
 +
 +	log_archive_close_groups(TRUE);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Make a checkpoint, so that if recovery is needed, the file numbers
 +	of new archived log files will start from the right value. The call
 +	is repeated until log_checkpoint() reports success. */
 +
 +	success = FALSE;
 +
 +	while (!success) {
 +		success = log_checkpoint(TRUE, TRUE, FALSE);
 +	}
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	log_sys->archiving_state = LOG_ARCH_STOPPED;
 +
 +	mutex_exit(&(log_sys->mutex));
 +}
 +
 +/****************************************************************//**
 +Starts again archiving which has been stopped.  Succeeds only if the
 +archiving state is LOG_ARCH_STOPPED; the state is then set to LOG_ARCH_ON
 +and the archiving_on event is set.
 +@return	DB_SUCCESS or DB_ERROR */
 +UNIV_INTERN
 +ulint
 +log_archive_start(void)
 +/*===================*/
 +{
 +	mutex_enter(&(log_sys->mutex));
 +
 +	if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return(DB_ERROR);
 +	}
 +
 +	log_sys->archiving_state = LOG_ARCH_ON;
 +
 +	os_event_set(log_sys->archiving_on);
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/****************************************************************//**
 +Stop archiving the log so that a gap may occur in the archived log files.
 +If archiving is neither stopped nor off, log_archive_stop() is called and
 +the check is repeated after a sleep, until the state can be set to
 +LOG_ARCH_OFF.  No path in this function returns DB_ERROR.
 +@return	DB_SUCCESS or DB_ERROR */
 +UNIV_INTERN
 +ulint
 +log_archive_noarchivelog(void)
 +/*==========================*/
 +{
 +	ut_ad(!srv_read_only_mode);
 +loop:
 +	mutex_enter(&(log_sys->mutex));
 +
 +	if (log_sys->archiving_state == LOG_ARCH_STOPPED
 +	    || log_sys->archiving_state == LOG_ARCH_OFF) {
 +
 +		log_sys->archiving_state = LOG_ARCH_OFF;
 +
 +		os_event_set(log_sys->archiving_on);
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return(DB_SUCCESS);
 +	}
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	log_archive_stop();
 +
 +	os_thread_sleep(500000);
 +
 +	goto loop;
 +}
 +
 +/****************************************************************//**
 +Start archiving the log so that a gap may occur in the archived log files.
 +Succeeds only if the archiving state is LOG_ARCH_OFF; archiving then
 +starts from the current log_sys->lsn aligned down to a log block boundary.
 +@return	DB_SUCCESS or DB_ERROR */
 +UNIV_INTERN
 +ulint
 +log_archive_archivelog(void)
 +/*========================*/
 +{
 +	ut_ad(!srv_read_only_mode);
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +
 +		log_sys->archiving_state = LOG_ARCH_ON;
 +
 +		log_sys->archived_lsn
 +			= ut_uint64_align_down(log_sys->lsn,
 +					       OS_FILE_LOG_BLOCK_SIZE);
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return(DB_SUCCESS);
 +	}
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	return(DB_ERROR);
 +}
 +
 +/****************************************************************//**
 +Tries to establish a big enough margin of free space in the log groups, such
 +that a new log entry can be catenated without an immediate need for
 +archiving.  Returns at once if archiving is off or the archiving age is
 +within the asynchronous limit.  Above the synchronous limit the archiving
 +is done synchronously and the check is repeated. */
 +static
 +void
 +log_archive_margin(void)
 +/*====================*/
 +{
 +	log_t*	log		= log_sys;
 +	lsn_t	age;	/* a difference of two LSNs: must not be narrowed to
 +			ulint, which may be smaller than lsn_t */
 +	ibool	sync;
 +	ulint	dummy;
 +loop:
 +	mutex_enter(&(log->mutex));
 +
 +	if (log->archiving_state == LOG_ARCH_OFF) {
 +		mutex_exit(&(log->mutex));
 +
 +		return;
 +	}
 +
 +	age = log->lsn - log->archived_lsn;
 +
 +	if (age > log->max_archived_lsn_age) {
 +
 +		/* An archiving is urgent: we have to do synchronous i/o */
 +
 +		sync = TRUE;
 +
 +	} else if (age > log->max_archived_lsn_age_async) {
 +
 +		/* An archiving is not urgent: we do asynchronous i/o */
 +
 +		sync = FALSE;
 +	} else {
 +		/* No archiving required yet */
 +
 +		mutex_exit(&(log->mutex));
 +
 +		return;
 +	}
 +
 +	mutex_exit(&(log->mutex));
 +
 +	log_archive_do(sync, &dummy);
 +
 +	if (sync == TRUE) {
 +		/* Check again that enough was written to the archive */
 +
 +		goto loop;
 +	}
 +}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +/********************************************************************//**
 +Checks that there is enough free space in the log to start a new query step.
 +Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
 +function may only be called if the calling thread owns no synchronization
 +objects!  The flush and checkpoint margin checks are repeated, after a
 +sleep, for as long as log_check_tracking_margin(0) returns nonzero, and
 +again for as long as log_sys->check_flush_or_checkpoint is still set after
 +the archive margin check. */
 +UNIV_INTERN
 +void
 +log_check_margins(void)
 +/*===================*/
 +{
 +loop:
 +	log_flush_margin();
 +
 +	log_checkpoint_margin();
 +
 +	mutex_enter(&(log_sys->mutex));
 +	if (log_check_tracking_margin(0)) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +		os_thread_sleep(10000);
 +		goto loop;
 +	}
 +	mutex_exit(&(log_sys->mutex));
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_archive_margin();
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	mutex_enter(&(log_sys->mutex));
 +	ut_ad(!recv_no_log_write);
 +
 +	if (log_sys->check_flush_or_checkpoint) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		goto loop;
 +	}
 +
 +	mutex_exit(&(log_sys->mutex));
 +}
 +
 +/****************************************************************//**
 +Makes a checkpoint at the latest lsn and writes it to first page of each
 +data file in the database, so that we know that the file spaces contain
 +all modifications up to that lsn. This can only be called at database
 +shutdown. This function also writes all log in log files to the log archive.
 +The function polls, sleeping at the loop label on every pass, until no
 +background threads, active transactions, pending log writes or pending
 +buffer pool I/O remain; every failed check jumps back to the loop label.
 +With srv_fast_shutdown == 2 the log buffer is flushed (unless in read-only
 +mode), the files are closed and the function returns without making a
 +checkpoint or writing the lsn to the data files. */
 +UNIV_INTERN
 +void
 +logs_empty_and_mark_files_at_shutdown(void)
 +/*=======================================*/
 +{
 +	lsn_t			lsn;
 +	lsn_t			tracked_lsn;
 +	ulint			count = 0;
 +	ulint			pending_io;
 +	enum srv_thread_type	active_thd;
 +	const char*		thread_name;
 +	ibool			server_busy;
 +
 +	ib_logf(IB_LOG_LEVEL_INFO, "Starting shutdown...");
 +
 +        /* Enable checkpoints if someone had turned them off */
 +	if (log_disable_checkpoint_active)
 +		log_enable_checkpoint();
 +
 +	while (srv_fast_shutdown == 0 && trx_rollback_or_clean_is_active) {
 +		/* we should wait until rollback after recovery end
 +		for slow shutdown */
 +		os_thread_sleep(100000);
 +	}
 +
 +	/* Wait until the master thread and all other operations are idle: our
 +	algorithm only works if the server is idle at shutdown */
 +
 +	srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
 +loop:
 +	os_thread_sleep(100000);
 +
 +	count++;	/* passes since the last progress message */
 +
 +	/* We need the monitor threads to stop before we proceed with
 +	a shutdown. */
 +
 +	thread_name = srv_any_background_threads_are_active();
 +
 +	if (thread_name != NULL) {
 +		/* Print a message every 60 seconds if we are waiting
 +		for the monitor thread to exit. Master and worker
 +		threads check will be done later. */
 +
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for %s to exit", thread_name);
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +	/* Check that there are no longer transactions, except for
 +	PREPARED ones. We need this wait even for the 'very fast'
 +	shutdown, because the InnoDB layer may have committed or
 +	prepared transactions and we don't want to lose them.
 +	The count is taken as 0 without calling
 +	trx_sys_any_active_transactions() if the server was not started,
 +	is in read-only mode, or srv_force_recovery is at least
 +	SRV_FORCE_NO_TRX_UNDO. */
 +
 +	if (ulint total_trx = srv_was_started && !srv_read_only_mode
 +	    && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
 +	    ? trx_sys_any_active_transactions() : 0) {
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for %lu active transactions to finish",
 +				(ulong) total_trx);
 +
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +	/* Check that the background threads are suspended */
 +
 +	active_thd = srv_get_active_thread_type();
 +
 +	if (active_thd != SRV_NONE) {
 +
 +		if (active_thd == SRV_PURGE) {
 +			srv_purge_wakeup();
 +		}
 +
 +		/* The srv_lock_timeout_thread, srv_error_monitor_thread
 +		and srv_monitor_thread should already exit by now. The
 +		only threads to be suspended are the master threads
 +		and worker threads (purge threads). Print the thread
 +		type if any of such threads not in suspended mode */
 +		if (srv_print_verbose_log && count > 600) {
 +			const char*	thread_type = "<null>";
 +
 +			switch (active_thd) {
 +			case SRV_NONE:
 +				/* This shouldn't happen because we've
 +				already checked for this case before
 +				entering the if(). We handle it here
 +				to avoid a compiler warning. */
 +				ut_error;
 +			case SRV_WORKER:
 +				thread_type = "worker threads";
 +				break;
 +			case SRV_MASTER:
 +				thread_type = "master thread";
 +				break;
 +			case SRV_PURGE:
 +				thread_type = "purge thread";
 +				break;
 +			}
 +
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for %s to be suspended",
 +				thread_type);
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +	/* At this point only page_cleaner should be active. We wait
 +	here to let it complete the flushing of the buffer pools
 +	before proceeding further. The message below is printed when
 +	count is 0, and count wraps to 0 after 600 passes. */
 +	srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE;
 +	count = 0;
 +	while (buf_page_cleaner_is_active || buf_lru_manager_is_active) {
 +		if (srv_print_verbose_log && count == 0) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for page_cleaner to "
 +				"finish flushing of buffer pool");
 +		}
 +		++count;
 +		os_thread_sleep(100000);
 +		if (count > 600) {
 +			count = 0;
 +		}
 +	}
 +
 +	mutex_enter(&log_sys->mutex);
 +	server_busy = log_sys->n_pending_checkpoint_writes
 +#ifdef UNIV_LOG_ARCHIVE
 +		|| log_sys->n_pending_archive_ios
 +#endif /* UNIV_LOG_ARCHIVE */
 +		|| log_sys->n_pending_writes;
 +	mutex_exit(&log_sys->mutex);
 +
 +	if (server_busy) {
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Pending checkpoint_writes: %lu. "
 +				"Pending log flush writes: %lu",
 +				(ulong) log_sys->n_pending_checkpoint_writes,
 +				(ulong) log_sys->n_pending_writes);
 +			count = 0;
 +		}
 +		goto loop;
 +	}
 +
 +	pending_io = buf_pool_check_no_pending_io();
 +
 +	if (pending_io) {
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for %lu buffer page I/Os to complete",
 +				(ulong) pending_io);
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_archive_all();
 +#endif /* UNIV_LOG_ARCHIVE */
 +	if (srv_fast_shutdown == 2) {
 +		if (!srv_read_only_mode) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"MySQL has requested a very fast shutdown "
 +				"without flushing the InnoDB buffer pool to "
 +				"data files. At the next mysqld startup "
 +				"InnoDB will do a crash recovery!");
 +
 +			/* In this fastest shutdown we do not flush the
 +			buffer pool:
 +
 +			it is essentially a 'crash' of the InnoDB server.
 +			Make sure that the log is all flushed to disk, so
 +			that we can recover all committed transactions in
 +			a crash recovery. We must not write the lsn stamps
 +			to the data files, since at a startup InnoDB deduces
 +			from the stamps if the previous shutdown was clean. */
 +
 +			log_buffer_flush_to_disk();
 +
 +			/* Check that the background threads stay suspended */
 +			thread_name = srv_any_background_threads_are_active();
 +
 +			if (thread_name != NULL) {
 +				ib_logf(IB_LOG_LEVEL_WARN,
 +					"Background thread %s woke up "
 +					"during shutdown", thread_name);
 +				goto loop;
 +			}
 +		}
 +
 +		srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
 +
 +		/* Wake the log tracking thread which will then immediately
 +		quit because of srv_shutdown_state value */
 +		if (srv_redo_log_thread_started) {
 +			os_event_reset(srv_redo_log_tracked_event);
 +			os_event_set(srv_checkpoint_completed_event);
 +		}
 +
 +		fil_close_all_files();
 +
 +		thread_name = srv_any_background_threads_are_active();
 +
 +		ut_a(!thread_name);
 +
 +		return;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		log_make_checkpoint_at(LSN_MAX, TRUE);
 +	}
 +
 +	mutex_enter(&log_sys->mutex);
 +
 +	tracked_lsn = log_get_tracked_lsn();
 +
 +	lsn = log_sys->lsn;
 +
 +	if (lsn != log_sys->last_checkpoint_lsn
 +	    || (srv_track_changed_pages
 +		&& (tracked_lsn != log_sys->last_checkpoint_lsn))
 +#ifdef UNIV_LOG_ARCHIVE
 +	    || (srv_log_archive_on
 +		&& lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
 +#endif /* UNIV_LOG_ARCHIVE */
 +	    ) {
 +
 +		mutex_exit(&log_sys->mutex);
 +
 +		goto loop;
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +
 +	log_archive_close_groups(TRUE);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	mutex_exit(&log_sys->mutex);
 +
 +	/* Check that the background threads stay suspended */
 +	thread_name = srv_any_background_threads_are_active();
 +	if (thread_name != NULL) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"Background thread %s woke up during shutdown",
 +			thread_name);
 +
 +		goto loop;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		fil_flush_file_spaces(FIL_TABLESPACE);
 +		fil_flush_file_spaces(FIL_LOG);
 +	}
 +
 +	/* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
 +	pool: therefore it is essential that the buffer pool has been
 +	completely flushed to disk! (We do not call fil_write... if the
 +	'very fast' shutdown is enabled.) */
 +
 +	if (!buf_all_freed()) {
 +
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for dirty buffer pages to be flushed");
 +			count = 0;
 +		}
 +
 +		goto loop;
 +	}
 +
 +	srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
 +
 +	/* Signal the log following thread to quit */
 +	if (srv_redo_log_thread_started) {
 +		os_event_reset(srv_redo_log_tracked_event);
 +		os_event_set(srv_checkpoint_completed_event);
 +	}
 +
 +	/* Make some checks that the server really is quiet */
 +	srv_thread_type	type = srv_get_active_thread_type();
 +	ut_a(type == SRV_NONE);
 +
 +	bool	freed = buf_all_freed();
 +	ut_a(freed);
 +
 +	ut_a(lsn == log_sys->lsn);
 +	ut_ad(lsn == log_sys->last_checkpoint_lsn);
 +
 +	if (lsn < srv_start_lsn) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Log sequence number at shutdown " LSN_PF " "
 +			"is lower than at startup " LSN_PF "!",
 +			lsn, srv_start_lsn);
 +	}
 +
 +	srv_shutdown_lsn = lsn;
 +
 +	if (!srv_read_only_mode) {
 +		fil_write_flushed_lsn_to_data_files(lsn, 0);
 +
 +		fil_flush_file_spaces(FIL_TABLESPACE);
 +	}
 +
 +	fil_close_all_files();
 +
 +	/* Make some checks that the server really is quiet */
 +	type = srv_get_active_thread_type();
 +	ut_a(type == SRV_NONE);
 +
 +	freed = buf_all_freed();
 +	ut_a(freed);
 +
 +	ut_a(lsn == log_sys->lsn);
 +}
 +
 +#ifdef UNIV_LOG_DEBUG
 +/******************************************************//**
 +Checks by parsing that the catenated log segment for a single mtr is
 +consistent. The block-aligned range that encloses the segment is copied
 +into an aligned scratch buffer and passed to recv_scan_log_recs(); the
 +scan must end exactly at buf_start_lsn + len, and recv_sys->recovered_lsn
 +must agree with it, otherwise ut_a() fails. The caller must hold
 +log_sys->mutex.
 +@return	always TRUE; inconsistencies trip an assertion instead */
 +UNIV_INTERN
 +ibool
 +log_check_log_recs(
 +/*===============*/
 +	const byte*	buf,		/*!< in: pointer to the start of
 +					the log segment in the
 +					log_sys->buf log buffer */
 +	ulint		len,		/*!< in: segment length in bytes */
 +	ib_uint64_t	buf_start_lsn)	/*!< in: buffer start lsn */
 +{
 +	ib_uint64_t	contiguous_lsn;
 +	ib_uint64_t	scanned_lsn;
 +	const byte*	start;
 +	const byte*	end;
 +	byte*		buf1;
 +	byte*		scan_buf;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	if (len == 0) {
 +
 +		return(TRUE);
 +	}
 +
 +	/* This is a C++ translation unit, so the results of the alignment
 +	and allocation helpers are cast explicitly, in the same way as
 +	recv_sys_init() does for ut_align() and mem_alloc(). */
 +	start = static_cast<const byte*>(
 +		ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE));
 +	end = static_cast<const byte*>(
 +		ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	/* Allocate one extra block so that scan_buf can be aligned
 +	within the allocation. */
 +	buf1 = static_cast<byte*>(
 +		mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE));
 +	scan_buf = static_cast<byte*>(
 +		ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	ut_memcpy(scan_buf, start, end - start);
 +
 +	recv_scan_log_recs((buf_pool_get_n_pages()
 +			   - (recv_n_pool_free_frames * srv_buf_pool_instances))
 +			   * UNIV_PAGE_SIZE, FALSE, scan_buf, end - start,
 +			   ut_uint64_align_down(buf_start_lsn,
 +						OS_FILE_LOG_BLOCK_SIZE),
 +			   &contiguous_lsn, &scanned_lsn);
 +
 +	ut_a(scanned_lsn == buf_start_lsn + len);
 +	ut_a(recv_sys->recovered_lsn == scanned_lsn);
 +
 +	mem_free(buf1);
 +
 +	return(TRUE);
 +}
 +#endif /* UNIV_LOG_DEBUG */
 +
 +/******************************************************//**
 +Peeks the current lsn. The log system mutex is requested with
 +mutex_enter_nowait(); if that call does not return 0, *lsn is left
 +untouched.
 +@return	TRUE if success, FALSE if could not get the log system mutex */
 +UNIV_INTERN
 +ibool
 +log_peek_lsn(
 +/*=========*/
 +	lsn_t*	lsn)	/*!< out: if returns TRUE, current lsn is here */
 +{
 +	if (mutex_enter_nowait(&(log_sys->mutex)) != 0) {
 +		/* The mutex could not be acquired: report failure. */
 +		return(FALSE);
 +	}
 +
 +	*lsn = log_sys->lsn;
 +	mutex_exit(&(log_sys->mutex));
 +
 +	return(TRUE);
 +}
 +
 +/******************************************************//**
 +Prints info of the log: the lsn positions, the checkpoint ages and the
 +log I/O counters, followed by the tracked lsn when
 +srv_track_changed_pages is set. Before returning, the current I/O count
 +and time are stored in log_sys as the start of the next per-second
 +measuring interval. */
 +UNIV_INTERN
 +void
 +log_print(
 +/*======*/
 +	FILE*	file)	/*!< in: file where to print */
 +{
 +	/* The acquisition and release of log_sys->mutex are commented out
 +	in this function, so every log_sys field below is accessed
 +	without holding that mutex. */
 +
 +	fprintf(file,
 +		"Log sequence number " LSN_PF "\n"
 +		"Log flushed up to   " LSN_PF "\n"
 +		"Pages flushed up to " LSN_PF "\n"
 +		"Last checkpoint at  " LSN_PF "\n",
 +		log_sys->lsn,
 +		log_sys->flushed_to_disk_lsn,
 +		log_buf_pool_get_oldest_modification_peek(),
 +		log_sys->last_checkpoint_lsn);
 +
 +	fprintf(file,
 +		"Max checkpoint age    " LSN_PF "\n"
 +		"Checkpoint age target " LSN_PF "\n"
 +		"Modified age          " LSN_PF "\n"
 +		"Checkpoint age        " LSN_PF "\n",
 +		log_sys->max_checkpoint_age,
 +		log_sys->max_checkpoint_age_async,
 +		log_sys->lsn -log_buf_pool_get_oldest_modification_peek(),
 +		log_sys->lsn - log_sys->last_checkpoint_lsn);
 +
 +	const time_t	now = time(NULL);
 +	double		secs = difftime(now, log_sys->last_printout_time);
 +
 +	if (secs <= 0) {
 +		/* Never divide by a zero or negative interval. */
 +		secs = 1;
 +	}
 +
 +	const double	ios_per_sec
 +		= (double) (log_sys->n_log_ios - log_sys->n_log_ios_old)
 +		/ secs;
 +
 +	fprintf(file,
 +		"%lu pending log writes, %lu pending chkp writes\n"
 +		"%lu log i/o's done, %.2f log i/o's/second\n",
 +		(ulong) log_sys->n_pending_writes,
 +		(ulong) log_sys->n_pending_checkpoint_writes,
 +		(ulong) log_sys->n_log_ios,
 +		ios_per_sec);
 +
 +	if (srv_track_changed_pages) {
 +		/* The maximum checkpoint age is printed as the maximum
 +		tracked LSN age: the two limits are equal. */
 +		fprintf(file,
 +			"Log tracking enabled\n"
 +			"Log tracked up to   " LSN_PF "\n"
 +			"Max tracked LSN age " LSN_PF "\n",
 +			log_get_tracked_lsn(),
 +			log_sys->max_checkpoint_age);
 +	}
 +
 +	/* Start a new measuring interval. */
 +	log_sys->n_log_ios_old = log_sys->n_log_ios;
 +	log_sys->last_printout_time = now;
 +}
 +
 +/**********************************************************************//**
 +Refreshes the statistics used to print per-second averages: the current
 +time and the current log I/O count become the new reference point. */
 +UNIV_INTERN
 +void
 +log_refresh_stats(void)
 +/*===================*/
 +{
 +	log_sys->last_printout_time = time(NULL);
 +	log_sys->n_log_ios_old = log_sys->n_log_ios;
 +}
 +
 +/********************************************************//**
 +Closes a log group: frees the header buffer of each of its files, the
 +arrays that held those buffers, the checkpoint buffer and finally the
 +group object itself. */
 +static
 +void
 +log_group_close(
 +/*===========*/
 +	log_group_t*	group)		/* in,own: log group to close */
 +{
 +	for (ulint file_no = 0; file_no < group->n_files; ++file_no) {
 +		mem_free(group->file_header_bufs_ptr[file_no]);
 +#ifdef UNIV_LOG_ARCHIVE
 +		mem_free(group->archive_file_header_bufs_ptr[file_no]);
 +#endif /* UNIV_LOG_ARCHIVE */
 +	}
 +
 +	/* The per-file buffers are gone; now release the arrays. */
 +	mem_free(group->file_header_bufs_ptr);
 +	mem_free(group->file_header_bufs);
 +#ifdef UNIV_LOG_ARCHIVE
 +	mem_free(group->archive_file_header_bufs_ptr);
 +	mem_free(group->archive_file_header_bufs);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	mem_free(group->checkpoint_buf_ptr);
 +	mem_free(group);
 +}
 +
 +/********************************************************//**
 +Closes all log groups: as long as log_sys->log_groups is not empty, its
 +first group is unlinked from the list and closed. */
 +UNIV_INTERN
 +void
 +log_group_close_all(void)
 +/*=====================*/
 +{
 +	while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
 +		log_group_t*	head = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +		UT_LIST_REMOVE(log_groups, log_sys->log_groups, head);
 +
 +		log_group_close(head);
 +	}
 +}
 +
 +/********************************************************//**
 +Shutdown the log system but do not release all the memory: the log
 +groups, the log/checkpoint/archive buffers, the events, the locks and
 +the mutexes are released and the recovery system is closed, but the
 +log_sys object itself is left allocated (see log_mem_free()). */
 +UNIV_INTERN
 +void
 +log_shutdown(void)
 +/*==============*/
 +{
 +	log_group_close_all();
 +
 +	/* Free each buffer and clear both pointers that referred to it. */
 +	mem_free(log_sys->buf_ptr);
 +	log_sys->buf = NULL;
 +	log_sys->buf_ptr = NULL;
 +
 +	mem_free(log_sys->checkpoint_buf_ptr);
 +	log_sys->checkpoint_buf = NULL;
 +	log_sys->checkpoint_buf_ptr = NULL;
 +
 +	mem_free(log_sys->archive_buf_ptr);
 +	log_sys->archive_buf = NULL;
 +	log_sys->archive_buf_ptr = NULL;
 +
 +	/* Synchronization objects. */
 +	os_event_free(log_sys->no_flush_event);
 +	os_event_free(log_sys->one_flushed_event);
 +	rw_lock_free(&log_sys->checkpoint_lock);
 +	mutex_free(&log_sys->mutex);
 +	mutex_free(&log_sys->log_flush_order_mutex);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	rw_lock_free(&log_sys->archive_lock);
 +	os_event_free(log_sys->archiving_on);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +#ifdef UNIV_LOG_DEBUG
 +	recv_sys_debug_free();
 +#endif
 +
 +	recv_sys_close();
 +}
 +
 +/********************************************************//**
 +Free the log system data structures: the recovery system memory and
 +the log_sys object. Does nothing when log_sys is already NULL. */
 +UNIV_INTERN
 +void
 +log_mem_free(void)
 +/*==============*/
 +{
 +	if (log_sys == NULL) {
 +
 +		return;
 +	}
 +
 +	recv_sys_mem_free();
 +
 +	mem_free(log_sys);
 +	log_sys = NULL;
 +}
 +#endif /* !UNIV_HOTBACKUP */
diff --cc storage/xtradb/log/log0recv.cc
index 01975712d99,00000000000..afb8b7f00b8
mode 100644,000000..100644
--- a/storage/xtradb/log/log0recv.cc
+++ b/storage/xtradb/log/log0recv.cc
@@@ -1,3798 -1,0 +1,3727 @@@
 +/*****************************************************************************
 +
 +Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
 +Copyright (c) 2012, Facebook Inc.
- Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
++Copyright (c) 2017, MariaDB Corporation.
 +
 +This program is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free Software
 +Foundation; version 2 of the License.
 +
 +This program is distributed in the hope that it will be useful, but WITHOUT
 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License along with
 +this program; if not, write to the Free Software Foundation, Inc.,
 +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
 +
 +*****************************************************************************/
 +
 +/**************************************************//**
 +@file log/log0recv.cc
 +Recovery
 +
 +Created 9/20/1997 Heikki Tuuri
 +*******************************************************/
 +
 +// First include (the generated) my_config.h, to get correct platform defines.
 +#include "my_config.h"
 +#include <stdio.h>                              // Solaris/x86 header file bug
 +
 +#include <vector>
 +#include "log0recv.h"
 +
 +#ifdef UNIV_NONINL
 +#include "log0recv.ic"
 +#endif
 +
 +#include "config.h"
 +#ifdef HAVE_ALLOCA_H
 +#include "alloca.h"
 +#elif defined(HAVE_MALLOC_H) 
 +#include "malloc.h"
 +#endif
 +
 +#include "mem0mem.h"
 +#include "buf0buf.h"
 +#include "buf0flu.h"
 +#include "mtr0mtr.h"
 +#include "mtr0log.h"
 +#include "page0cur.h"
 +#include "page0zip.h"
 +#include "btr0btr.h"
 +#include "btr0cur.h"
 +#include "ibuf0ibuf.h"
 +#include "trx0undo.h"
 +#include "trx0rec.h"
 +#include "fil0fil.h"
 +#ifndef UNIV_HOTBACKUP
 +# include "buf0rea.h"
 +# include "srv0srv.h"
 +# include "srv0start.h"
 +# include "trx0roll.h"
 +# include "row0merge.h"
 +# include "sync0sync.h"
 +#else /* !UNIV_HOTBACKUP */
 +
 +
 +/** This is set to FALSE if the backup was originally taken with the
 +mysqlbackup --include regexp option: then we do not want to create tables in
 +directories which were not included */
 +UNIV_INTERN ibool	recv_replay_file_ops	= TRUE;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/** Log records are stored in the hash table in chunks at most of this size;
 +this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
 +#define RECV_DATA_BLOCK_SIZE	(MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
 +
 +/** Read-ahead area in applying log records to file pages */
 +#define RECV_READ_AHEAD_AREA	32
 +
 +/** The recovery system */
- UNIV_INTERN recv_sys_t*	recv_sys = NULL;
++UNIV_INTERN recv_sys_t*	recv_sys;
 +/** TRUE when applying redo log records during crash recovery; FALSE
 +otherwise.  Note that this is FALSE while a background thread is
 +rolling back incomplete transactions. */
 +UNIV_INTERN ibool	recv_recovery_on;
 +
 +#ifndef UNIV_HOTBACKUP
 +/** TRUE when recv_init_crash_recovery() has been called. */
 +UNIV_INTERN ibool	recv_needed_recovery;
 +# ifdef UNIV_DEBUG
 +/** TRUE if writing to the redo log (mtr_commit) is forbidden.
 +Protected by log_sys->mutex. */
 +UNIV_INTERN ibool	recv_no_log_write = FALSE;
 +# endif /* UNIV_DEBUG */
 +
 +/** TRUE if buf_page_is_corrupted() should check if the log sequence
 +number (FIL_PAGE_LSN) is in the future.  Initially FALSE, and set by
 +recv_recovery_from_checkpoint_start_func(). */
 +UNIV_INTERN ibool	recv_lsn_checks_on;
 +
 +/** There are two conditions under which we scan the logs, the first
 +is normal startup and the second is when we do a recovery from an
 +archive.
 +This flag is set if we are doing a scan from the last checkpoint during
 +startup. If we find log entries that were written after the last checkpoint
 +we know that the server was not cleanly shutdown. We must then initialize
 +the crash recovery environment before attempting to store these entries in
 +the log hash table. */
 +static ibool		recv_log_scan_is_startup_type;
 +
 +/** If the following is TRUE, the buffer pool file pages must be invalidated
 +after recovery and no ibuf operations are allowed; this becomes TRUE if
 +the log record hash table becomes too full, and log records must be merged
 +to file pages already before the recovery is finished: in this case no
 +ibuf operations are allowed, as they could modify the pages read in the
 +buffer pool before the pages have been recovered to the up-to-date state.
 +
 +TRUE means that recovery is running and no operations on the log files
 +are allowed yet: the variable name is misleading. */
 +UNIV_INTERN ibool	recv_no_ibuf_operations;
 +/** TRUE when the redo log is being backed up */
 +# define recv_is_making_a_backup		FALSE
 +/** TRUE when recovering from a backed up redo log file */
 +# define recv_is_from_backup			FALSE
 +#else /* !UNIV_HOTBACKUP */
 +# define recv_needed_recovery			FALSE
 +/** TRUE when the redo log is being backed up */
 +UNIV_INTERN ibool	recv_is_making_a_backup	= FALSE;
 +/** TRUE when recovering from a backed up redo log file */
 +UNIV_INTERN ibool	recv_is_from_backup	= FALSE;
 +# define buf_pool_get_curr_size() (5 * 1024 * 1024)
 +#endif /* !UNIV_HOTBACKUP */
- /** The following counter is used to decide when to print info on
- log scan */
- static ulint	recv_scan_print_counter;
 +
 +/** The type of the previous parsed redo log record */
 +static ulint	recv_previous_parsed_rec_type;
 +/** The offset of the previous parsed redo log record */
 +static ulint	recv_previous_parsed_rec_offset;
 +/** The 'multi' flag of the previous parsed redo log record */
 +static ulint	recv_previous_parsed_rec_is_multi;
 +
 +/** Maximum page number encountered in the redo log */
 +UNIV_INTERN ulint	recv_max_parsed_page_no;
 +
 +/** This many frames must be left free in the buffer pool when we scan
 +the log and store the scanned log records in the buffer pool: we will
 +use these free frames to read in pages when we start applying the
 +log records to the database.
 +recv_sys_var_init() resets this to the default of 256; recv_sys_init()
 +raises it to 512 when the buffer pool is at least 10 MB. */
 +UNIV_INTERN ulint	recv_n_pool_free_frames;
 +
 +/** The maximum lsn we see for a page during the recovery process. If this
 +is bigger than the lsn we are able to scan up to, that is an indication that
 +the recovery failed and the database may be corrupt. */
 +UNIV_INTERN lsn_t	recv_max_page_lsn;
 +
 +#ifdef UNIV_PFS_THREAD
 +UNIV_INTERN mysql_pfs_key_t	trx_rollback_clean_thread_key;
 +#endif /* UNIV_PFS_THREAD */
 +
 +#ifdef UNIV_PFS_MUTEX
 +UNIV_INTERN mysql_pfs_key_t	recv_sys_mutex_key;
 +#endif /* UNIV_PFS_MUTEX */
 +
 +#ifndef UNIV_HOTBACKUP
 +# ifdef UNIV_PFS_THREAD
 +UNIV_INTERN mysql_pfs_key_t	recv_writer_thread_key;
 +# endif /* UNIV_PFS_THREAD */
 +
 +# ifdef UNIV_PFS_MUTEX
 +UNIV_INTERN mysql_pfs_key_t	recv_writer_mutex_key;
 +# endif /* UNIV_PFS_MUTEX */
 +
 +/** Flag indicating if recv_writer thread is active. */
 +static volatile bool		recv_writer_thread_active;
 +UNIV_INTERN os_thread_t		recv_writer_thread_handle = 0;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/* prototypes */
 +
 +#ifndef UNIV_HOTBACKUP
 +/*******************************************************//**
 +Initialize crash recovery environment. Can be called iff
 +recv_needed_recovery == FALSE. */
 +static
 +void
 +recv_init_crash_recovery(void);
 +/*===========================*/
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/********************************************************//**
 +Creates the recovery system: allocates the recv_sys object and creates
 +its mutexes. The heap and the address hash table are left NULL here.
 +Does nothing if recv_sys already exists. */
 +UNIV_INTERN
 +void
 +recv_sys_create(void)
 +/*=================*/
 +{
 +	if (recv_sys == NULL) {
 +		recv_sys = static_cast<recv_sys_t*>(
 +			mem_zalloc(sizeof(recv_sys_t)));
 +
 +		recv_sys->addr_hash = NULL;
 +		recv_sys->heap = NULL;
 +
 +		mutex_create(recv_sys_mutex_key, &recv_sys->mutex, SYNC_RECV);
 +
 +#ifndef UNIV_HOTBACKUP
 +		mutex_create(recv_writer_mutex_key, &recv_sys->writer_mutex,
 +			     SYNC_LEVEL_VARYING);
 +#endif /* !UNIV_HOTBACKUP */
 +	}
 +}
 +
 +/********************************************************//**
 +Closes the recovery system: frees the address hash table, the heap, the
 +parsing buffer and the last-block buffer where they exist, frees the
 +recovery system mutexes, and finally frees the recv_sys object and
 +resets the pointer. Does nothing when recv_sys is NULL. */
 +UNIV_INTERN
 +void
 +recv_sys_close(void)
 +/*================*/
 +{
 +	if (recv_sys == NULL) {
 +
 +		return;
 +	}
 +
 +	if (recv_sys->addr_hash != NULL) {
 +		hash_table_free(recv_sys->addr_hash);
 +	}
 +
 +	if (recv_sys->heap != NULL) {
 +		mem_heap_free(recv_sys->heap);
 +	}
 +
 +	if (recv_sys->buf != NULL) {
 +		ut_free(recv_sys->buf);
 +	}
 +
 +	if (recv_sys->last_block_buf_start != NULL) {
 +		mem_free(recv_sys->last_block_buf_start);
 +	}
 +
 +#ifndef UNIV_HOTBACKUP
 +	ut_ad(!recv_writer_thread_active);
 +	mutex_free(&recv_sys->writer_mutex);
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	mutex_free(&recv_sys->mutex);
 +
 +	mem_free(recv_sys);
 +	recv_sys = NULL;
 +}
 +
 +/********************************************************//**
 +Frees the recovery system memory: the address hash table, the heap, the
 +parsing buffer and the last-block buffer where they exist, and then the
 +recv_sys object itself. The mutexes are not freed here. Does nothing
 +when recv_sys is NULL. */
 +UNIV_INTERN
 +void
 +recv_sys_mem_free(void)
 +/*===================*/
 +{
 +	if (recv_sys == NULL) {
 +
 +		return;
 +	}
 +
 +	if (recv_sys->addr_hash != NULL) {
 +		hash_table_free(recv_sys->addr_hash);
 +	}
 +
 +	if (recv_sys->heap != NULL) {
 +		mem_heap_free(recv_sys->heap);
 +	}
 +
 +	if (recv_sys->buf != NULL) {
 +		ut_free(recv_sys->buf);
 +	}
 +
 +	if (recv_sys->last_block_buf_start != NULL) {
 +		mem_free(recv_sys->last_block_buf_start);
 +	}
 +
 +	mem_free(recv_sys);
 +	recv_sys = NULL;
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/************************************************************
 +Reset the state of the recovery system variables: every file-level
 +recovery flag is cleared, the parse bookkeeping is reset and
 +recv_n_pool_free_frames is set back to 256. Each variable is assigned
 +exactly once. */
 +UNIV_INTERN
 +void
 +recv_sys_var_init(void)
 +/*===================*/
 +{
 +	recv_lsn_checks_on = FALSE;
 +
 +	recv_n_pool_free_frames = 256;
 +
 +	recv_recovery_on = FALSE;
 +
 +	recv_needed_recovery = FALSE;
 +
 +	recv_log_scan_is_startup_type = FALSE;
 +
 +	recv_no_ibuf_operations = FALSE;
 +
- 	recv_scan_print_counter	= 0;
- 
 +	/* 999999 is the value this function stores as the "previous
 +	record type" when resetting the state. */
 +	recv_previous_parsed_rec_type	= 999999;
 +
 +	recv_previous_parsed_rec_offset	= 0;
 +
 +	recv_previous_parsed_rec_is_multi = 0;
 +
 +	recv_max_parsed_page_no	= 0;
 +
 +	recv_max_page_lsn = 0;
 +}
 +
 +/******************************************************************//**
 +recv_writer thread tasked with flushing dirty pages from the buffer
 +pools.
 +The thread loops while srv_shutdown_state is SRV_SHUTDOWN_NONE. On each
 +round it calls os_thread_sleep(100000) (presumably microseconds, i.e.
 +100 ms - confirm), takes recv_sys->writer_mutex and then either leaves
 +the loop because recv_recovery_on is no longer set, or calls
 +buf_flush_LRU_tail() and releases the mutex. After the loop it clears
 +recv_writer_thread_active and exits through os_thread_exit().
 +@return a dummy parameter */
 +extern "C" UNIV_INTERN
 +os_thread_ret_t
 +DECLARE_THREAD(recv_writer_thread)(
 +/*===============================*/
 +	void*	arg MY_ATTRIBUTE((unused)))
 +			/*!< in: a dummy parameter required by
 +			os_thread_create */
 +{
 +	ut_ad(!srv_read_only_mode);
 +
 +#ifdef UNIV_PFS_THREAD
 +	pfs_register_thread(recv_writer_thread_key);
 +#endif /* UNIV_PFS_THREAD */
 +
 +#ifdef UNIV_DEBUG_THREAD_CREATION
 +	fprintf(stderr, "InnoDB: recv_writer thread running, id %lu\n",
 +		os_thread_pf(os_thread_get_curr_id()));
 +#endif /* UNIV_DEBUG_THREAD_CREATION */
 +
 +	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
 +
 +		os_thread_sleep(100000);
 +
 +		mutex_enter(&recv_sys->writer_mutex);
 +
 +		if (!recv_recovery_on) {
 +			mutex_exit(&recv_sys->writer_mutex);
 +			break;
 +		}
 +
 +		/* Flush pages from end of LRU if required. This runs
 +		while recv_sys->writer_mutex is held. */
 +		buf_flush_LRU_tail();
 +
 +		mutex_exit(&recv_sys->writer_mutex);
 +	}
 +
 +	recv_writer_thread_active = false;
 +
 +	/* We count the number of threads in os_thread_exit().
 +	A created thread should always use that to exit and not
 +	use return() to exit. */
 +	os_thread_exit(NULL);
 +
 +	OS_THREAD_DUMMY_RETURN;
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/************************************************************
 +Inits the recovery system for a recovery operation: creates the heap,
 +the parsing buffer, the address hash table and the block-aligned
 +last-block buffer, and resets the bookkeeping fields of recv_sys.
 +Returns immediately if recv_sys->heap already exists.
 +NOTE(review): the heap check is made before recv_sys->mutex is taken,
 +and the mutex is entered only inside #ifndef UNIV_HOTBACKUP while the
 +mutex_exit() at the end is unconditional - confirm both are intended. */
 +UNIV_INTERN
 +void
 +recv_sys_init(
 +/*==========*/
 +	ulint	available_memory)	/*!< in: available memory in bytes;
 +					hash_create() is called with
 +					available_memory / 512 */
 +{
 +	if (recv_sys->heap != NULL) {
 +
 +		return;
 +	}
 +
 +#ifndef UNIV_HOTBACKUP
 +	mutex_enter(&(recv_sys->mutex));
 +
 +	recv_sys->heap = mem_heap_create_typed(256,
 +					MEM_HEAP_FOR_RECV_SYS);
 +#else /* !UNIV_HOTBACKUP */
 +	recv_sys->heap = mem_heap_create(256);
 +	recv_is_from_backup = TRUE;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	/* Set appropriate value of recv_n_pool_free_frames. */
 +	if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
 +		/* Buffer pool of at least 10 MB. */
 +		recv_n_pool_free_frames = 512;
 +	}
 +
 +	recv_sys->buf = static_cast<byte*>(ut_malloc(RECV_PARSING_BUF_SIZE));
 +	recv_sys->len = 0;
 +	recv_sys->recovered_offset = 0;
 +
 +	recv_sys->addr_hash = hash_create(available_memory / 512);
 +	recv_sys->n_addrs = 0;
 +
 +	recv_sys->apply_log_recs = FALSE;
 +	recv_sys->apply_batch_on = FALSE;
 +
 +	recv_sys->last_block_buf_start = static_cast<byte*>(
 +		mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE));
 +
 +	recv_sys->last_block = static_cast<byte*>(ut_align(
 +		recv_sys->last_block_buf_start, OS_FILE_LOG_BLOCK_SIZE));
 +
 +	recv_sys->found_corrupt_log = FALSE;
++	recv_sys->progress_time = ut_time();
 +
 +	recv_max_page_lsn = 0;
 +
 +	/* Call the constructor for recv_sys_t::dblwr member. Placement
 +	new is used because the recv_sys object itself is obtained from
 +	mem_zalloc() in recv_sys_create() rather than from new. */
 +	new (&recv_sys->dblwr) recv_dblwr_t();
 +
 +	mutex_exit(&(recv_sys->mutex));
 +}
 +
 +/********************************************************//**
 +Empties the hash table when it has been fully processed: the old table
 +is freed, the recovery heap is emptied and a new table sized from the
 +current buffer pool size is created. The caller must hold
 +recv_sys->mutex. If recv_sys->n_addrs is still nonzero, an error is
 +printed and ut_error is raised. */
 +static
 +void
 +recv_sys_empty_hash(void)
 +/*=====================*/
 +{
 +	ut_ad(mutex_own(&(recv_sys->mutex)));
 +
 +	if (recv_sys->n_addrs > 0) {
 +		/* Some pages still have unapplied log records. */
 +		fprintf(stderr,
 +			"InnoDB: Error: %lu pages with log records"
 +			" were left unprocessed!\n"
 +			"InnoDB: Maximum page number with"
 +			" log records on it %lu\n",
 +			(ulong) recv_sys->n_addrs,
 +			(ulong) recv_max_parsed_page_no);
 +
 +		ut_error;
 +	}
 +
 +	hash_table_free(recv_sys->addr_hash);
 +
 +	mem_heap_empty(recv_sys->heap);
 +
 +	recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 512);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +# ifndef UNIV_LOG_DEBUG
 +/********************************************************//**
 +Frees the recovery system: under recv_sys->mutex, the address hash
 +table, the heap, the parsing buffer and the last-block buffer are freed
 +and their pointers in recv_sys are reset to NULL. The recv_sys object
 +and its mutexes stay in place. */
 +static
 +void
 +recv_sys_debug_free(void)
 +/*=====================*/
 +{
 +	mutex_enter(&(recv_sys->mutex));
 +
 +	hash_table_free(recv_sys->addr_hash);
 +	recv_sys->addr_hash = NULL;
 +
 +	mem_heap_free(recv_sys->heap);
 +	recv_sys->heap = NULL;
 +
 +	ut_free(recv_sys->buf);
 +	recv_sys->buf = NULL;
 +
 +	mem_free(recv_sys->last_block_buf_start);
 +	recv_sys->last_block_buf_start = NULL;
 +
 +	mutex_exit(&(recv_sys->mutex));
 +}
 +# endif /* UNIV_LOG_DEBUG */
 +
 +# ifdef UNIV_LOG_ARCHIVE
 +/********************************************************//**
 +Truncates possible corrupted or extra records from a log group.
 +Zero-filled data is written over the group from recovered_lsn, rounded
 +down to a log block boundary, up to a computed finish lsn, in pieces of
 +at most RECV_SCAN_SIZE bytes staged in log_sys->buf. When recovered_lsn
 +lies inside a block, the first block written is a copy of
 +recv_sys->last_block with its data length set to the recovered part.
 +Nothing is written if the aligned start lsn is already at or beyond the
 +finish lsn. */
 +static
 +void
 +recv_truncate_group(
 +/*================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	lsn_t		recovered_lsn,	/*!< in: recovery succeeded up to this
 +					lsn */
 +	lsn_t		limit_lsn,	/*!< in: this was the limit for
 +					recovery */
 +	lsn_t		checkpoint_lsn,	/*!< in: recovery was started from this
 +					checkpoint */
 +	lsn_t		archived_lsn)	/*!< in: the log has been archived up to
 +					this lsn; LSN_MAX is replaced by
 +					checkpoint_lsn */
 +{
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +	lsn_t		finish_lsn1;
 +	lsn_t		finish_lsn2;
 +	lsn_t		finish_lsn;
 +
 +	if (archived_lsn == LSN_MAX) {
 +		/* Checkpoint was taken in the NOARCHIVELOG mode */
 +		archived_lsn = checkpoint_lsn;
 +	}
 +
 +	finish_lsn1 = ut_uint64_align_down(archived_lsn,
 +					   OS_FILE_LOG_BLOCK_SIZE)
 +		+ log_group_get_capacity(group);
 +
 +	finish_lsn2 = ut_uint64_align_up(recovered_lsn,
 +					 OS_FILE_LOG_BLOCK_SIZE)
 +		+ recv_sys->last_log_buf_size;
 +
 +	if (limit_lsn != LSN_MAX) {
 +		/* We do not know how far we should erase log records: erase
 +		as much as possible, i.e. up to finish_lsn1 */
 +
 +		finish_lsn = finish_lsn1;
 +	} else {
 +		/* It is enough to erase the length of the log buffer, but
 +		never beyond finish_lsn1: take the smaller of the two */
 +		finish_lsn = finish_lsn1 < finish_lsn2
 +			? finish_lsn1 : finish_lsn2;
 +	}
 +
 +	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 +
 +	memset(log_sys->buf, 0, RECV_SCAN_SIZE);
 +
 +	start_lsn = ut_uint64_align_down(recovered_lsn,
 +					 OS_FILE_LOG_BLOCK_SIZE);
 +
 +	if (start_lsn != recovered_lsn) {
 +		/* Copy the last incomplete log block to the log buffer and
 +		edit its data length: only the first diff bytes of that
 +		block are kept as valid data */
 +		lsn_t	diff = recovered_lsn - start_lsn;
 +
 +		ut_a(diff <= 0xFFFFUL);
 +
 +		ut_memcpy(log_sys->buf, recv_sys->last_block,
 +			  OS_FILE_LOG_BLOCK_SIZE);
 +		log_block_set_data_len(log_sys->buf, (ulint) diff);
 +	}
 +
 +	if (start_lsn >= finish_lsn) {
 +
 +		return;
 +	}
 +
 +	for (;;) {
 +		ulint	len;
 +
 +		end_lsn = start_lsn + RECV_SCAN_SIZE;
 +
 +		if (end_lsn > finish_lsn) {
 +
 +			end_lsn = finish_lsn;
 +		}
 +
 +		len = (ulint) (end_lsn - start_lsn);
 +
 +		log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
 +		if (end_lsn >= finish_lsn) {
 +
 +			return;
 +		}
 +
 +		memset(log_sys->buf, 0, RECV_SCAN_SIZE);
 +
 +		start_lsn = end_lsn;
 +	}
 +}
 +
 +/********************************************************//**
 +Copies the log segment between group->scanned_lsn and recovered_lsn from
 +the most up-to-date log group to group, so that it contains the latest
 +log data. The copy starts at group->scanned_lsn rounded down to a log
 +block boundary and proceeds through log_sys->buf in pieces of at most
 +RECV_SCAN_SIZE bytes; the last piece ends at recovered_lsn rounded up
 +to a block boundary. */
 +static
 +void
 +recv_copy_group(
 +/*============*/
 +	log_group_t*	up_to_date_group,	/*!< in: the most up-to-date log
 +						group */
 +	log_group_t*	group,			/*!< in: copy to this log
 +						group */
 +	lsn_t		recovered_lsn)		/*!< in: recovery succeeded up
 +						to this lsn */
 +{
 +	if (group->scanned_lsn >= recovered_lsn) {
 +		/* The group was already scanned up to recovered_lsn. */
 +		return;
 +	}
 +
 +	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
 +
 +	lsn_t	chunk_start = ut_uint64_align_down(group->scanned_lsn,
 +						   OS_FILE_LOG_BLOCK_SIZE);
 +	lsn_t	chunk_end;
 +
 +	do {
 +		chunk_end = chunk_start + RECV_SCAN_SIZE;
 +
 +		if (chunk_end > recovered_lsn) {
 +			/* Final piece: stop at the end of the block that
 +			contains recovered_lsn. */
 +			chunk_end = ut_uint64_align_up(
 +				recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 +		}
 +
 +		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 +				       up_to_date_group, chunk_start,
 +				       chunk_end, FALSE);
 +
 +		log_group_write_buf(group, log_sys->buf,
 +				    (ulint) (chunk_end - chunk_start),
 +				    chunk_start, 0);
 +
 +		chunk_start = chunk_end;
 +	} while (chunk_end < recovered_lsn);
 +}
 +# endif /* UNIV_LOG_ARCHIVE */
 +
 +/********************************************************//**
 +Copies a log segment from the most up-to-date log group to the other log
 +groups, so that they all contain the latest log data. Also writes the info
 +about the latest checkpoint to the groups, and inits the fields in the group
 +memory structs to up-to-date values.
 +The function releases log_sys->mutex while it waits for the checkpoint
 +write and re-acquires it before returning, so it is evidently meant to be
 +called with that mutex held. */
 +static
 +void
 +recv_synchronize_groups(
 +/*====================*/
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_group_t*	up_to_date_group	/*!< in: the most up-to-date
 +						log group */
 +#endif
 +	)
 +{
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +	lsn_t		recovered_lsn;
 +
 +	recovered_lsn = recv_sys->recovered_lsn;
 +
 +	/* Read the last recovered log block to the recovery system buffer:
 +	the block is always incomplete */
 +
 +	start_lsn = ut_uint64_align_down(recovered_lsn,
 +					 OS_FILE_LOG_BLOCK_SIZE);
 +	end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	ut_a(start_lsn != end_lsn);
 +
 +	log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
 +#ifdef UNIV_LOG_ARCHIVE
 +			       up_to_date_group,
 +#else /* UNIV_LOG_ARCHIVE */
 +			       UT_LIST_GET_FIRST(log_sys->log_groups),
 +#endif /* UNIV_LOG_ARCHIVE */
 +			       start_lsn, end_lsn, FALSE);
 +
 +	for (log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +	     group;
 +	     group = UT_LIST_GET_NEXT(log_groups, group)) {
 +#ifdef UNIV_LOG_ARCHIVE
 +		if (group != up_to_date_group) {
 +
 +			/* Copy log data if needed. The source group is the
 +			first argument of recv_copy_group() and the
 +			destination the second. */
 +
 +			recv_copy_group(up_to_date_group, group,
 +					recovered_lsn);
 +		}
 +#endif /* UNIV_LOG_ARCHIVE */
 +		/* Update the fields in the group struct to correspond to
 +		recovered_lsn */
 +
 +		log_group_set_fields(group, recovered_lsn);
 +	}
 +
 +	/* Copy the checkpoint info to the groups; remember that we have
 +	incremented checkpoint_no by one, and the info will not be written
 +	over the max checkpoint info, thus making the preservation of max
 +	checkpoint info on disk certain */
 +
 +	log_groups_write_checkpoint_info();
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Wait for the checkpoint write to complete */
 +	rw_lock_s_lock(&(log_sys->checkpoint_lock));
 +	rw_lock_s_unlock(&(log_sys->checkpoint_lock));
 +
 +	mutex_enter(&(log_sys->mutex));
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/***********************************************************************//**
 +Checks the consistency of the checkpoint info by recomputing its two
 +checksums: the first covers the bytes before LOG_CHECKPOINT_CHECKSUM_1,
 +the second the bytes from LOG_CHECKPOINT_LSN up to
 +LOG_CHECKPOINT_CHECKSUM_2. The low 32 bits of each fold must equal the
 +4-byte value stored at the respective checksum offset.
 +@return	TRUE if ok */
 +static
 +ibool
 +recv_check_cp_is_consistent(
 +/*========================*/
 +	const byte*	buf)	/*!< in: buffer containing checkpoint info */
 +{
 +	ulint	computed;
 +	ulint	stored;
 +
 +	computed = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 +	stored = mach_read_from_4(buf + LOG_CHECKPOINT_CHECKSUM_1);
 +
 +	if ((computed & 0xFFFFFFFFUL) != stored) {
 +		return(FALSE);
 +	}
 +
 +	/* The second checksum is only examined when the first matches. */
 +	computed = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 +				  LOG_CHECKPOINT_CHECKSUM_2
 +				  - LOG_CHECKPOINT_LSN);
 +	stored = mach_read_from_4(buf + LOG_CHECKPOINT_CHECKSUM_2);
 +
 +	if ((computed & 0xFFFFFFFFUL) != stored) {
 +		return(FALSE);
 +	}
 +
 +	return(TRUE);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/********************************************************//**
 +Looks for the maximum consistent checkpoint from the log groups.
 +For every group the log block size stored in the file header is compared
 +with srv_log_block_size (a stored 0 counts as 512), and then both
 +checkpoint slots, LOG_CHECKPOINT_1 and LOG_CHECKPOINT_2, are read and
 +verified with recv_check_cp_is_consistent(). A group starts out as
 +LOG_GROUP_CORRUPTED and becomes LOG_GROUP_OK as soon as one of its slots
 +is consistent. Note that group->lsn and group->lsn_offset are overwritten
 +by every consistent slot, so they hold the values of the last consistent
 +slot read, which need not be the one with the highest checkpoint number.
 +@return	error code or DB_SUCCESS; DB_ERROR on a block size mismatch or
 +when no consistent checkpoint was found in any group */
 +static MY_ATTRIBUTE((nonnull, warn_unused_result))
 +dberr_t
 +recv_find_max_checkpoint(
 +/*=====================*/
 +	log_group_t**	max_group,	/*!< out: max group */
 +	ulint*		max_field)	/*!< out: LOG_CHECKPOINT_1 or
 +					LOG_CHECKPOINT_2 */
 +{
 +	log_group_t*	group;
 +	ib_uint64_t	max_no;
 +	ib_uint64_t	checkpoint_no;
 +	ulint		field;
 +	byte*		buf;
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	max_no = 0;
 +	*max_group = NULL;
 +	*max_field = 0;
 +
 +	/* The checkpoint fields below are parsed from this buffer;
 +	presumably log_group_read_checkpoint_info() fills it - confirm. */
 +	buf = log_sys->checkpoint_buf;
 +
 +	while (group) {
 +
 +		ulint	log_hdr_log_block_size;
 +
 +		group->state = LOG_GROUP_CORRUPTED;
 +
 +		/* Assert that we can reuse log_sys->checkpoint_buf to read the
 +		part of the header that contains the log block size. */
 +		ut_ad(LOG_FILE_OS_FILE_LOG_BLOCK_SIZE + 4
 +		      < OS_FILE_LOG_BLOCK_SIZE);
 +
 +		fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
 +		       0, 0, OS_FILE_LOG_BLOCK_SIZE,
 +		       log_sys->checkpoint_buf, NULL);
 +		log_hdr_log_block_size
 +			= mach_read_from_4(log_sys->checkpoint_buf
 +					   + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE);
 +		if (log_hdr_log_block_size == 0) {
 +			/* 0 means default value */
 +			log_hdr_log_block_size = 512;
 +		}
 +		if (UNIV_UNLIKELY(log_hdr_log_block_size
 +				  != srv_log_block_size)) {
 +			/* Cast to ulong so that the arguments match %lu,
 +			as the other diagnostics in this file do. */
 +			fprintf(stderr,
 +				"InnoDB: Error: The block size of ib_logfile "
 +				"%lu is not equal to innodb_log_block_size "
 +				"%lu.\n"
 +				"InnoDB: Error: Suggestion - Recreate log "
 +				"files.\n",
 +				(ulong) log_hdr_log_block_size,
 +				(ulong) srv_log_block_size);
 +			return(DB_ERROR);
 +		}
 +
 +		for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
 +		     field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
 +
 +			log_group_read_checkpoint_info(group, field);
 +
 +			if (!recv_check_cp_is_consistent(buf)) {
 +#ifdef UNIV_DEBUG
 +				if (log_debug_writes) {
 +					fprintf(stderr,
 +						"InnoDB: Checkpoint in group"
 +						" %lu at %lu invalid, %lu\n",
 +						(ulong) group->id,
 +						(ulong) field,
 +						(ulong) mach_read_from_4(
 +							buf
 +							+ LOG_CHECKPOINT_CHECKSUM_1));
 +
 +				}
 +#endif /* UNIV_DEBUG */
 +				/* Skip this slot and try the next one. */
 +				continue;
 +			}
 +
 +			group->state = LOG_GROUP_OK;
 +
 +			group->lsn = mach_read_from_8(
 +				buf + LOG_CHECKPOINT_LSN);
 +			group->lsn_offset = mach_read_from_4(
 +				buf + LOG_CHECKPOINT_OFFSET_LOW32);
 +			group->lsn_offset |= ((lsn_t) mach_read_from_4(
 +				buf + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32;
 +			checkpoint_no = mach_read_from_8(
 +				buf + LOG_CHECKPOINT_NO);
 +
 +#ifdef UNIV_DEBUG
 +			if (log_debug_writes) {
 +				fprintf(stderr,
 +					"InnoDB: Checkpoint number %lu"
 +					" found in group %lu\n",
 +					(ulong) checkpoint_no,
 +					(ulong) group->id);
 +			}
 +#endif /* UNIV_DEBUG */
 +
 +			/* >= means that a later slot or group wins a tie. */
 +			if (checkpoint_no >= max_no) {
 +				*max_group = group;
 +				*max_field = field;
 +				max_no = checkpoint_no;
 +			}
 +		}
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	if (*max_group == NULL) {
 +
 +		fprintf(stderr,
 +			"InnoDB: No valid checkpoint found.\n"
 +			"InnoDB: If you are attempting downgrade"
 +			" from MySQL 5.7.9 or later,\n"
 +			"InnoDB: please refer to " REFMAN
 +			"upgrading-downgrading.html\n"
 +			"InnoDB: If this error appears when you are"
 +			" creating an InnoDB database,\n"
 +			"InnoDB: the problem may be that during"
 +			" an earlier attempt you managed\n"
 +			"InnoDB: to create the InnoDB data files,"
 +			" but log file creation failed.\n"
 +			"InnoDB: If that is the case, please refer to\n"
 +			"InnoDB: " REFMAN "error-creating-innodb.html\n");
 +		return(DB_ERROR);
 +	}
 +
 +	return(DB_SUCCESS);
 +}
 +#else /* !UNIV_HOTBACKUP */
 +/*******************************************************************//**
 +Reads the checkpoint info needed in hot backup.
 +Checks both checkpoint fields of the log group header and decodes the one
 +chosen: LOG_CHECKPOINT_2 if it is consistent and carries a strictly greater
 +checkpoint number than the best seen so far, otherwise LOG_CHECKPOINT_1 if
 +that one is consistent.
 +@return	TRUE if success, FALSE if no checkpoint field was selected */
 +UNIV_INTERN
 +ibool
 +recv_read_checkpoint_info_for_backup(
 +/*=================================*/
 +	const byte*	hdr,	/*!< in: buffer containing the log group
 +				header */
 +	lsn_t*		lsn,	/*!< out: checkpoint lsn */
 +	lsn_t*		offset,	/*!< out: checkpoint offset in the log group */
 +	lsn_t*		cp_no,	/*!< out: checkpoint number */
 +	lsn_t*		first_header_lsn)
 +				/*!< out: lsn of the start of the
 +				first log file */
 +{
 +	const byte*	first_cp	= hdr + LOG_CHECKPOINT_1;
 +	const byte*	second_cp	= hdr + LOG_CHECKPOINT_2;
 +	const byte*	chosen_cp;
 +	ulint		chosen_field	= 0;
 +	ib_uint64_t	best_no		= 0;
 +
 +	/* First candidate: remember its number for the comparison below. */
 +	if (recv_check_cp_is_consistent(first_cp)) {
 +		best_no = mach_read_from_8(first_cp + LOG_CHECKPOINT_NO);
 +		chosen_field = LOG_CHECKPOINT_1;
 +	}
 +
 +	/* Second candidate: wins only with a strictly greater number. */
 +	if (recv_check_cp_is_consistent(second_cp)
 +	    && mach_read_from_8(second_cp + LOG_CHECKPOINT_NO) > best_no) {
 +		chosen_field = LOG_CHECKPOINT_2;
 +	}
 +
 +	if (chosen_field == 0) {
 +		return(FALSE);
 +	}
 +
 +	chosen_cp = hdr + chosen_field;
 +
 +	*lsn = mach_read_from_8(chosen_cp + LOG_CHECKPOINT_LSN);
 +
 +	/* The offset is stored as two 32-bit halves. */
 +	*offset = mach_read_from_4(chosen_cp + LOG_CHECKPOINT_OFFSET_LOW32);
 +	*offset |= ((lsn_t) mach_read_from_4(
 +			    chosen_cp + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32;
 +
 +	*cp_no = mach_read_from_8(chosen_cp + LOG_CHECKPOINT_NO);
 +
 +	*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
 +
 +	return(TRUE);
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/******************************************************//**
 +Checks the 4-byte checksum to the trailer checksum field of a log
 +block.  We also accept a log block in the old format before
 +InnoDB-3.23.52 where the checksum field contains the log block number.
 +When the stored checksum does not match the configured algorithm and one
 +of the STRICT_* algorithms is configured, the mismatch is logged and a
 +message is issued at IB_LOG_LEVEL_FATAL.
 +@return TRUE if ok, or if the log block may be in the format of InnoDB
 +version predating 3.23.52 */
 +UNIV_INTERN
 +ibool
 +log_block_checksum_is_ok_or_old_format(
 +/*===================================*/
 +	const byte*	block)	/*!< in: pointer to a log block */
 +{
 +#ifdef UNIV_LOG_DEBUG
 +	/* In UNIV_LOG_DEBUG builds every block is accepted. */
 +	return(TRUE);
 +#endif /* UNIV_LOG_DEBUG */
 +
 +	ulint block_checksum = log_block_get_checksum(block);
 +
 +	/* Fast path: checksums are disabled, or the stored value matches
 +	what log_block_calc_checksum() computes for this block. */
 +	if (UNIV_LIKELY(srv_log_checksum_algorithm ==
 +			SRV_CHECKSUM_ALGORITHM_NONE ||
 +			log_block_calc_checksum(block) == block_checksum)) {
 +
 +		return(TRUE);
 +	}
 +
 +	/* Strict modes: report the mismatch, try to name the algorithm the
 +	stored checksum corresponds to, and then log at FATAL level. */
 +	if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 ||
 +	    srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB ||
 +	    srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) {
 +
 +		const char*	algo = NULL;
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"log block checksum mismatch: expected " ULINTPF ", "
 +			"calculated checksum " ULINTPF,
 +			block_checksum,
 +			log_block_calc_checksum(block));
 +
 +		if (block_checksum == LOG_NO_CHECKSUM_MAGIC) {
 +
 +			algo = "none";
 +		} else if (block_checksum ==
 +			   log_block_calc_checksum_crc32(block)) {
 +
 +			algo = "crc32";
 +		} else if (block_checksum ==
 +			   log_block_calc_checksum_innodb(block)) {
 +
 +			algo = "innodb";
 +		}
 +
 +		/* algo stays NULL when the stored value matches none of the
 +		known algorithms; in that case only the FATAL message
 +		follows. */
 +		if (algo) {
 +
 +			const char*	current_algo;
 +
 +			current_algo = buf_checksum_algorithm_name(
 +				(srv_checksum_algorithm_t)
 +				srv_log_checksum_algorithm);
 +
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"current InnoDB log checksum type: %s, "
 +				"detected log checksum type: %s",
 +				current_algo,
 +				algo);
 +		}
 +
 +		ib_logf(IB_LOG_LEVEL_FATAL,
 +			"STRICT method was specified for innodb_log_checksum, "
 +			"so we intentionally assert here.");
 +	}
 +
 +	/* Only the two non-strict checksumming algorithms can reach this
 +	point: NONE returned above and the STRICT_* modes were handled. */
 +	ut_ad(srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_CRC32 ||
 +	      srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB);
 +
 +	/* Non-strict mode: accept a checksum produced by any known
 +	algorithm, including the "no checksum" magic value. */
 +	if (block_checksum == LOG_NO_CHECKSUM_MAGIC ||
 +	    block_checksum == log_block_calc_checksum_crc32(block) ||
 +	    block_checksum == log_block_calc_checksum_innodb(block)) {
 +
 +		return(TRUE);
 +	}
 +
 +	if (log_block_get_hdr_no(block) == block_checksum) {
 +
 +		/* We assume the log block is in the format of
 +		InnoDB version < 3.23.52 and the block is ok */
 +#if 0
 +		fprintf(stderr,
 +			"InnoDB: Scanned old format < InnoDB-3.23.52"
 +			" log block number %lu\n",
 +			log_block_get_hdr_no(block));
 +#endif
 +		return(TRUE);
 +	}
 +
 +	return(FALSE);
 +}
 +
 +#ifdef UNIV_HOTBACKUP
 +/*******************************************************************//**
 +Scans the log segment and n_bytes_scanned is set to the length of valid
 +log scanned.
 +The buffer is walked in steps of OS_FILE_LOG_BLOCK_SIZE. Scanning stops at
 +the first block whose header number does not match *scanned_lsn, whose
 +checksum is rejected, whose checkpoint number is far behind the highest one
 +scanned so far, or whose data length is less than a full block. */
 +UNIV_INTERN
 +void
 +recv_scan_log_seg_for_backup(
 +/*=========================*/
 +	byte*		buf,		/*!< in: buffer containing log data */
 +	ulint		buf_len,	/*!< in: data length in that buffer */
 +	lsn_t*		scanned_lsn,	/*!< in/out: lsn of buffer start,
 +					we return scanned lsn */
 +	ulint*		scanned_checkpoint_no,
 +					/*!< in/out: 4 lowest bytes of the
 +					highest scanned checkpoint number so
 +					far */
 +	ulint*		n_bytes_scanned)/*!< out: how much we were able to
 +					scan, smaller than buf_len if log
 +					data ended here */
 +{
 +	ulint	data_len;
 +	byte*	log_block;
 +	ulint	no;
 +
 +	*n_bytes_scanned = 0;
 +
 +	for (log_block = buf; log_block < buf + buf_len;
 +	     log_block += OS_FILE_LOG_BLOCK_SIZE) {
 +
 +		no = log_block_get_hdr_no(log_block);
 +
 +#if 0
 +		fprintf(stderr, "Log block header no %lu\n", no);
 +#endif
 +
 +		/* The block must carry the number expected for the lsn
 +		reached so far, and its checksum must be acceptable. */
 +		if (no != log_block_convert_lsn_to_no(*scanned_lsn)
 +		    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 +#if 0
 +			fprintf(stderr,
 +				"Log block n:o %lu, scanned lsn n:o %lu\n",
 +				no, log_block_convert_lsn_to_no(*scanned_lsn));
 +#endif
 +			/* Garbage or an incompletely written log block */
 +
 +			/* Advancing log_block only serves the disabled
 +			diagnostic below; the loop is left right after. */
 +			log_block += OS_FILE_LOG_BLOCK_SIZE;
 +#if 0
 +			fprintf(stderr,
 +				"Next log block n:o %lu\n",
 +				log_block_get_hdr_no(log_block));
 +#endif
 +			break;
 +		}
 +
 +		/* Stop if the block's checkpoint number is lower than the
 +		highest one scanned so far by more than 0x80000000. */
 +		if (*scanned_checkpoint_no > 0
 +		    && log_block_get_checkpoint_no(log_block)
 +		    < *scanned_checkpoint_no
 +		    && *scanned_checkpoint_no
 +		    - log_block_get_checkpoint_no(log_block)
 +		    > 0x80000000UL) {
 +
 +			/* Garbage from a log buffer flush which was made
 +			before the most recent database recovery */
 +#if 0
 +			fprintf(stderr,
 +				"Scanned cp n:o %lu, block cp n:o %lu\n",
 +				*scanned_checkpoint_no,
 +				log_block_get_checkpoint_no(log_block));
 +#endif
 +			break;
 +		}
 +
 +		data_len = log_block_get_data_len(log_block);
 +
 +		/* The block is valid: account for it in all in/out
 +		counters. */
 +		*scanned_checkpoint_no
 +			= log_block_get_checkpoint_no(log_block);
 +		*scanned_lsn += data_len;
 +
 +		*n_bytes_scanned += data_len;
 +
 +		if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 +			/* Log data ends here */
 +
 +#if 0
 +			fprintf(stderr, "Log block data len %lu\n",
 +				data_len);
 +#endif
 +			break;
 +		}
 +	}
 +}
 +#endif /* UNIV_HOTBACKUP */
 +
 +/*******************************************************************//**
 +Tries to parse a single log record body and also applies it to a page if
 +specified. File ops are parsed, but not applied in this function.
 +The record type selects the parser that is invoked. An unrecognized type
 +sets recv_sys->found_corrupt_log and yields NULL. An index object handed
 +back by mlog_parse_index() is freed, together with its table, before
 +returning.
 +@return	log record end, NULL if not a complete record */
 +static
 +byte*
 +recv_parse_or_apply_log_rec_body(
 +/*=============================*/
 +	byte		type,	/*!< in: type */
 +	byte*		ptr,	/*!< in: pointer to a buffer */
 +	byte*		end_ptr,/*!< in: pointer to the buffer end */
 +	buf_block_t*	block,	/*!< in/out: buffer block or NULL; if
 +				not NULL, then the log record is
 +				applied to the page, and the log
 +				record should be complete then */
 +	mtr_t*		mtr,	/*!< in: mtr or NULL; should be non-NULL
 +				if and only if block is non-NULL */
 +	ulint		space_id)
 +				/*!< in: tablespace id obtained by
 +				parsing initial log record */
 +{
 +	dict_index_t*	index	= NULL;
 +	page_t*		page;
 +	page_zip_des_t*	page_zip;
 +#ifdef UNIV_DEBUG
 +	ulint		page_type;
 +#endif /* UNIV_DEBUG */
 +
 +	/* block and mtr must be either both NULL or both non-NULL. */
 +	ut_ad(!block == !mtr);
 +
 +	if (block) {
 +		page = block->frame;
 +		page_zip = buf_block_get_page_zip(block);
 +		ut_d(page_type = fil_page_get_type(page));
 +	} else {
 +		/* Parse-only mode: there is no page to apply to. */
 +		page = NULL;
 +		page_zip = NULL;
 +		ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
 +	}
 +
 +	switch (type) {
 +#ifdef UNIV_LOG_LSN_DEBUG
 +	case MLOG_LSN:
 +		/* The LSN is checked in recv_parse_log_rec(). */
 +		break;
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +	case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
 +#ifdef UNIV_DEBUG
 +		if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
 +		    && end_ptr >= ptr + 2) {
 +			/* It is OK to set FIL_PAGE_TYPE and certain
 +			list node fields on an empty page.  Any other
 +			write is not OK. */
 +
 +			/* NOTE: There may be bogus assertion failures for
 +			dict_hdr_create(), trx_rseg_header_create(),
 +			trx_sys_create_doublewrite_buf(), and
 +			trx_sysf_create().
 +			These are only called during database creation. */
 +			ulint	offs = mach_read_from_2(ptr);
 +
 +			switch (type) {
 +			default:
 +				ut_error;
 +			case MLOG_2BYTES:
 +				/* Note that this can fail when the
 +				redo log has been written with something
 +				older than InnoDB Plugin 1.0.4. */
 +				ut_ad(offs == FIL_PAGE_TYPE
 +				      || offs == IBUF_TREE_SEG_HEADER
 +				      + IBUF_HEADER + FSEG_HDR_OFFSET
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER + FIL_ADDR_BYTE
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER + FIL_ADDR_BYTE
 +				      + FIL_ADDR_SIZE
 +				      || offs == PAGE_BTR_SEG_LEAF
 +				      + PAGE_HEADER + FSEG_HDR_OFFSET
 +				      || offs == PAGE_BTR_SEG_TOP
 +				      + PAGE_HEADER + FSEG_HDR_OFFSET
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 +				      + PAGE_HEADER + FIL_ADDR_BYTE
 +				      + 0 /*FLST_PREV*/
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 +				      + PAGE_HEADER + FIL_ADDR_BYTE
 +				      + FIL_ADDR_SIZE /*FLST_NEXT*/);
 +				break;
 +			case MLOG_4BYTES:
 +				/* Note that this can fail when the
 +				redo log has been written with something
 +				older than InnoDB Plugin 1.0.4. */
 +				ut_ad(0
 +				      || offs == IBUF_TREE_SEG_HEADER
 +				      + IBUF_HEADER + FSEG_HDR_SPACE
 +				      || offs == IBUF_TREE_SEG_HEADER
 +				      + IBUF_HEADER + FSEG_HDR_PAGE_NO
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER/* flst_init */
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER + FIL_ADDR_PAGE
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST
 +				      + PAGE_HEADER + FIL_ADDR_PAGE
 +				      + FIL_ADDR_SIZE
 +				      || offs == PAGE_BTR_SEG_LEAF
 +				      + PAGE_HEADER + FSEG_HDR_PAGE_NO
 +				      || offs == PAGE_BTR_SEG_LEAF
 +				      + PAGE_HEADER + FSEG_HDR_SPACE
 +				      || offs == PAGE_BTR_SEG_TOP
 +				      + PAGE_HEADER + FSEG_HDR_PAGE_NO
 +				      || offs == PAGE_BTR_SEG_TOP
 +				      + PAGE_HEADER + FSEG_HDR_SPACE
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 +				      + PAGE_HEADER + FIL_ADDR_PAGE
 +				      + 0 /*FLST_PREV*/
 +				      || offs == PAGE_BTR_IBUF_FREE_LIST_NODE
 +				      + PAGE_HEADER + FIL_ADDR_PAGE
 +				      + FIL_ADDR_SIZE /*FLST_NEXT*/);
 +				break;
 +			}
 +		}
 +#endif /* UNIV_DEBUG */
 +		ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
 +		break;
 +	case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_REC_INSERT,
 +				     &index))) {
 +			/* The page format must agree with the format of
 +			the table of the parsed index. */
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
 +							block, index, mtr);
 +		}
 +		break;
 +	case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_REC_CLUST_DELETE_MARK,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = btr_cur_parse_del_mark_set_clust_rec(
 +				ptr, end_ptr, page, page_zip, index);
 +		}
 +		break;
 +	case MLOG_COMP_REC_SEC_DELETE_MARK:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		/* This log record type is obsolete, but we process it for
 +		backward compatibility with MySQL 5.0.3 and 5.0.4. */
 +		ut_a(!page || page_is_comp(page));
 +		ut_a(!page_zip);
 +		ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
 +		if (!ptr) {
 +			break;
 +		}
 +		/* Fall through */
 +	case MLOG_REC_SEC_DELETE_MARK:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
 +							 page, page_zip);
 +		break;
 +	case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_REC_UPDATE_IN_PLACE,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
 +							    page_zip, index);
 +		}
 +		break;
 +	case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
 +	case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_LIST_END_DELETE
 +				     || type == MLOG_COMP_LIST_START_DELETE,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
 +							 block, index, mtr);
 +		}
 +		break;
 +	case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_LIST_END_COPY_CREATED,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = page_parse_copy_rec_list_to_created_page(
 +				ptr, end_ptr, block, index, mtr);
 +		}
 +		break;
 +	case MLOG_PAGE_REORGANIZE:
 +	case MLOG_COMP_PAGE_REORGANIZE:
 +	case MLOG_ZIP_PAGE_REORGANIZE:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		/* Every reorganize type except the plain
 +		MLOG_PAGE_REORGANIZE is parsed as a compact-format index. */
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type != MLOG_PAGE_REORGANIZE,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = btr_parse_page_reorganize(
 +				ptr, end_ptr, index,
 +				type == MLOG_ZIP_PAGE_REORGANIZE,
 +				block, mtr);
 +		}
 +		break;
 +	case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
 +		/* Allow anything in page_type when creating a page. */
 +		ut_a(!page_zip);
 +		ptr = page_parse_create(ptr, end_ptr,
 +					type == MLOG_COMP_PAGE_CREATE,
 +					block, mtr);
 +		break;
 +	case MLOG_UNDO_INSERT:
 +		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 +		ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
 +		break;
 +	case MLOG_UNDO_ERASE_END:
 +		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 +		ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
 +		break;
 +	case MLOG_UNDO_INIT:
 +		/* Allow anything in page_type when creating a page. */
 +		ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
 +		break;
 +	case MLOG_UNDO_HDR_DISCARD:
 +		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 +		ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
 +		break;
 +	case MLOG_UNDO_HDR_CREATE:
 +	case MLOG_UNDO_HDR_REUSE:
 +		ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
 +		ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
 +						 page, mtr);
 +		break;
 +	case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		/* On a compressed page, MLOG_COMP_REC_MIN_MARK
 +		will be followed by MLOG_COMP_REC_DELETE
 +		or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
 +		in the same mini-transaction. */
 +		ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
 +		ptr = btr_parse_set_min_rec_mark(
 +			ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
 +			page, mtr);
 +		break;
 +	case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +
 +		if (NULL != (ptr = mlog_parse_index(
 +				     ptr, end_ptr,
 +				     type == MLOG_COMP_REC_DELETE,
 +				     &index))) {
 +			ut_a(!page
 +			     || (ibool)!!page_is_comp(page)
 +			     == dict_table_is_comp(index->table));
 +			ptr = page_cur_parse_delete_rec(ptr, end_ptr,
 +							block, index, mtr);
 +		}
 +		break;
 +	case MLOG_IBUF_BITMAP_INIT:
 +		/* Allow anything in page_type when creating a page. */
 +		ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
 +		break;
 +	case MLOG_INIT_FILE_PAGE:
 +		/* Allow anything in page_type when creating a page. */
 +		ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
 +		break;
 +	case MLOG_WRITE_STRING:
 +		ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
 +		ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
 +		break;
 +	case MLOG_FILE_RENAME:
 +		/* Do not rerun file-based log entries if this is
 +		IO completion from a page read. */
 +		if (page == NULL) {
 +			/* The tablespace id is passed on only while
 +			recovery is on; otherwise 0 is passed. */
 +			ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
 +							 (recv_recovery_is_on()
 +							  ? space_id : 0), 0);
 +		}
 +		break;
 +	case MLOG_FILE_CREATE:
 +	case MLOG_FILE_DELETE:
 +	case MLOG_FILE_CREATE2:
 +		/* Do not rerun file-based log entries if this is
 +		IO completion from a page read. */
 +		if (page == NULL) {
 +			ptr = fil_op_log_parse_or_replay(ptr, end_ptr,
 +							 type, 0, 0);
 +		}
 +		break;
 +	case MLOG_ZIP_WRITE_NODE_PTR:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
 +						    page, page_zip);
 +		break;
 +	case MLOG_ZIP_WRITE_BLOB_PTR:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
 +						    page, page_zip);
 +		break;
 +	case MLOG_ZIP_WRITE_HEADER:
 +		ut_ad(!page || page_type == FIL_PAGE_INDEX);
 +		ptr = page_zip_parse_write_header(ptr, end_ptr,
 +						  page, page_zip);
 +		break;
 +	case MLOG_ZIP_PAGE_COMPRESS:
 +		/* Allow anything in page_type when creating a page. */
 +		ptr = page_zip_parse_compress(ptr, end_ptr,
 +					      page, page_zip);
 +		break;
 +	case MLOG_ZIP_PAGE_COMPRESS_NO_DATA:
 +		if (NULL != (ptr = mlog_parse_index(
 +				ptr, end_ptr, TRUE, &index))) {
 +
 +			ut_a(!page || ((ibool)!!page_is_comp(page)
 +				== dict_table_is_comp(index->table)));
 +			ptr = page_zip_parse_compress_no_data(
 +				ptr, end_ptr, page, page_zip, index);
 +		}
 +		break;
 +	default:
 +		/* Unknown record type: flag the log as corrupt. */
 +		ptr = NULL;
 +		recv_sys->found_corrupt_log = TRUE;
 +	}
 +
 +	/* Release the index object that mlog_parse_index() handed back,
 +	together with its table. The table pointer is saved first because
 +	the index is freed before the table. */
 +	if (index) {
 +		dict_table_t*	table = index->table;
 +
 +		dict_mem_index_free(index);
 +		dict_mem_table_free(table);
 +	}
 +
 +	return(ptr);
 +}
 +
 +/*********************************************************************//**
 +Folds a page file address (space id and page number) into a single value
 +by means of ut_fold_ulint_pair(): used in inserting or searching for a log
 +record in the hash table.
 +@return	folded value */
 +UNIV_INLINE
 +ulint
 +recv_fold(
 +/*======*/
 +	ulint	space,	/*!< in: space */
 +	ulint	page_no)/*!< in: page number */
 +{
 +	const ulint	folded = ut_fold_ulint_pair(space, page_no);
 +
 +	return(folded);
 +}
 +
 +/*********************************************************************//**
 +Calculates the hash value of a page file address in recv_sys->addr_hash:
 +used in inserting or searching for a log record in the hash table.
 +@return	hash value computed from the fold of (space, page_no) */
 +UNIV_INLINE
 +ulint
 +recv_hash(
 +/*======*/
 +	ulint	space,	/*!< in: space */
 +	ulint	page_no)/*!< in: page number */
 +{
 +	const ulint	folded = recv_fold(space, page_no);
 +
 +	return(hash_calc_hash(folded, recv_sys->addr_hash));
 +}
 +
 +/*********************************************************************//**
 +Gets the hashed file address struct for a page by walking the hash chain
 +of recv_sys->addr_hash that belongs to (space, page_no).
 +@return	file address struct, NULL if not found from the hash table */
 +static
 +recv_addr_t*
 +recv_get_fil_addr_struct(
 +/*=====================*/
 +	ulint	space,	/*!< in: space id */
 +	ulint	page_no)/*!< in: page number */
 +{
 +	recv_addr_t*	entry = static_cast<recv_addr_t*>(
 +		HASH_GET_FIRST(recv_sys->addr_hash,
 +			       recv_hash(space, page_no)));
 +
 +	/* Follow the chain until a matching entry or its end is reached;
 +	at the end of the chain entry is NULL, which is what we return. */
 +	while (entry != 0) {
 +
 +		if (entry->space == space && entry->page_no == page_no) {
 +
 +			break;
 +		}
 +
 +		entry = static_cast<recv_addr_t*>(
 +			HASH_GET_NEXT(addr_hash, entry));
 +	}
 +
 +	return(entry);
 +}
 +
 +/*******************************************************************//**
 +Adds a new log record to the hash table of log records.
 +Nothing is stored if fil_tablespace_deleted_or_being_deleted_in_mem()
 +reports the tablespace as deleted. Otherwise the record is appended to the
 +record list of its page (the page entry is created on first use) and the
 +record body is copied into a chain of chunks allocated from
 +recv_sys->heap. */
 +static
 +void
 +recv_add_to_hash_table(
 +/*===================*/
 +	byte	type,		/*!< in: log record type */
 +	ulint	space,		/*!< in: space id */
 +	ulint	page_no,	/*!< in: page number */
 +	byte*	body,		/*!< in: log record body */
 +	byte*	rec_end,	/*!< in: log record end */
 +	lsn_t	start_lsn,	/*!< in: start lsn of the mtr */
 +	lsn_t	end_lsn)	/*!< in: end lsn of the mtr */
 +{
 +	recv_t*		rec;
 +	recv_addr_t*	page_entry;
 +	recv_data_t*	chunk;
 +	recv_data_t**	link;
 +	ulint		chunk_len;
 +
 +	if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
 +		/* The tablespace does not exist any more: do not store the
 +		log record */
 +
 +		return;
 +	}
 +
 +	/* Describe the record itself. */
 +	rec = static_cast<recv_t*>(
 +		mem_heap_alloc(recv_sys->heap, sizeof(recv_t)));
 +
 +	rec->type = type;
 +	rec->len = rec_end - body;
 +	rec->start_lsn = start_lsn;
 +	rec->end_lsn = end_lsn;
 +
 +	/* Find the entry of the page, creating it if this is the first
 +	record stored for the page. */
 +	page_entry = recv_get_fil_addr_struct(space, page_no);
 +
 +	if (page_entry == NULL) {
 +		page_entry = static_cast<recv_addr_t*>(
 +			mem_heap_alloc(recv_sys->heap, sizeof(recv_addr_t)));
 +
 +		page_entry->space = space;
 +		page_entry->page_no = page_no;
 +		page_entry->state = RECV_NOT_PROCESSED;
 +
 +		UT_LIST_INIT(page_entry->rec_list);
 +
 +		HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
 +			    recv_fold(space, page_no), page_entry);
 +		recv_sys->n_addrs++;
 +#if 0
 +		fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
 +			space, page_no);
 +#endif
 +	}
 +
 +	UT_LIST_ADD_LAST(rec_list, page_entry->rec_list, rec);
 +
 +	/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
 +	recv_sys->heap grows into the buffer pool, and bigger chunks could not
 +	be allocated */
 +
 +	link = &(rec->data);
 +
 +	for (; rec_end > body; body += chunk_len) {
 +
 +		chunk_len = rec_end - body;
 +
 +		if (chunk_len > RECV_DATA_BLOCK_SIZE) {
 +			chunk_len = RECV_DATA_BLOCK_SIZE;
 +		}
 +
 +		/* The payload is stored right behind the chunk header. */
 +		chunk = static_cast<recv_data_t*>(
 +			mem_heap_alloc(recv_sys->heap,
 +				       sizeof(recv_data_t) + chunk_len));
 +
 +		*link = chunk;
 +
 +		memcpy(chunk + 1, body, chunk_len);
 +
 +		link = &(chunk->next);
 +	}
 +
 +	/* Terminate the chain. */
 +	*link = NULL;
 +}
 +
 +/*********************************************************************//**
 +Copies the log record body from recv to buf by concatenating the payloads
 +of the chunk chain that starts at recv->data; every chunk contributes at
 +most RECV_DATA_BLOCK_SIZE bytes. */
 +static
 +void
 +recv_data_copy_to_buf(
 +/*==================*/
 +	byte*	buf,	/*!< in: buffer of length at least recv->len */
 +	recv_t*	recv)	/*!< in: log record */
 +{
 +	recv_data_t*	chunk		= recv->data;
 +	ulint		remaining	= recv->len;
 +
 +	for (; remaining > 0; chunk = chunk->next) {
 +		ulint	n_copy = remaining;
 +
 +		if (n_copy > RECV_DATA_BLOCK_SIZE) {
 +			n_copy = RECV_DATA_BLOCK_SIZE;
 +		}
 +
 +		/* The payload follows the chunk header directly. */
 +		ut_memcpy(buf, ((byte*) chunk) + sizeof(recv_data_t),
 +			  n_copy);
 +
 +		buf += n_copy;
 +		remaining -= n_copy;
 +	}
 +}
 +
 +/************************************************************************//**
 +Applies the hashed log records to the page, if the page lsn is less than the
 +lsn of a log record. This can be called when a buffer page has just been
 +read in, or also for a page already in the buffer pool.
 +Nothing is done if log records are not to be applied now, if no records are
 +hashed for the page, or if the page is already being processed or has been
 +processed. After the page has been processed, the count of pages still to
 +be recovered is decremented, and a "To recover" progress message is printed
 +if pages remain and at least 15 seconds have elapsed since
 +recv_sys->progress_time. */
 +UNIV_INTERN
 +void
 +recv_recover_page_func(
 +/*===================*/
 +#ifndef UNIV_HOTBACKUP
 +	ibool		just_read_in,
 +				/*!< in: TRUE if the i/o handler calls
 +				this for a freshly read page */
 +#endif /* !UNIV_HOTBACKUP */
 +	buf_block_t*	block)	/*!< in/out: buffer block */
 +{
 +	page_t*		page;
 +	page_zip_des_t*	page_zip;
 +	recv_addr_t*	recv_addr;
 +	recv_t*		recv;
 +	byte*		buf;
 +	lsn_t		start_lsn;
 +	lsn_t		end_lsn;
 +	lsn_t		page_lsn;
 +	lsn_t		page_newest_lsn;
 +	ibool		modification_to_page;
 +#ifndef UNIV_HOTBACKUP
 +	ibool		success;
 +#endif /* !UNIV_HOTBACKUP */
 +	mtr_t		mtr;
++	ib_time_t	time;
 +
 +	mutex_enter(&(recv_sys->mutex));
 +
 +	if (recv_sys->apply_log_recs == FALSE) {
 +
 +		/* Log records should not be applied now */
 +
 +		mutex_exit(&(recv_sys->mutex));
 +
 +		return;
 +	}
 +
 +	recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
 +					     buf_block_get_page_no(block));
 +
 +	if ((recv_addr == NULL)
 +		/* bugfix: http://bugs.mysql.com/bug.php?id=44140 */
 +	    || (recv_addr->state == RECV_BEING_READ && !just_read_in)
 +	    || (recv_addr->state == RECV_BEING_PROCESSED)
 +	    || (recv_addr->state == RECV_PROCESSED)) {
 +
 +		mutex_exit(&(recv_sys->mutex));
 +
 +		return;
 +	}
 +
 +#if 0
 +	fprintf(stderr, "Recovering space %lu, page %lu\n",
 +		buf_block_get_space(block), buf_block_get_page_no(block));
 +#endif
 +
 +	/* Claim the page while still holding the mutex, so that the state
 +	check above makes any other caller return early. */
 +	recv_addr->state = RECV_BEING_PROCESSED;
 +
 +	mutex_exit(&(recv_sys->mutex));
 +
 +	mtr_start(&mtr);
 +	mtr_set_log_mode(&mtr, MTR_LOG_NONE);
 +
 +	page = block->frame;
 +	page_zip = buf_block_get_page_zip(block);
 +
 +#ifndef UNIV_HOTBACKUP
 +	if (just_read_in) {
 +		/* Move the ownership of the x-latch on the page to
 +		this OS thread, so that we can acquire a second
 +		x-latch on it.  This is needed for the operations to
 +		the page to pass the debug checks. */
 +
 +		rw_lock_x_lock_move_ownership(&block->lock);
 +	}
 +
 +	success = buf_page_get_known_nowait(RW_X_LATCH, block,
 +					    BUF_KEEP_OLD,
 +					    __FILE__, __LINE__,
 +					    &mtr);
 +	ut_a(success);
 +
 +	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	/* Read the newest modification lsn from the page */
 +	page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
 +
 +#ifndef UNIV_HOTBACKUP
 +	/* It may be that the page has been modified in the buffer
 +	pool: read the newest modification lsn there */
 +
 +	page_newest_lsn = buf_page_get_newest_modification(&block->page);
 +
 +	if (page_newest_lsn) {
 +
 +		page_lsn = page_newest_lsn;
 +	}
 +#else /* !UNIV_HOTBACKUP */
 +	/* In recovery from a backup we do not really use the buffer pool */
 +	page_newest_lsn = 0;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	modification_to_page = FALSE;
 +	start_lsn = end_lsn = 0;
 +
 +	recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
 +
 +	while (recv) {
 +		/* Track the end lsn of the latest record visited; it is
 +		handed to buf_flush_recv_note_modification() below. */
 +		end_lsn = recv->end_lsn;
 +
 +		if (recv->len > RECV_DATA_BLOCK_SIZE) {
 +			/* We have to copy the record body to a separate
 +			buffer */
 +
 +			buf = static_cast<byte*>(mem_alloc(recv->len));
 +
 +			recv_data_copy_to_buf(buf, recv);
 +		} else {
 +			/* The body fits in a single chunk: use the payload
 +			stored behind the chunk header in place. */
 +			buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
 +		}
 +
 +		if (recv->type == MLOG_INIT_FILE_PAGE) {
 +			/* The page is being initialized: forget the lsn
 +			read from the page frame and clear the lsn fields
 +			stored on the page. */
 +			page_lsn = page_newest_lsn;
 +
 +			memset(FIL_PAGE_LSN + page, 0, 8);
 +			memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
 +			       + page, 0, 8);
 +
 +			if (page_zip) {
 +				memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
 +			}
 +		}
 +
 +		if (recv->start_lsn >= page_lsn) {
 +
 +			/* NOTE: this deliberately shadows the function-level
 +			end_lsn; it is the value stamped on the page for
 +			this record only. */
 +			lsn_t	end_lsn;
 +
 +			if (!modification_to_page) {
 +
 +				/* Remember the start lsn of the first
 +				record that gets applied. */
 +				modification_to_page = TRUE;
 +				start_lsn = recv->start_lsn;
 +			}
 +
 +			DBUG_PRINT("ib_log",
 +				   ("apply " DBUG_LSN_PF ": %u len %u "
 +				    "page %u:%u", recv->start_lsn,
 +				    (unsigned) recv->type,
 +				    (unsigned) recv->len,
 +				    (unsigned) recv_addr->space,
 +				    (unsigned) recv_addr->page_no));
 +
 +			recv_parse_or_apply_log_rec_body(recv->type, buf,
 +							 buf + recv->len,
 +							 block, &mtr,
 +							 recv_addr->space);
 +
 +			end_lsn = recv->start_lsn + recv->len;
 +			mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
 +			mach_write_to_8(UNIV_PAGE_SIZE
 +					- FIL_PAGE_END_LSN_OLD_CHKSUM
 +					+ page, end_lsn);
 +
 +			if (page_zip) {
 +				mach_write_to_8(FIL_PAGE_LSN
 +						+ page_zip->data, end_lsn);
 +			}
 +		}
 +
 +		/* Free the temporary copy made above for a long record. */
 +		if (recv->len > RECV_DATA_BLOCK_SIZE) {
 +			mem_free(buf);
 +		}
 +
 +		recv = UT_LIST_GET_NEXT(rec_list, recv);
 +	}
 +
 +#ifdef UNIV_ZIP_DEBUG
 +	if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
 +		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
 +
 +		ut_a(!page_zip
 +		     || page_zip_validate_low(page_zip, page, NULL, FALSE));
 +	}
 +#endif /* UNIV_ZIP_DEBUG */
 +
 +#ifndef UNIV_HOTBACKUP
 +	if (modification_to_page) {
 +		ut_a(block);
 +
 +		log_flush_order_mutex_enter();
 +		buf_flush_recv_note_modification(block, start_lsn, end_lsn);
 +		log_flush_order_mutex_exit();
 +	}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	/* Make sure that committing mtr does not change the modification
 +	lsn values of page */
 +
 +	mtr.modifications = FALSE;
 +
 +	mtr_commit(&mtr);
 +
++	/* Sample the clock before taking recv_sys->mutex. */
++	time = ut_time();
++
 +	mutex_enter(&(recv_sys->mutex));
 +
 +	if (recv_max_page_lsn < page_lsn) {
 +		recv_max_page_lsn = page_lsn;
 +	}
 +
 +	recv_addr->state = RECV_PROCESSED;
 +
- 	ut_a(recv_sys->n_addrs);
- 	recv_sys->n_addrs--;
- 
- 	mutex_exit(&(recv_sys->mutex));
++	ut_a(recv_sys->n_addrs > 0);
++	/* Report progress when pages remain and at least 15 seconds have
++	passed since the previous report. The elapsed time is the current
++	time minus the earlier progress_time; the reversed subtraction
++	would never reach 15 with a signed time type. */
++	if (--recv_sys->n_addrs && time - recv_sys->progress_time >= 15) {
++		recv_sys->progress_time = time;
++		ut_print_timestamp(stderr);
++		fprintf(stderr,
++			"  InnoDB: To recover: " ULINTPF " pages from log\n",
++			recv_sys->n_addrs);
++	}
 +
++	mutex_exit(&recv_sys->mutex);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/*******************************************************************//**
 +Reads in pages which have hashed log records, from an area around a given
 +page number.
 +The area is the RECV_READ_AHEAD_AREA-aligned range of page numbers that
 +contains page_no. A page of the area is requested if it has hashed log
 +records, buf_page_peek() does not find it, and its state is
 +RECV_NOT_PROCESSED; such a page is marked RECV_BEING_READ.
 +@return	number of pages found */
 +static
 +ulint
 +recv_read_in_area(
 +/*==============*/
 +	ulint	space,	/*!< in: space */
 +	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
 +	ulint	page_no)/*!< in: page number */
 +{
 +	recv_addr_t* recv_addr;
 +	ulint	page_nos[RECV_READ_AHEAD_AREA];
 +	ulint	low_limit;
 +	ulint	n;
 +
 +	/* Round page_no down to the start of its read-ahead area. */
 +	low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
 +
 +	n = 0;
 +
 +	for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
 +	     page_no++) {
 +		recv_addr = recv_get_fil_addr_struct(space, page_no);
 +
 +		if (recv_addr && !buf_page_peek(space, page_no)) {
 +
 +			/* The state is checked and changed under
 +			recv_sys->mutex. */
 +			mutex_enter(&(recv_sys->mutex));
 +
 +			if (recv_addr->state == RECV_NOT_PROCESSED) {
 +				recv_addr->state = RECV_BEING_READ;
 +
 +				page_nos[n] = page_no;
 +
 +				n++;
 +			}
 +
 +			mutex_exit(&(recv_sys->mutex));
 +		}
 +	}
 +
 +	/* Request the n collected pages in a single call. */
 +	buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
- 	/*
- 	fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
- 	*/
 +	return(n);
 +}
 +
- /*******************************************************************//**
- Empties the hash table of stored log records, applying them to appropriate
- pages. */
++/** Apply the hash table of stored log records to persistent data pages.
++@param[in]	last_batch	whether the change buffer merge will be
++				performed as part of the operation */
 +UNIV_INTERN
 +void
- recv_apply_hashed_log_recs(
- /*=======================*/
- 	ibool	allow_ibuf)	/*!< in: if TRUE, also ibuf operations are
- 				allowed during the application; if FALSE,
- 				no ibuf operations are allowed, and after
- 				the application all file pages are flushed to
- 				disk and invalidated in buffer pool: this
- 				alternative means that no new log records
- 				can be generated during the application;
- 				the caller must in this case own the log
- 				mutex */
++recv_apply_hashed_log_recs(bool last_batch)
 +{
- 	recv_addr_t* recv_addr;
- 	ulint	i;
- 	ibool	has_printed	= FALSE;
- 	mtr_t	mtr;
- loop:
- 	mutex_enter(&(recv_sys->mutex));
- 
- 	if (recv_sys->apply_batch_on) {
++	for (;;) {
++		mutex_enter(&recv_sys->mutex);
 +
- 		mutex_exit(&(recv_sys->mutex));
++		if (!recv_sys->apply_batch_on) {
++			break;
++		}
 +
++		mutex_exit(&recv_sys->mutex);
 +		os_thread_sleep(500000);
- 
- 		goto loop;
 +	}
 +
- 	ut_ad((allow_ibuf == 0) == (mutex_own(&log_sys->mutex) != 0));
++	ut_ad(!last_batch == mutex_own(&log_sys->mutex));
 +
- 	if (!allow_ibuf) {
++	if (!last_batch) {
 +		recv_no_ibuf_operations = TRUE;
 +	}
 +
++	if (ulint n = recv_sys->n_addrs) {
++		const char* msg = last_batch
++			? "Starting final batch to recover "
++			: "Starting a batch to recover ";
++		ib_logf(IB_LOG_LEVEL_INFO,
++			"%s" ULINTPF " pages from redo log", msg, n);
++	}
++
 +	recv_sys->apply_log_recs = TRUE;
 +	recv_sys->apply_batch_on = TRUE;
 +
- 	for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
- 
- 		for (recv_addr = static_cast<recv_addr_t*>(
- 				HASH_GET_FIRST(recv_sys->addr_hash, i));
- 		     recv_addr != 0;
++	for (ulint i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
++		for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>(
++			     HASH_GET_FIRST(recv_sys->addr_hash, i));
++		     recv_addr;
 +		     recv_addr = static_cast<recv_addr_t*>(
 +				HASH_GET_NEXT(addr_hash, recv_addr))) {
 +
 +			ulint	space = recv_addr->space;
 +			ulint	zip_size = fil_space_get_zip_size(space);
 +			ulint	page_no = recv_addr->page_no;
 +
 +			if (recv_addr->state == RECV_NOT_PROCESSED) {
- 				if (!has_printed) {
- 					ib_logf(IB_LOG_LEVEL_INFO,
- 						"Starting an apply batch"
- 						" of log records"
- 						" to the database...");
- 					fputs("InnoDB: Progress in percent: ",
- 					      stderr);
- 					has_printed = TRUE;
- 				}
- 
- 				mutex_exit(&(recv_sys->mutex));
++				mutex_exit(&recv_sys->mutex);
 +
 +				if (buf_page_peek(space, page_no)) {
- 					buf_block_t*	block;
- 
++					mtr_t		mtr;
 +					mtr_start(&mtr);
- 
- 					block = buf_page_get(
++					buf_block_t*	block = buf_page_get(
 +						space, zip_size, page_no,
 +						RW_X_LATCH, &mtr);
 +					buf_block_dbg_add_level(
 +						block, SYNC_NO_ORDER_CHECK);
 +
 +					recv_recover_page(FALSE, block);
 +					mtr_commit(&mtr);
 +				} else {
 +					recv_read_in_area(space, zip_size,
 +							  page_no);
 +				}
 +
- 				mutex_enter(&(recv_sys->mutex));
++				mutex_enter(&recv_sys->mutex);
 +			}
 +		}
- 
- 		if (has_printed
- 		    && (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
- 		    != ((i + 1) * 100)
- 		    / hash_get_n_cells(recv_sys->addr_hash)) {
- 
- 			fprintf(stderr, "%lu ", (ulong)
- 				((i * 100)
- 				 / hash_get_n_cells(recv_sys->addr_hash)));
- 		}
 +	}
 +
 +	/* Wait until all the pages have been processed */
 +
 +	while (recv_sys->n_addrs != 0) {
 +
 +		mutex_exit(&(recv_sys->mutex));
 +
 +		os_thread_sleep(500000);
 +
 +		mutex_enter(&(recv_sys->mutex));
 +	}
 +
- 	if (has_printed) {
- 
- 		fprintf(stderr, "\n");
- 	}
- 
- 	if (!allow_ibuf) {
++	if (!last_batch) {
 +		bool	success;
 +
 +		/* Flush all the file pages to disk and invalidate them in
 +		the buffer pool */
 +
 +		ut_d(recv_no_log_write = TRUE);
 +		mutex_exit(&(recv_sys->mutex));
 +		mutex_exit(&(log_sys->mutex));
 +
 +		/* Stop the recv_writer thread from issuing any LRU
 +		flush batches. */
 +		mutex_enter(&recv_sys->writer_mutex);
 +
 +		/* Wait for any currently run batch to end. */
 +		buf_flush_wait_LRU_batch_end();
 +
 +		success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
 +
 +		ut_a(success);
 +
 +		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +		buf_pool_invalidate();
 +
 +		/* Allow batches from recv_writer thread. */
 +		mutex_exit(&recv_sys->writer_mutex);
 +
 +		mutex_enter(&(log_sys->mutex));
 +		mutex_enter(&(recv_sys->mutex));
 +		ut_d(recv_no_log_write = FALSE);
 +
 +		recv_no_ibuf_operations = FALSE;
 +	}
 +
 +	recv_sys->apply_log_recs = FALSE;
 +	recv_sys->apply_batch_on = FALSE;
 +
 +	recv_sys_empty_hash();
 +
- 	if (has_printed) {
- 		fprintf(stderr, "InnoDB: Apply batch completed\n");
- 	}
- 
- 	mutex_exit(&(recv_sys->mutex));
++	mutex_exit(&recv_sys->mutex);
 +}
 +#else /* !UNIV_HOTBACKUP */
 +/*******************************************************************//**
 +Applies log records in the hash table to a backup. */
 +UNIV_INTERN
 +void
 +recv_apply_log_recs_for_backup(void)
 +/*================================*/
 +{
 +	recv_addr_t*	recv_addr;
 +	ulint		n_hash_cells;
 +	buf_block_t*	block;
 +	ulint		actual_size;
 +	ibool		success;
 +	ulint		error;
 +	ulint		i;
 +
 +	recv_sys->apply_log_recs = TRUE;
 +	recv_sys->apply_batch_on = TRUE;
 +
 +	block = back_block1;
 +
- 	ib_logf(IB_LOG_LEVEL_INFO,
- 		"Starting an apply batch of log records to the database...");
- 
- 	fputs("InnoDB: Progress in percent: ", stderr);
- 
 +	n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
 +
 +	for (i = 0; i < n_hash_cells; i++) {
 +		/* The address hash table is externally chained */
 +		recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
 +
 +		while (recv_addr != NULL) {
 +
 +			ulint	zip_size
 +				= fil_space_get_zip_size(recv_addr->space);
 +
 +			if (zip_size == ULINT_UNDEFINED) {
 +#if 0
 +				fprintf(stderr,
 +					"InnoDB: Warning: cannot apply"
 +					" log record to"
 +					" tablespace %lu page %lu,\n"
 +					"InnoDB: because tablespace with"
 +					" that id does not exist.\n",
 +					recv_addr->space, recv_addr->page_no);
 +#endif
 +				recv_addr->state = RECV_PROCESSED;
 +
 +				ut_a(recv_sys->n_addrs);
 +				recv_sys->n_addrs--;
 +
 +				goto skip_this_recv_addr;
 +			}
 +
 +			/* We simulate a page read made by the buffer pool, to
 +			make sure the recovery apparatus works ok. We must init
 +			the block. */
 +
 +			buf_page_init_for_backup_restore(
 +				recv_addr->space, recv_addr->page_no,
 +				zip_size, block);
 +
 +			/* Extend the tablespace's last file if the page_no
 +			does not fall inside its bounds; we assume the last
 +			file is auto-extending, and mysqlbackup copied the file
 +			when it still was smaller */
 +
 +			success = fil_extend_space_to_desired_size(
 +				&actual_size,
 +				recv_addr->space, recv_addr->page_no + 1);
 +			if (!success) {
 +				fprintf(stderr,
 +					"InnoDB: Fatal error: cannot extend"
 +					" tablespace %u to hold %u pages\n",
 +					recv_addr->space, recv_addr->page_no);
 +
 +				exit(1);
 +			}
 +
 +			/* Read the page from the tablespace file using the
 +			fil0fil.cc routines */
 +
 +			if (zip_size) {
 +				error = fil_io(OS_FILE_READ, true,
 +					       recv_addr->space, zip_size,
 +					       recv_addr->page_no, 0, zip_size,
 +					       block->page.zip.data, NULL);
 +				if (error == DB_SUCCESS
 +				    && !buf_zip_decompress(block, TRUE)) {
 +					exit(1);
 +				}
 +			} else {
 +				error = fil_io(OS_FILE_READ, true,
 +					       recv_addr->space, 0,
 +					       recv_addr->page_no, 0,
 +					       UNIV_PAGE_SIZE,
 +					       block->frame, NULL);
 +			}
 +
 +			if (error != DB_SUCCESS) {
 +				fprintf(stderr,
 +					"InnoDB: Fatal error: cannot read"
 +					" from tablespace"
 +					" %lu page number %lu\n",
 +					(ulong) recv_addr->space,
 +					(ulong) recv_addr->page_no);
 +
 +				exit(1);
 +			}
 +
 +			/* Apply the log records to this page */
 +			recv_recover_page(FALSE, block);
 +
 +			/* Write the page back to the tablespace file using the
 +			fil0fil.cc routines */
 +
 +			buf_flush_init_for_writing(
 +				block->frame, buf_block_get_page_zip(block),
 +				mach_read_from_8(block->frame + FIL_PAGE_LSN));
 +
 +			if (zip_size) {
 +				error = fil_io(OS_FILE_WRITE, true,
 +					       recv_addr->space, zip_size,
 +					       recv_addr->page_no, 0,
 +					       zip_size,
 +					       block->page.zip.data, NULL);
 +			} else {
 +				error = fil_io(OS_FILE_WRITE, true,
 +					       recv_addr->space, 0,
 +					       recv_addr->page_no, 0,
 +					       UNIV_PAGE_SIZE,
 +					       block->frame, NULL);
 +			}
 +skip_this_recv_addr:
 +			recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 +		}
- 
- 		if ((100 * i) / n_hash_cells
- 		    != (100 * (i + 1)) / n_hash_cells) {
- 			fprintf(stderr, "%lu ",
- 				(ulong) ((100 * i) / n_hash_cells));
- 			fflush(stderr);
- 		}
 +	}
 +
 +	recv_sys_empty_hash();
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/*******************************************************************//**
 +Tries to parse a single log record and returns its length.
 +@return	length of the record, or 0 if the record was not complete */
 +UNIV_INTERN
 +ulint
 +recv_parse_log_rec(
 +/*===============*/
 +	byte*	ptr,	/*!< in: pointer to a buffer */
 +	byte*	end_ptr,/*!< in: pointer to the buffer end */
 +	byte*	type,	/*!< out: type */
 +	ulint*	space,	/*!< out: space id */
 +	ulint*	page_no,/*!< out: page number */
 +	byte**	body)	/*!< out: log record body start */
 +{
 +	byte*	new_ptr;
 +
 +	*body = NULL;
 +
 +	if (ptr == end_ptr) {
 +
 +		return(0);
 +	}
 +
 +	if (*ptr == MLOG_MULTI_REC_END) {
 +
 +		*type = *ptr;
 +
 +		return(1);
 +	}
 +
 +	if (*ptr == MLOG_DUMMY_RECORD) {
 +		*type = *ptr;
 +
 +		*space = ULINT_UNDEFINED - 1; /* For debugging */
 +
 +		return(1);
 +	}
 +
 +	new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
 +						page_no);
 +	*body = new_ptr;
 +
 +	if (UNIV_UNLIKELY(!new_ptr)) {
 +
 +		return(0);
 +	}
 +
 +#ifdef UNIV_LOG_LSN_DEBUG
 +	if (*type == MLOG_LSN) {
 +		lsn_t	lsn = (lsn_t) *space << 32 | *page_no;
 +# ifdef UNIV_LOG_DEBUG
 +		ut_a(lsn == log_sys->old_lsn);
 +# else /* UNIV_LOG_DEBUG */
 +		ut_a(lsn == recv_sys->recovered_lsn);
 +# endif /* UNIV_LOG_DEBUG */
 +	}
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +
 +	new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
 +						   NULL, NULL, *space);
 +	if (UNIV_UNLIKELY(new_ptr == NULL)) {
 +
 +		return(0);
 +	}
 +
 +	if (*page_no > recv_max_parsed_page_no) {
 +		recv_max_parsed_page_no = *page_no;
 +	}
 +
 +	return(new_ptr - ptr);
 +}
 +
 +/*******************************************************//**
 +Calculates the new value for lsn when more data is added to the log. */
 +UNIV_INTERN
 +lsn_t
 +recv_calc_lsn_on_data_add(
 +/*======================*/
 +	lsn_t		lsn,	/*!< in: old lsn */
 +	ib_uint64_t	len)	/*!< in: this many bytes of data is
 +				added, log block headers not included */
 +{
 +	ulint		frag_len;
 +	ib_uint64_t	lsn_len;
 +
 +	frag_len = (lsn % OS_FILE_LOG_BLOCK_SIZE) - LOG_BLOCK_HDR_SIZE;
 +	ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 +	      - LOG_BLOCK_TRL_SIZE);
 +	lsn_len = len;
 +	lsn_len += (lsn_len + frag_len)
 +		/ (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
 +		   - LOG_BLOCK_TRL_SIZE)
 +		* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
 +
 +	return(lsn + lsn_len);
 +}
 +
 +#ifdef UNIV_LOG_DEBUG
 +/*******************************************************//**
 +Checks that the parser recognizes incomplete initial segments of a log
 +record as incomplete. */
 +static
 +void
 +recv_check_incomplete_log_recs(
 +/*===========================*/
 +	byte*	ptr,	/*!< in: pointer to a complete log record */
 +	ulint	len)	/*!< in: length of the log record */
 +{
 +	ulint	i;
 +	byte	type;
 +	ulint	space;
 +	ulint	page_no;
 +	byte*	body;
 +
 +	for (i = 0; i < len; i++) {
 +		ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
 +					     &page_no, &body));
 +	}
 +}
 +#endif /* UNIV_LOG_DEBUG */
 +
 +/*******************************************************//**
 +Prints diagnostic info of corrupt log. */
 +static
 +void
 +recv_report_corrupt_log(
 +/*====================*/
 +	byte*	ptr,	/*!< in: pointer to corrupt log record */
 +	byte	type,	/*!< in: type of the record */
 +	ulint	space,	/*!< in: space id, this may also be garbage */
 +	ulint	page_no)/*!< in: page number, this may also be garbage */
 +{
 +	fprintf(stderr,
 +		"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
 +		"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
 +		"InnoDB: Log parsing proceeded successfully up to " LSN_PF "\n"
 +		"InnoDB: Previous log record type %lu, is multi %lu\n"
 +		"InnoDB: Recv offset %lu, prev %lu\n",
 +		(ulong) type, (ulong) space, (ulong) page_no,
 +		recv_sys->recovered_lsn,
 +		(ulong) recv_previous_parsed_rec_type,
 +		(ulong) recv_previous_parsed_rec_is_multi,
 +		(ulong) (ptr - recv_sys->buf),
 +		(ulong) recv_previous_parsed_rec_offset);
 +
 +	if ((ulint)(ptr - recv_sys->buf + 100)
 +	    > recv_previous_parsed_rec_offset
 +	    && (ulint)(ptr - recv_sys->buf + 100
 +		       - recv_previous_parsed_rec_offset)
 +	    < 200000) {
 +		fputs("InnoDB: Hex dump of corrupt log starting"
 +		      " 100 bytes before the start\n"
 +		      "InnoDB: of the previous log rec,\n"
 +		      "InnoDB: and ending 100 bytes after the start"
 +		      " of the corrupt rec:\n",
 +		      stderr);
 +
 +		ut_print_buf(stderr,
 +			     recv_sys->buf
 +			     + recv_previous_parsed_rec_offset - 100,
 +			     ptr - recv_sys->buf + 200
 +			     - recv_previous_parsed_rec_offset);
 +		putc('\n', stderr);
 +	}
 +
 +#ifndef UNIV_HOTBACKUP
 +	if (!srv_force_recovery) {
 +		fputs("InnoDB: Set innodb_force_recovery"
 +		      " to ignore this error.\n", stderr);
 +		ut_error;
 +	}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +	fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
 +	      "InnoDB: is possible that the log scan did not proceed\n"
 +	      "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
 +	      "InnoDB: on your InnoDB tables to check that they are ok!\n"
 +	      "InnoDB: If mysqld crashes after this recovery, look at\n"
 +	      "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
 +	      "InnoDB: about forcing recovery.\n", stderr);
 +
 +	fflush(stderr);
 +}
 +
 +/*******************************************************//**
 +Parses log records from a buffer and stores them to a hash table to wait
 +merging to file pages.
 +@return	currently always returns FALSE */
 +static
 +ibool
 +recv_parse_log_recs(
 +/*================*/
 +	ibool	store_to_hash)	/*!< in: TRUE if the records should be stored
 +				to the hash table; this is set to FALSE if just
 +				debug checking is needed */
 +{
 +	byte*	ptr;
 +	byte*	end_ptr;
 +	ulint	single_rec;
 +	ulint	len;
 +	ulint	total_len;
 +	lsn_t	new_recovered_lsn;
 +	lsn_t	old_lsn;
 +	byte	type;
 +	ulint	space;
 +	ulint	page_no;
 +	byte*	body;
 +	ulint	n_recs;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +	ut_ad(recv_sys->parse_start_lsn != 0);
 +loop:
 +	ptr = recv_sys->buf + recv_sys->recovered_offset;
 +
 +	end_ptr = recv_sys->buf + recv_sys->len;
 +
 +	if (ptr == end_ptr) {
 +
 +		return(FALSE);
 +	}
 +
 +	single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
 +
 +	if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
 +		/* The mtr only modified a single page, or this is a file op */
 +
 +		old_lsn = recv_sys->recovered_lsn;
 +
 +		/* Try to parse a log record, fetching its type, space id,
 +		page no, and a pointer to the body of the log record */
 +
 +		len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 +					 &page_no, &body);
 +
 +		if (len == 0 || recv_sys->found_corrupt_log) {
 +			if (recv_sys->found_corrupt_log) {
 +
 +				recv_report_corrupt_log(ptr,
 +							type, space, page_no);
 +			}
 +
 +			return(FALSE);
 +		}
 +
 +		new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
 +
 +		if (new_recovered_lsn > recv_sys->scanned_lsn) {
 +			/* The log record filled a log block, and we require
 +			that also the next log block should have been scanned
 +			in */
 +
 +			return(FALSE);
 +		}
 +
 +		recv_previous_parsed_rec_type = (ulint) type;
 +		recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
 +		recv_previous_parsed_rec_is_multi = 0;
 +
 +		recv_sys->recovered_offset += len;
 +		recv_sys->recovered_lsn = new_recovered_lsn;
 +
 +		DBUG_PRINT("ib_log",
 +			   ("scan " DBUG_LSN_PF ": log rec %u len %u "
 +			    "page %u:%u", old_lsn,
 +			    (unsigned) type, (unsigned) len,
 +			    (unsigned) space, (unsigned) page_no));
 +
 +		if (type == MLOG_DUMMY_RECORD) {
 +			/* Do nothing */
 +
 +		} else if (!store_to_hash) {
 +			/* In debug checking, update a replicate page
 +			according to the log record, and check that it
 +			becomes identical with the original page */
 +#ifdef UNIV_LOG_DEBUG
 +			recv_check_incomplete_log_recs(ptr, len);
 +#endif/* UNIV_LOG_DEBUG */
 +
 +		} else if (type == MLOG_FILE_CREATE
 +			   || type == MLOG_FILE_CREATE2
 +			   || type == MLOG_FILE_RENAME
 +			   || type == MLOG_FILE_DELETE) {
 +			ut_a(space);
 +#ifdef UNIV_HOTBACKUP
 +			if (recv_replay_file_ops) {
 +
 +				/* In mysqlbackup --apply-log, replay an .ibd
 +				file operation, if possible; note that
 +				fil_path_to_mysql_datadir is set in mysqlbackup
 +				to point to the datadir we should use there */
 +
 +				if (NULL == fil_op_log_parse_or_replay(
 +					    body, end_ptr, type,
 +					    space, page_no)) {
 +					fprintf(stderr,
 +						"InnoDB: Error: file op"
 +						" log record of type %lu"
 +						" space %lu not complete in\n"
 +						"InnoDB: the replay phase."
 +						" Path %s\n",
 +						(ulint) type, space,
 +						(char*)(body + 2));
 +
 +					ut_error;
 +				}
 +			}
 +#endif
 +			/* In normal mysqld crash recovery we do not try to
 +			replay file operations */
 +#ifdef UNIV_LOG_LSN_DEBUG
 +		} else if (type == MLOG_LSN) {
 +			/* Do not add these records to the hash table.
 +			The page number and space id fields are misused
 +			for something else. */
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +		} else {
 +			recv_add_to_hash_table(type, space, page_no, body,
 +					       ptr + len, old_lsn,
 +					       recv_sys->recovered_lsn);
 +		}
 +	} else {
 +		/* Check that all the records associated with the single mtr
 +		are included within the buffer */
 +
 +		total_len = 0;
 +		n_recs = 0;
 +
 +		for (;;) {
 +			len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 +						 &page_no, &body);
 +			if (len == 0 || recv_sys->found_corrupt_log) {
 +
 +				if (recv_sys->found_corrupt_log) {
 +
 +					recv_report_corrupt_log(
 +						ptr, type, space, page_no);
 +				}
 +
 +				return(FALSE);
 +			}
 +
 +			recv_previous_parsed_rec_type = (ulint) type;
 +			recv_previous_parsed_rec_offset
 +				= recv_sys->recovered_offset + total_len;
 +			recv_previous_parsed_rec_is_multi = 1;
 +
 +#ifdef UNIV_LOG_DEBUG
 +			if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
 +				recv_check_incomplete_log_recs(ptr, len);
 +			}
 +#endif /* UNIV_LOG_DEBUG */
 +
 +			DBUG_PRINT("ib_log",
 +				   ("scan " DBUG_LSN_PF ": multi-log rec %u "
 +				    "len %u page %u:%u",
 +				    recv_sys->recovered_lsn,
 +				    (unsigned) type, (unsigned) len,
 +				    (unsigned) space, (unsigned) page_no));
 +
 +			total_len += len;
 +			n_recs++;
 +
 +			ptr += len;
 +
 +			if (type == MLOG_MULTI_REC_END) {
 +
 +				/* Found the end mark for the records */
 +
 +				break;
 +			}
 +		}
 +
 +		new_recovered_lsn = recv_calc_lsn_on_data_add(
 +			recv_sys->recovered_lsn, total_len);
 +
 +		if (new_recovered_lsn > recv_sys->scanned_lsn) {
 +			/* The log record filled a log block, and we require
 +			that also the next log block should have been scanned
 +			in */
 +
 +			return(FALSE);
 +		}
 +
 +		/* Add all the records to the hash table */
 +
 +		ptr = recv_sys->buf + recv_sys->recovered_offset;
 +
 +		for (;;) {
 +			old_lsn = recv_sys->recovered_lsn;
 +			len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 +						 &page_no, &body);
 +			if (recv_sys->found_corrupt_log) {
 +
 +				recv_report_corrupt_log(ptr,
 +							type, space, page_no);
 +			}
 +
 +			ut_a(len != 0);
 +			ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
 +
 +			recv_sys->recovered_offset += len;
 +			recv_sys->recovered_lsn
 +				= recv_calc_lsn_on_data_add(old_lsn, len);
 +			if (type == MLOG_MULTI_REC_END) {
 +
 +				/* Found the end mark for the records */
 +
 +				break;
 +			}
 +
 +			if (store_to_hash
 +#ifdef UNIV_LOG_LSN_DEBUG
 +			    && type != MLOG_LSN
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +			    ) {
 +				recv_add_to_hash_table(type, space, page_no,
 +						       body, ptr + len,
 +						       old_lsn,
 +						       new_recovered_lsn);
 +			}
 +
 +			ptr += len;
 +		}
 +	}
 +
 +	goto loop;
 +}
 +
 +/*******************************************************//**
 +Adds data from a new log block to the parsing buffer of recv_sys if
 +recv_sys->parse_start_lsn is non-zero.
 +@return	TRUE if more data added */
 +static
 +ibool
 +recv_sys_add_to_parsing_buf(
 +/*========================*/
 +	const byte*	log_block,	/*!< in: log block */
 +	lsn_t		scanned_lsn)	/*!< in: lsn of how far we were able
 +					to find data in this log block */
 +{
 +	ulint	more_len;
 +	ulint	data_len;
 +	ulint	start_offset;
 +	ulint	end_offset;
 +
 +	ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
 +
 +	if (!recv_sys->parse_start_lsn) {
 +		/* Cannot start parsing yet because no start point for
 +		it found */
 +
 +		return(FALSE);
 +	}
 +
 +	data_len = log_block_get_data_len(log_block);
 +
 +	if (recv_sys->parse_start_lsn >= scanned_lsn) {
 +
 +		return(FALSE);
 +
 +	} else if (recv_sys->scanned_lsn >= scanned_lsn) {
 +
 +		return(FALSE);
 +
 +	} else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
 +		more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
 +	} else {
 +		more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
 +	}
 +
 +	if (more_len == 0) {
 +
 +		return(FALSE);
 +	}
 +
 +	ut_ad(data_len >= more_len);
 +
 +	start_offset = data_len - more_len;
 +
 +	if (start_offset < LOG_BLOCK_HDR_SIZE) {
 +		start_offset = LOG_BLOCK_HDR_SIZE;
 +	}
 +
 +	end_offset = data_len;
 +
 +	if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
 +		end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
 +	}
 +
 +	ut_ad(start_offset <= end_offset);
 +
 +	if (start_offset < end_offset) {
 +		ut_memcpy(recv_sys->buf + recv_sys->len,
 +			  log_block + start_offset, end_offset - start_offset);
 +
 +		recv_sys->len += end_offset - start_offset;
 +
 +		ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
 +	}
 +
 +	return(TRUE);
 +}
 +
 +/*******************************************************//**
 +Moves the parsing buffer data left to the buffer start. */
 +static
 +void
 +recv_sys_justify_left_parsing_buf(void)
 +/*===================================*/
 +{
 +	ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
 +		   recv_sys->len - recv_sys->recovered_offset);
 +
 +	recv_sys->len -= recv_sys->recovered_offset;
 +
 +	recv_sys->recovered_offset = 0;
 +}
 +
 +/*******************************************************//**
 +Scans log from a buffer and stores new log data to the parsing buffer.
 +Parses and hashes the log records if new data found.  Unless
 +UNIV_HOTBACKUP is defined, this function will apply log records
 +automatically when the hash table becomes full.
 +@return TRUE if limit_lsn has been reached, or not able to scan any
 +more in this log group */
 +UNIV_INTERN
 +ibool
 +recv_scan_log_recs(
 +/*===============*/
 +	ulint		available_memory,/*!< in: we let the hash table of recs
 +					to grow to this size, at the maximum */
 +	ibool		store_to_hash,	/*!< in: TRUE if the records should be
 +					stored to the hash table; this is set
 +					to FALSE if just debug checking is
 +					needed */
 +	const byte*	buf,		/*!< in: buffer containing a log
 +					segment or garbage */
 +	ulint		len,		/*!< in: buffer length */
 +	lsn_t		start_lsn,	/*!< in: buffer start lsn */
 +	lsn_t*		contiguous_lsn,	/*!< in/out: it is known that all log
 +					groups contain contiguous log data up
 +					to this lsn */
 +	lsn_t*		group_scanned_lsn)/*!< out: scanning succeeded up to
 +					this lsn */
 +{
 +	const byte*	log_block;
 +	ulint		no;
 +	lsn_t		scanned_lsn;
 +	ibool		finished;
 +	ulint		data_len;
 +	ibool		more_data;
 +
 +	ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 +	ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
 +	ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE);
 +	ut_a(store_to_hash <= TRUE);
 +
 +	finished = FALSE;
 +
 +	log_block = buf;
 +	scanned_lsn = start_lsn;
 +	more_data = FALSE;
 +
 +	do {
 +		no = log_block_get_hdr_no(log_block);
 +		/*
 +		fprintf(stderr, "Log block header no %lu\n", no);
 +
 +		fprintf(stderr, "Scanned lsn no %lu\n",
 +		log_block_convert_lsn_to_no(scanned_lsn));
 +		*/
 +		if (no != log_block_convert_lsn_to_no(scanned_lsn)
 +		    || !log_block_checksum_is_ok_or_old_format(log_block)) {
 +
 +			if (no == log_block_convert_lsn_to_no(scanned_lsn)
 +			    && !log_block_checksum_is_ok_or_old_format(
 +				    log_block)) {
 +				fprintf(stderr,
 +					"InnoDB: Log block no %lu at"
 +					" lsn " LSN_PF " has\n"
 +					"InnoDB: ok header, but checksum field"
 +					" contains %lu, should be %lu\n",
 +					(ulong) no,
 +					scanned_lsn,
 +					(ulong) log_block_get_checksum(
 +						log_block),
 +					(ulong) log_block_calc_checksum(
 +						log_block));
 +			}
 +
 +			/* Garbage or an incompletely written log block */
 +
 +			finished = TRUE;
 +
 +			break;
 +		}
 +
 +		if (log_block_get_flush_bit(log_block)) {
 +			/* This block was a start of a log flush operation:
 +			we know that the previous flush operation must have
 +			been completed for all log groups before this block
 +			can have been flushed to any of the groups. Therefore,
 +			we know that log data is contiguous up to scanned_lsn
 +			in all non-corrupt log groups. */
 +
 +			if (scanned_lsn > *contiguous_lsn) {
 +				*contiguous_lsn = scanned_lsn;
 +			}
 +		}
 +
 +		data_len = log_block_get_data_len(log_block);
 +
 +		if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
 +		    && scanned_lsn + data_len > recv_sys->scanned_lsn
 +		    && (recv_sys->scanned_checkpoint_no > 0)
 +		    && (log_block_get_checkpoint_no(log_block)
 +			< recv_sys->scanned_checkpoint_no)
 +		    && (recv_sys->scanned_checkpoint_no
 +			- log_block_get_checkpoint_no(log_block)
 +			> 0x80000000UL)) {
 +
 +			/* Garbage from a log buffer flush which was made
 +			before the most recent database recovery */
 +
 +			finished = TRUE;
 +#ifdef UNIV_LOG_DEBUG
 +			/* This is not really an error, but currently
 +			we stop here in the debug version: */
 +
 +			ut_error;
 +#endif
 +			break;
 +		}
 +
 +		if (!recv_sys->parse_start_lsn
 +		    && (log_block_get_first_rec_group(log_block) > 0)) {
 +
 +			/* We found a point from which to start the parsing
 +			of log records */
 +
 +			recv_sys->parse_start_lsn = scanned_lsn
 +				+ log_block_get_first_rec_group(log_block);
 +			recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
 +			recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
 +		}
 +
 +		scanned_lsn += data_len;
 +
 +		if (scanned_lsn > recv_sys->scanned_lsn) {
 +
 +			/* We have found more entries. If this scan is
 + 			of startup type, we must initiate crash recovery
 +			environment before parsing these log records. */
 +
 +#ifndef UNIV_HOTBACKUP
 +			if (recv_log_scan_is_startup_type
 +			    && !recv_needed_recovery) {
- 
 +				if (!srv_read_only_mode) {
 +					ib_logf(IB_LOG_LEVEL_INFO,
- 						"Log scan progressed past the "
- 						"checkpoint lsn " LSN_PF "",
++						"Starting crash recovery from "
++						"checkpoint LSN=" LSN_PF,
 +						recv_sys->scanned_lsn);
 +
 +					recv_init_crash_recovery();
 +				} else {
 +					ib_logf(IB_LOG_LEVEL_ERROR,
 +						"innodb_read_only prevents"
 +						" crash recovery");
 +					recv_needed_recovery = TRUE;
 +					return(TRUE);
 +				}
 +			}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +			/* We were able to find more log data: add it to the
 +			parsing buffer if parse_start_lsn is already
 +			non-zero */
 +
 +			if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
 +			    >= RECV_PARSING_BUF_SIZE) {
 +				fprintf(stderr,
 +					"InnoDB: Error: log parsing"
 +					" buffer overflow."
 +					" Recovery may have failed!\n");
 +
 +				recv_sys->found_corrupt_log = TRUE;
 +
 +#ifndef UNIV_HOTBACKUP
 +				if (!srv_force_recovery) {
 +					fputs("InnoDB: Set"
 +					      " innodb_force_recovery"
 +					      " to ignore this error.\n",
 +					      stderr);
 +					ut_error;
 +				}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +			} else if (!recv_sys->found_corrupt_log) {
 +				more_data = recv_sys_add_to_parsing_buf(
 +					log_block, scanned_lsn);
 +			}
 +
 +			recv_sys->scanned_lsn = scanned_lsn;
 +			recv_sys->scanned_checkpoint_no
 +				= log_block_get_checkpoint_no(log_block);
 +		}
 +
 +		if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
 +			/* Log data for this group ends here */
 +
 +			finished = TRUE;
 +			break;
 +		} else {
 +			log_block += OS_FILE_LOG_BLOCK_SIZE;
 +		}
 +	} while (log_block < buf + len && !finished);
 +
 +	*group_scanned_lsn = scanned_lsn;
 +
- 	if (recv_needed_recovery
- 	    || (recv_is_from_backup && !recv_is_making_a_backup)) {
- 		recv_scan_print_counter++;
- 
- 		if (finished || (recv_scan_print_counter % 80 == 0)) {
- 
- 			fprintf(stderr,
- 				"InnoDB: Doing recovery: scanned up to"
- 				" log sequence number " LSN_PF "\n",
- 				*group_scanned_lsn);
- 		}
- 	}
- 
 +	if (more_data && !recv_sys->found_corrupt_log) {
 +		/* Try to parse more log records */
 +
 +		recv_parse_log_recs(store_to_hash);
 +
 +#ifndef UNIV_HOTBACKUP
 +		if (store_to_hash
 +		    && mem_heap_get_size(recv_sys->heap) > available_memory) {
 +
 +			/* Hash table of log records has grown too big:
 +			empty it; FALSE means no ibuf operations
 +			allowed, as we cannot add new records to the
 +			log yet: they would be produced by ibuf
 +			operations */
 +
- 			recv_apply_hashed_log_recs(FALSE);
++			recv_apply_hashed_log_recs(false);
 +		}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +		if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
 +			/* Move parsing buffer data to the buffer start */
 +
 +			recv_sys_justify_left_parsing_buf();
 +		}
 +	}
 +
 +	return(finished);
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/*******************************************************//**
 +Scans log from a buffer and stores new log data to the parsing buffer. Parses
 +and hashes the log records if new data found.
 +The group is read into log_sys->buf in consecutive segments of RECV_SCAN_SIZE
 +bytes, starting at *contiguous_lsn, and each segment is handed to
 +recv_scan_log_recs() until that function reports that the scan is finished. */
 +static
 +void
 +recv_group_scan_log_recs(
 +/*=====================*/
 +	log_group_t*	group,		/*!< in: log group */
 +	lsn_t*		contiguous_lsn,	/*!< in/out: it is known that all log
 +					groups contain contiguous log data up
 +					to this lsn */
 +	lsn_t*		group_scanned_lsn)/*!< out: scanning succeeded up to
 +					this lsn */
 +{
 +	ibool	finished;
 +	lsn_t	start_lsn;
 +	lsn_t	end_lsn;
 +
 +	finished = FALSE;
 +
 +	start_lsn = *contiguous_lsn;
 +
 +	while (!finished) {
 +		end_lsn = start_lsn + RECV_SCAN_SIZE;
 +
 +		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
 +				       group, start_lsn, end_lsn, FALSE);
 +
 +		finished = recv_scan_log_recs(
 +			(buf_pool_get_n_pages()
 +			- (recv_n_pool_free_frames * srv_buf_pool_instances))
 +			* UNIV_PAGE_SIZE,
 +			TRUE, log_sys->buf, RECV_SCAN_SIZE,
 +			start_lsn, contiguous_lsn, group_scanned_lsn);
 +		start_lsn = end_lsn;
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	if (log_debug_writes) {
 +		fprintf(stderr,
 +			"InnoDB: Scanned group %lu up to"
 +			" log sequence number " LSN_PF "\n",
 +			(ulong) group->id,
 +			*group_scanned_lsn);
 +	}
 +#endif /* UNIV_DEBUG */
 +}
 +
 +/*******************************************************//**
 +Initialize crash recovery environment. Can be called iff
 +recv_needed_recovery == FALSE.
 +Sets recv_needed_recovery, loads the single-table tablespaces and, unless
 +srv_force_recovery is at least SRV_FORCE_NO_LOG_REDO, processes the
 +doublewrite buffer and starts the recv_writer thread. */
 +static
 +void
 +recv_init_crash_recovery(void)
 +/*==========================*/
 +{
 +	ut_ad(!srv_read_only_mode);
 +	ut_a(!recv_needed_recovery);
 +
 +	recv_needed_recovery = TRUE;
 +
- 	ib_logf(IB_LOG_LEVEL_INFO, "Database was not shutdown normally!");
- 	ib_logf(IB_LOG_LEVEL_INFO, "Starting crash recovery.");
- 	ib_logf(IB_LOG_LEVEL_INFO,
- 		"Reading tablespace information from the .ibd files...");
- 
 +	fil_load_single_table_tablespaces();
 +
 +	/* If we are using the doublewrite method, we will
 +	check if there are half-written pages in data files,
 +	and restore them from the doublewrite buffer if
 +	possible */
 +
 +	if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
- 			"Restoring possible half-written data pages ");
- 
- 		ib_logf(IB_LOG_LEVEL_INFO,
++			"Restoring possible half-written data pages "
 +			"from the doublewrite buffer...");
 +
 +		buf_dblwr_process();
 +
 +		/* Spawn the background thread to flush dirty pages
 +		from the buffer pools. The flag is set before the thread
 +		is created and the handle is kept in
 +		recv_writer_thread_handle. */
 +		recv_writer_thread_active = true;
 +		recv_writer_thread_handle = os_thread_create(
 +			recv_writer_thread, 0, 0);
 +	}
 +}
 +
 +/********************************************************//**
 +Recovers from a checkpoint. When this function returns, the database is able
 +to start processing of new user transactions, but the function
 +recv_recovery_from_checkpoint_finish should be called later to complete
 +the recovery and free the resources used in it.
 +NOTE(review): log_sys->mutex is acquired in this function, but the DB_ERROR
 +return taken for a mysqlbackup-created log in read-only mode and both
 +DB_READ_ONLY returns have no mutex_exit() before them; confirm whether that
 +is intended.
 +@return	error code or DB_SUCCESS */
 +UNIV_INTERN
 +dberr_t
 +recv_recovery_from_checkpoint_start_func(
 +/*=====================================*/
 +#ifdef UNIV_LOG_ARCHIVE
 +	ulint	type,		/*!< in: LOG_CHECKPOINT or LOG_ARCHIVE */
 +	lsn_t	limit_lsn,	/*!< in: recover up to this lsn if possible */
 +#endif /* UNIV_LOG_ARCHIVE */
 +	lsn_t	min_flushed_lsn,/*!< in: min flushed lsn from data files */
 +	lsn_t	max_flushed_lsn)/*!< in: max flushed lsn from data files */
 +{
 +	log_group_t*	group;
 +	log_group_t*	max_cp_group;
 +	ulint		max_cp_field;
 +	lsn_t		checkpoint_lsn;
 +	ib_uint64_t	checkpoint_no;
 +	lsn_t		group_scanned_lsn = 0;
 +	lsn_t		contiguous_lsn;
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_group_t*	up_to_date_group;
 +	lsn_t		archived_lsn;
 +#endif /* UNIV_LOG_ARCHIVE */
 +	byte*		buf;
 +	byte*		log_hdr_buf;
 +	byte*		log_hdr_buf_base = reinterpret_cast<byte *>
 +		(alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 +	dberr_t		err;
 +
 +	/* Initialize red-black tree for fast insertions into the
 +	flush_list during recovery process. */
 +	buf_flush_init_flush_rbt();
 +
 +	ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr);
 +
 +	log_hdr_buf = static_cast<byte *>
 +		(ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE));
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	ut_ad(type != LOG_CHECKPOINT || limit_lsn == LSN_MAX);
 +/** TRUE when recovering from a checkpoint */
 +# define TYPE_CHECKPOINT	(type == LOG_CHECKPOINT)
 +/** Recover up to this log sequence number */
 +# define LIMIT_LSN		limit_lsn
 +#else /* UNIV_LOG_ARCHIVE */
 +/** TRUE when recovering from a checkpoint */
 +# define TYPE_CHECKPOINT	1
 +/** Recover up to this log sequence number */
 +# define LIMIT_LSN		LSN_MAX
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"The user has set SRV_FORCE_NO_LOG_REDO on, "
 +			"skipping log redo");
 +
 +		return(DB_SUCCESS);
 +	}
 +
 +	recv_recovery_on = TRUE;
 +
 +	recv_sys->limit_lsn = LIMIT_LSN;
 +
 +	mutex_enter(&(log_sys->mutex));
 +
 +	/* Look for the latest checkpoint from any of the log groups */
 +
 +	err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
 +
 +	if (err != DB_SUCCESS) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		return(err);
 +	}
 +
 +	log_group_read_checkpoint_info(max_cp_group, max_cp_field);
 +
 +	buf = log_sys->checkpoint_buf;
 +
 +	checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
 +	checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
 +#ifdef UNIV_LOG_ARCHIVE
 +	archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	/* Read the first log file header to print a note if this is
 +	a recovery from a restored InnoDB Hot Backup */
 +
 +	fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0,
 +	       0, 0, LOG_FILE_HDR_SIZE,
 +	       log_hdr_buf, max_cp_group);
 +
 +	if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 +			   (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
 +
 +		if (srv_read_only_mode) {
 +
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Cannot restore from mysqlbackup, InnoDB "
 +				"running in read-only mode!");
 +
 +			return(DB_ERROR);
 +		}
 +
 +		/* This log file was created by mysqlbackup --restore: print
 +		a note to the user about it */
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"The log file was created by mysqlbackup --apply-log "
 +			"at %s. The following crash recovery is part of a "
 +			"normal restore.",
 +			log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
 +
 +		/* Wipe over the label now: the first 4 bytes of the label
 +		are overwritten with spaces in the in-memory header */
 +
 +		memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
 +		       ' ', 4);
 +		/* Write to the log file to wipe over the label */
 +		fil_io(OS_FILE_WRITE | OS_FILE_LOG, true,
 +		       max_cp_group->space_id, 0,
 +		       0, 0, OS_FILE_LOG_BLOCK_SIZE,
 +		       log_hdr_buf, max_cp_group);
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	while (group) {
 +		log_checkpoint_get_nth_group_info(buf, group->id,
 +						  &(group->archived_file_no));
 +
 +		log_archived_get_offset(group, group->archived_file_no,
 +			archived_lsn, &(group->archived_offset));
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if (TYPE_CHECKPOINT) {
 +		/* Start reading the log groups from the checkpoint lsn up. The
 +		variable contiguous_lsn contains an lsn up to which the log is
 +		known to be contiguously written to all log groups. */
 +
 +		recv_sys->parse_start_lsn = checkpoint_lsn;
 +		recv_sys->scanned_lsn = checkpoint_lsn;
 +		recv_sys->scanned_checkpoint_no = 0;
 +		recv_sys->recovered_lsn = checkpoint_lsn;
 +
 +		srv_start_lsn = checkpoint_lsn;
 +	}
 +
 +	contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
 +					      OS_FILE_LOG_BLOCK_SIZE);
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (TYPE_CHECKPOINT) {
 +		up_to_date_group = max_cp_group;
 +	} else {
 +		ulint	capacity;
 +
 +		/* Try to recover the remaining part from logs: first from
 +		the logs of the archived group */
 +
 +		group = recv_sys->archive_group;
 +		capacity = log_group_get_capacity(group);
 +
 +		if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
 +		    || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
 +
 +			mutex_exit(&(log_sys->mutex));
 +
 +			/* The group does not contain enough log: probably
 +			an archived log file was missing or corrupt */
 +
 +			return(DB_ERROR);
 +		}
 +
 +		recv_group_scan_log_recs(group, &contiguous_lsn,
 +					 &group_scanned_lsn);
 +		if (recv_sys->scanned_lsn < checkpoint_lsn) {
 +
 +			mutex_exit(&(log_sys->mutex));
 +
 +			/* The group did not contain enough log: an archived
 +			log file was missing or invalid, or the log group
 +			was corrupt */
 +
 +			return(DB_ERROR);
 +		}
 +
 +		group->scanned_lsn = group_scanned_lsn;
 +		up_to_date_group = group;
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	/* Set the flag to publish that we are doing startup scan. */
 +	recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
 +	while (group) {
 +#ifdef UNIV_LOG_ARCHIVE
 +		lsn_t	old_scanned_lsn	= recv_sys->scanned_lsn;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +		recv_group_scan_log_recs(group, &contiguous_lsn,
 +					 &group_scanned_lsn);
 +		group->scanned_lsn = group_scanned_lsn;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +		if (old_scanned_lsn < group_scanned_lsn) {
 +			/* We found a more up-to-date group */
 +
 +			up_to_date_group = group;
 +		}
 +
 +		if ((type == LOG_ARCHIVE)
 +		    && (group == recv_sys->archive_group)) {
 +			group = UT_LIST_GET_NEXT(log_groups, group);
 +		}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	/* Done with startup scan. Clear the flag. */
 +	recv_log_scan_is_startup_type = FALSE;
 +
 +	if (srv_read_only_mode && recv_needed_recovery) {
 +		return(DB_READ_ONLY);
 +	}
 +
 +	if (TYPE_CHECKPOINT) {
 +		/* NOTE: we always do a 'recovery' at startup, but only if
 +		there is something wrong we will print a message to the
 +		user about recovery:
 +		NOTE(review): in the warning below, the adjacent string
 +		literals ending in 'log' and starting with 'sequence numbers'
 +		are concatenated without a space between them. */
 +
 +		if (checkpoint_lsn != max_flushed_lsn
 +		    || checkpoint_lsn != min_flushed_lsn) {
 +
 +			if (checkpoint_lsn < max_flushed_lsn) {
 +
 +				ib_logf(IB_LOG_LEVEL_WARN,
 +					"The log sequence number "
 +					"in the ibdata files is higher "
 +					"than the log sequence number "
 +					"in the ib_logfiles! Are you sure "
 +					"you are using the right "
 +					"ib_logfiles to start up the database. "
 +					"Log sequence number in the "
 +					"ib_logfiles is " LSN_PF ", log"
 +					"sequence numbers stamped "
 +					"to ibdata file headers are between "
 +					"" LSN_PF " and " LSN_PF ".",
 +					checkpoint_lsn,
 +					min_flushed_lsn,
 +					max_flushed_lsn);
 +			}
 +
 +			if (!recv_needed_recovery) {
 +				ib_logf(IB_LOG_LEVEL_INFO,
 +					"The log sequence numbers "
 +					LSN_PF " and " LSN_PF
 +					" in ibdata files do not match"
 +					" the log sequence number "
 +					LSN_PF
 +					" in the ib_logfiles!",
 +					min_flushed_lsn,
 +					max_flushed_lsn,
 +					checkpoint_lsn);
 +
 +				if (!srv_read_only_mode) {
 +					recv_init_crash_recovery();
 +				} else {
 +					ib_logf(IB_LOG_LEVEL_ERROR,
 +						"Can't initiate database "
 +						"recovery, running "
 +						"in read-only-mode.");
 +					return(DB_READ_ONLY);
 +				}
 +			}
 +		}
 +	}
 +
 +	/* We currently have only one log group */
 +	if (group_scanned_lsn < checkpoint_lsn
 +	    || group_scanned_lsn < recv_max_page_lsn) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"We scanned the log up to "
 +			LSN_PF ". A checkpoint was at " LSN_PF
 +			" and the maximum LSN on a database page was " LSN_PF
 +			". It is possible that the database is now corrupt!",
 +			group_scanned_lsn, checkpoint_lsn, recv_max_page_lsn);
 +	}
 +
 +	if (recv_sys->recovered_lsn < checkpoint_lsn) {
 +
 +		mutex_exit(&(log_sys->mutex));
 +
 +		if (recv_sys->recovered_lsn >= LIMIT_LSN) {
 +
 +			return(DB_SUCCESS);
 +		}
 +
 +		/* No harm in trying to do RO access. */
 +		if (!srv_read_only_mode) {
 +			ut_error;
 +		}
 +
 +		return(DB_ERROR);
 +	}
 +
 +	/* Synchronize the uncorrupted log groups to the most up-to-date log
 +	group; we also copy checkpoint info to groups */
 +
 +	log_sys->next_checkpoint_lsn = checkpoint_lsn;
 +	log_sys->next_checkpoint_no = checkpoint_no + 1;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_sys->archived_lsn = archived_lsn;
 +
 +	recv_synchronize_groups(up_to_date_group);
 +#else /* UNIV_LOG_ARCHIVE */
 +	recv_synchronize_groups();
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	if (!recv_needed_recovery) {
 +		ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
 +	} else {
 +		srv_start_lsn = recv_sys->recovered_lsn;
 +	}
 +
 +	log_sys->lsn = recv_sys->recovered_lsn;
 +
 +	ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
 +	log_sys->buf_next_to_write = log_sys->buf_free;
 +	log_sys->written_to_some_lsn = log_sys->lsn;
 +	log_sys->written_to_all_lsn = log_sys->lsn;
 +
 +	log_sys->last_checkpoint_lsn = checkpoint_lsn;
 +
 +	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 +		    log_sys->lsn - log_sys->last_checkpoint_lsn);
 +
 +	log_sys->next_checkpoint_no = checkpoint_no + 1;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (archived_lsn == LSN_MAX) {
 +
 +		log_sys->archiving_state = LOG_ARCH_OFF;
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	mutex_enter(&recv_sys->mutex);
 +
 +	recv_sys->apply_log_recs = TRUE;
 +
 +	mutex_exit(&recv_sys->mutex);
 +
 +	mutex_exit(&log_sys->mutex);
 +
 +	recv_lsn_checks_on = TRUE;
 +
 +	/* The database is now ready to start almost normal processing of user
 +	transactions: transaction rollbacks and the application of the log
 +	records in the hash table can be run in background. */
 +
 +	return(DB_SUCCESS);
 +
 +#undef TYPE_CHECKPOINT
 +#undef LIMIT_LSN
 +}
 +
 +/********************************************************//**
 +Completes recovery from a checkpoint.
 +Clears recv_recovery_on, waits until recv_writer_thread_active becomes
 +false, calls recv_sys_debug_free() (unless UNIV_LOG_DEBUG is defined), frees
 +the flush_rbt and, unless srv_force_recovery is at least
 +SRV_FORCE_NO_TRX_UNDO, calls trx_rollback_or_clean_recovered(FALSE). */
 +UNIV_INTERN
 +void
 +recv_recovery_from_checkpoint_finish(void)
 +/*======================================*/
 +{
 +	if (recv_needed_recovery) {
 +		trx_sys_print_mysql_master_log_pos();
 +		trx_sys_print_mysql_binlog_offset();
 +	}
 +
 +	if (recv_sys->found_corrupt_log) {
 +
 +		fprintf(stderr,
 +			"InnoDB: WARNING: the log file may have been"
 +			" corrupt and it\n"
 +			"InnoDB: is possible that the log scan or parsing"
 +			" did not proceed\n"
 +			"InnoDB: far enough in recovery. Please run"
 +			" CHECK TABLE\n"
 +			"InnoDB: on your InnoDB tables to check that"
 +			" they are ok!\n"
 +			"InnoDB: It may be safest to recover your"
 +			" InnoDB database from\n"
 +			"InnoDB: a backup!\n");
 +	}
 +
 +	/* Make sure that the recv_writer thread is done. This is
 +	required because it grabs various mutexes and we want to
 +	ensure that when we enable sync_order_checks there is no
 +	mutex currently held by any thread. */
 +	mutex_enter(&recv_sys->writer_mutex);
 +
 +	/* Free the resources of the recovery system */
 +	recv_recovery_on = FALSE;
 +
 +	/* By acquiring the mutex we ensure that the recv_writer thread
 +	won't trigger any more LRU batches. Now wait for currently
 +	in progress batches to finish. */
 +	buf_flush_wait_LRU_batch_end();
 +
 +	mutex_exit(&recv_sys->writer_mutex);
 +
 +	ulint count = 0;
 +	while (recv_writer_thread_active) {
 +		++count;
 +		os_thread_sleep(100000);
 +		if (srv_print_verbose_log && count > 600) {
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for recv_writer to "
 +				"finish flushing of buffer pool");
 +			count = 0;
 +		}
 +	}
 +
 +#ifdef __WIN__
 +	if (recv_writer_thread_handle) {
 +		CloseHandle(recv_writer_thread_handle);
 +	}
 +#endif /* __WIN__ */
 +
 +#ifndef UNIV_LOG_DEBUG
 +	recv_sys_debug_free();
 +#endif
 +	/* Free up the flush_rbt. */
 +	buf_flush_free_flush_rbt();
 +
 +	/* Roll back any recovered data dictionary transactions, so
 +	that the data dictionary tables will be free of any locks.
 +	The data dictionary latch should guarantee that there is at
 +	most one data dictionary transaction active at a time. */
 +	if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
 +		trx_rollback_or_clean_recovered(FALSE);
 +	}
 +}
 +
 +/********************************************************//**
 +Initiates the rollback of active transactions.
 +Unless srv_force_recovery is at least SRV_FORCE_NO_TRX_UNDO or the server is
 +in read-only mode, this first drops temporary indexes, temporary tables and
 +orphaned FTS auxiliary tables, and then creates a thread that runs
 +trx_rollback_or_clean_all_recovered. */
 +UNIV_INTERN
 +void
 +recv_recovery_rollback_active(void)
 +/*===============================*/
 +{
 +#ifdef UNIV_SYNC_DEBUG
 +	/* Wait for a while so that created threads have time to suspend
 +	themselves before we switch the latching order checks on */
 +	os_thread_sleep(1000000);
 +
 +	ut_ad(!recv_writer_thread_active);
 +
 +	/* Switch latching order checks on in sync0sync.cc */
 +	sync_order_checks_on = TRUE;
 +#endif
 +	/* We can't start any (DDL) transactions if UNDO logging
 +	has been disabled, additionally disable ROLLBACK of recovered
 +	user transactions. */
 +	if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
 +	    && !srv_read_only_mode) {
 +
 +		/* Drop partially created indexes. */
 +		row_merge_drop_temp_indexes();
 +		/* Drop temporary tables. */
 +		row_mysql_drop_temp_tables();
 +
 +		/* Drop any auxiliary tables that were not dropped when the
 +		parent table was dropped. This can happen if the parent table
 +		was dropped but the server crashed before the auxiliary tables
 +		were dropped. */
 +		fts_drop_orphaned_tables();
 +
 +		/* Rollback the uncommitted transactions which have no user
 +		session. The flag is set before the thread is created. */
 +
 +		trx_rollback_or_clean_is_active = true;
 +		os_thread_create(trx_rollback_or_clean_all_recovered, 0, 0);
 +	}
 +}
 +
 +/******************************************************//**
 +Resets the logs. The contents of log files will be lost!
 +The caller must hold log_sys->mutex. The mutex is released while
 +log_make_checkpoint_at() runs and is re-acquired before returning. */
 +UNIV_INTERN
 +void
 +recv_reset_logs(
 +/*============*/
 +#ifdef UNIV_LOG_ARCHIVE
 +	ulint		arch_log_no,	/*!< in: next archived log file number */
 +	ibool		new_logs_created,/*!< in: TRUE if resetting logs
 +					is done at the log creation;
 +					FALSE if it is done after
 +					archive recovery */
 +#endif /* UNIV_LOG_ARCHIVE */
 +	lsn_t		lsn)		/*!< in: reset to this lsn
 +					rounded up to be divisible by
 +					OS_FILE_LOG_BLOCK_SIZE, after
 +					which we add
 +					LOG_BLOCK_HDR_SIZE */
 +{
 +	log_group_t*	group;
 +
 +	ut_ad(mutex_own(&(log_sys->mutex)));
 +
 +	log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
 +
 +	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 +
 +	while (group) {
 +		group->lsn = log_sys->lsn;
 +		group->lsn_offset = LOG_FILE_HDR_SIZE;
 +#ifdef UNIV_LOG_ARCHIVE
 +		group->archived_file_no = arch_log_no;
 +		group->archived_offset = 0;
 +
 +		if (!new_logs_created) {
 +			recv_truncate_group(group, group->lsn, group->lsn,
 +					    group->lsn, group->lsn);
 +		}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +		group = UT_LIST_GET_NEXT(log_groups, group);
 +	}
 +
 +	log_sys->buf_next_to_write = 0;
 +	log_sys->written_to_some_lsn = log_sys->lsn;
 +	log_sys->written_to_all_lsn = log_sys->lsn;
 +
 +	log_sys->next_checkpoint_no = 0;
 +	log_sys->last_checkpoint_lsn = 0;
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	log_sys->archived_lsn = log_sys->lsn;
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	log_sys->tracked_lsn = log_sys->lsn;
 +
 +	log_block_init(log_sys->buf, log_sys->lsn);
 +	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
 +
 +	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
 +	log_sys->lsn += LOG_BLOCK_HDR_SIZE;
 +
 +	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 +		    (log_sys->lsn - log_sys->last_checkpoint_lsn));
 +
 +	mutex_exit(&(log_sys->mutex));
 +
 +	/* Reset the checkpoint fields in logs */
 +
 +	log_make_checkpoint_at(LSN_MAX, TRUE);
 +
 +	mutex_enter(&(log_sys->mutex));
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +#ifdef UNIV_HOTBACKUP
 +/******************************************************//**
 +Creates new log files after a backup has been restored.
 +Creates n_log_files files whose names are log_dir, "ib_logfile" and the file
 +index concatenated without any separator, and sets each to log_file_size.
 +Then writes LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE bytes, prepared by
 +log_reset_first_header_and_checkpoint() and log_block_init_in_old_format(),
 +to the start of file 0. Calls exit(1) if a file cannot be created, resized
 +or reopened.
 +NOTE(review): the return value of os_file_write() is not checked here. */
 +UNIV_INTERN
 +void
 +recv_reset_log_files_for_backup(
 +/*============================*/
 +	const char*	log_dir,	/*!< in: log file directory path */
 +	ulint		n_log_files,	/*!< in: number of log files */
 +	lsn_t		log_file_size,	/*!< in: log file size */
 +	lsn_t		lsn)		/*!< in: new start lsn, must be
 +					divisible by OS_FILE_LOG_BLOCK_SIZE */
 +{
 +	os_file_t	log_file;
 +	ibool		success;
 +	byte*		buf;
 +	ulint		i;
 +	ulint		log_dir_len;
 +	char		name[5000];
 +	static const char ib_logfile_basename[] = "ib_logfile";
 +
 +	log_dir_len = strlen(log_dir);
 +	/* full path name of ib_logfile consists of log dir path + basename
 +	+ number. This must fit in the name buffer.
 +	*/
 +	ut_a(log_dir_len + strlen(ib_logfile_basename) + 11  < sizeof(name));
 +
 +	buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 +	memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 +
 +	for (i = 0; i < n_log_files; i++) {
 +
 +		sprintf(name, "%s%s%lu", log_dir,
 +			ib_logfile_basename, (ulong) i);
 +
 +		log_file = os_file_create_simple(innodb_file_log_key,
 +						 name, OS_FILE_CREATE,
 +						 OS_FILE_READ_WRITE,
 +						 &success);
 +		if (!success) {
 +			fprintf(stderr,
 +				"InnoDB: Cannot create %s. Check that"
 +				" the file does not exist yet.\n", name);
 +
 +			exit(1);
 +		}
 +
 +		fprintf(stderr,
 +			"Setting log file size to %llu\n",
 +			log_file_size);
 +
 +		success = os_file_set_size(name, log_file, log_file_size);
 +
 +		if (!success) {
 +			fprintf(stderr,
 +				"InnoDB: Cannot set %s size to %llu\n",
 +				name, log_file_size);
 +			exit(1);
 +		}
 +
 +		os_file_flush(log_file);
 +		os_file_close(log_file);
 +	}
 +
 +	/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
 +
 +	log_reset_first_header_and_checkpoint(buf, lsn);
 +
 +	log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
 +	log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
 +				      LOG_BLOCK_HDR_SIZE);
 +	sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
 +
 +	log_file = os_file_create_simple(innodb_file_log_key,
 +					 name, OS_FILE_OPEN,
 +					 OS_FILE_READ_WRITE, &success);
 +	if (!success) {
 +		fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
 +
 +		exit(1);
 +	}
 +
 +	os_file_write(name, log_file, buf, 0,
 +		      LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 +	os_file_flush(log_file);
 +	os_file_close(log_file);
 +
 +	ut_free(buf);
 +}
 +#endif /* UNIV_HOTBACKUP */
 +/** Append a page pointer to the end of the `pages` container.
 +Only the pointer is stored; the page contents are not copied here.
 +@param page	page pointer to remember */
 +void recv_dblwr_t::add(byte* page)
 +{
 +	pages.push_back(page);
 +}
 +/** Find a stored page by tablespace id and page number.
 +Collects every entry of `pages` whose page_get_space_id() and
 +page_get_page_no() equal the arguments. A single match is returned as is;
 +among several matches the one with the greatest 8-byte value read at
 +FIL_PAGE_LSN is returned.
 +NOTE(review): if several entries match and each has an LSN of 0, the result
 +stays 0 because of the strict page_lsn > max_lsn comparison.
 +@param space_id	tablespace id to match
 +@param page_no	page number to match
 +@return	the selected page, or 0 if there is none */
 +byte* recv_dblwr_t::find_page(ulint space_id, ulint page_no)
 +{
 +	std::vector<byte*> matches;
 +	byte*	result = 0;
 +
 +	for (std::list<byte*>::iterator i = pages.begin();
 +	     i != pages.end(); ++i) {
 +
 +		if ((page_get_space_id(*i) == space_id)
 +		    && (page_get_page_no(*i) == page_no)) {
 +			matches.push_back(*i);
 +		}
 +	}
 +
 +	if (matches.size() == 1) {
 +		result = matches[0];
 +	} else if (matches.size() > 1) {
 +
 +		lsn_t max_lsn	= 0;
 +		lsn_t page_lsn	= 0;
 +
 +		for (std::vector<byte*>::iterator i = matches.begin();
 +		     i != matches.end(); ++i) {
 +
 +			page_lsn = mach_read_from_8(*i + FIL_PAGE_LSN);
 +
 +			if (page_lsn > max_lsn) {
 +				max_lsn = page_lsn;
 +				result = *i;
 +			}
 +		}
 +	}
 +
 +	return(result);
 +}
 +
diff --cc storage/xtradb/srv/srv0start.cc
index 258d2546634,00000000000..5bd2b861ea0
mode 100644,000000..100644
--- a/storage/xtradb/srv/srv0start.cc
+++ b/storage/xtradb/srv/srv0start.cc
@@@ -1,3346 -1,0 +1,3346 @@@
 +/*****************************************************************************
 +
 +Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.
 +Copyright (c) 2008, Google Inc.
 +Copyright (c) 2009, Percona Inc.
 +Copyright (c) 2017, MariaDB Corporation.
 +
 +Portions of this file contain modifications contributed and copyrighted by
 +Google, Inc. Those modifications are gratefully acknowledged and are described
 +briefly in the InnoDB documentation. The contributions by Google are
 +incorporated with their permission, and subject to the conditions contained in
 +the file COPYING.Google.
 +
 +Portions of this file contain modifications contributed and copyrighted
 +by Percona Inc.. Those modifications are
 +gratefully acknowledged and are described briefly in the InnoDB
 +documentation. The contributions by Percona Inc. are incorporated with
 +their permission, and subject to the conditions contained in the file
 +COPYING.Percona.
 +
 +This program is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free Software
 +Foundation; version 2 of the License.
 +
 +This program is distributed in the hope that it will be useful, but WITHOUT
 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License along with
 +this program; if not, write to the Free Software Foundation, Inc.,
 +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
 +
 +*****************************************************************************/
 +
 +/********************************************************************//**
 +@file srv/srv0start.cc
 +Starts the InnoDB database server
 +
 +Created 2/16/1996 Heikki Tuuri
 +*************************************************************************/
 +
 +#include "mysqld.h"
 +#include "pars0pars.h"
 +#include "row0ftsort.h"
 +#include "ut0mem.h"
 +#include "mem0mem.h"
 +#include "data0data.h"
 +#include "data0type.h"
 +#include "dict0dict.h"
 +#include "buf0buf.h"
 +#include "buf0dump.h"
 +#include "os0file.h"
 +#include "os0thread.h"
 +#include "fil0fil.h"
 +#include "fsp0fsp.h"
 +#include "rem0rec.h"
 +#include "mtr0mtr.h"
 +#include "log0log.h"
 +#include "log0online.h"
 +#include "log0recv.h"
 +#include "page0page.h"
 +#include "page0cur.h"
 +#include "trx0trx.h"
 +#include "trx0sys.h"
 +#include "btr0btr.h"
 +#include "btr0cur.h"
 +#include "rem0rec.h"
 +#include "ibuf0ibuf.h"
 +#include "srv0start.h"
 +#include "srv0srv.h"
 +#ifndef UNIV_HOTBACKUP
 +# include "trx0rseg.h"
 +# include "os0proc.h"
 +# include "sync0sync.h"
 +# include "buf0flu.h"
 +# include "buf0rea.h"
 +# include "dict0boot.h"
 +# include "dict0load.h"
 +# include "dict0stats_bg.h"
 +# include "que0que.h"
 +# include "usr0sess.h"
 +# include "lock0lock.h"
 +# include "trx0roll.h"
 +# include "trx0purge.h"
 +# include "lock0lock.h"
 +# include "pars0pars.h"
 +# include "btr0sea.h"
 +# include "rem0cmp.h"
 +# include "dict0crea.h"
 +# include "row0ins.h"
 +# include "row0sel.h"
 +# include "row0upd.h"
 +# include "row0row.h"
 +# include "row0mysql.h"
 +# include "btr0pcur.h"
 +# include "os0sync.h"
 +# include "zlib.h"
 +# include "ut0crc32.h"
 +# include "os0stacktrace.h"
 +
 +/** Log sequence number immediately after startup */
 +UNIV_INTERN lsn_t	srv_start_lsn;
 +/** Log sequence number at shutdown */
 +UNIV_INTERN lsn_t	srv_shutdown_lsn;
 +
 +#ifdef HAVE_DARWIN_THREADS
 +# include <sys/utsname.h>
 +/** TRUE if the F_FULLFSYNC option is available */
 +UNIV_INTERN ibool	srv_have_fullfsync = FALSE;
 +#endif
 +
 +/** TRUE if a raw partition is in use */
 +UNIV_INTERN ibool	srv_start_raw_disk_in_use = FALSE;
 +
 +/** TRUE if the server is being started, before rolling back any
 +incomplete transactions */
 +UNIV_INTERN ibool	srv_startup_is_before_trx_rollback_phase = FALSE;
 +/** TRUE if the server is being started */
 +UNIV_INTERN ibool	srv_is_being_started = FALSE;
 +/** TRUE if the server was successfully started */
 +UNIV_INTERN ibool	srv_was_started = FALSE;
 +/** TRUE if innobase_start_or_create_for_mysql() has been called */
 +static ibool		srv_start_has_been_called = FALSE;
 +
 +/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
 +SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
 +UNIV_INTERN enum srv_shutdown_state	srv_shutdown_state = SRV_SHUTDOWN_NONE;
 +
 +/** Files comprising the system tablespace */
 +static os_file_t	files[1000];
 +
 +/** io_handler_thread parameters for thread identification */
 +static ulint		n[SRV_MAX_N_IO_THREADS];
 +/** io_handler_thread identifiers, 32 is the maximum number of purge threads.
 +The extra elements at the end are allocated as follows:
 +SRV_MAX_N_IO_THREADS + 1: srv_master_thread
 +SRV_MAX_N_IO_THREADS + 2: lock_wait_timeout_thread
 +SRV_MAX_N_IO_THREADS + 3: srv_error_monitor_thread
 +SRV_MAX_N_IO_THREADS + 4: srv_monitor_thread
 +SRV_MAX_N_IO_THREADS + 5: srv_redo_log_follow_thread
 +SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread
 +SRV_MAX_N_IO_THREADS + 7: srv_worker_thread
 +...
 +SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */
 +static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 7
 +				   + SRV_MAX_N_PURGE_THREADS];
 +
 +/** Thread handles; thread_handles has the same number of elements as
 +thread_ids */
 +static os_thread_t	thread_handles[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS];
 +static os_thread_t	buf_flush_page_cleaner_thread_handle;
 +static os_thread_t	buf_dump_thread_handle;
 +static os_thread_t	dict_stats_thread_handle;
 +static os_thread_t	buf_flush_lru_manager_thread_handle;
 +static os_thread_t	srv_redo_log_follow_thread_handle;
 +/** Status flags: has the corresponding thread been started? */
 +static bool		thread_started[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS] = {false};
 +static bool		buf_flush_page_cleaner_thread_started = false;
 +static bool		buf_dump_thread_started = false;
 +static bool		dict_stats_thread_started = false;
 +static bool		buf_flush_lru_manager_thread_started = false;
 +static bool		srv_redo_log_follow_thread_started = false;
 +
 +/** We use this mutex to test the return value of pthread_mutex_trylock
 +   on successful locking. HP-UX does NOT return 0, though Linux et al do. */
 +static os_fast_mutex_t	srv_os_test_mutex;
 +
 +/** Name of srv_monitor_file */
 +static char*	srv_monitor_file_name;
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/** Default undo tablespace size in UNIV_PAGEs count (10MB). */
 +static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
 +	((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
 +
 +/** Pending-I/O counts, judging by the macro names: a per-thread count taken from OS_AIO_N_PENDING_IOS_PER_THREAD and a maximum for synchronous I/Os -- TODO confirm against the users of these macros */
 +#define SRV_N_PENDING_IOS_PER_THREAD	OS_AIO_N_PENDING_IOS_PER_THREAD
 +#define SRV_MAX_N_PENDING_SYNC_IOS	100
 +
 +/** The round off to MB is similar as done in srv_parse_megabytes().
 +NOTE(review): the expansion is a product that is not wrapped in outer
 +parentheses, so it should not be used as an operand of a higher-precedence
 +operator without adding parentheses at the call site. */
 +#define CALC_NUMBER_OF_PAGES(size)  ((size) / (1024 * 1024)) * \
 +				  ((1024 * 1024) / (UNIV_PAGE_SIZE))
 +#ifdef UNIV_PFS_THREAD
 +/* Keys to register InnoDB threads with performance schema */
 +UNIV_INTERN mysql_pfs_key_t	io_handler_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_lock_timeout_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_error_monitor_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_monitor_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_master_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_purge_thread_key;
 +UNIV_INTERN mysql_pfs_key_t	srv_log_tracking_thread_key;
 +#endif /* UNIV_PFS_THREAD */
 +
 +/*********************************************************************//**
 +Convert a numeric string that optionally ends in G or M or K, to a number
 +containing megabytes.
 +A number with no suffix is taken as bytes and a K suffix as kilobytes; both
 +are rounded down to whole megabytes. G multiplies the number by 1024 and M
 +leaves it unchanged. The suffix may be in either case and is skipped in the
 +returned pointer.
 +@return	next character in string */
 +static
 +char*
 +srv_parse_megabytes(
 +/*================*/
 +	char*	str,	/*!< in: string containing a quantity in bytes */
 +	ulint*	megs)	/*!< out: the number in megabytes */
 +{
 +	char*	endp;
 +	ulint	size;
 +
 +	size = strtoul(str, &endp, 10);
 +
 +	str = endp;
 +
 +	switch (*str) {
 +	case 'G': case 'g':
 +		size *= 1024;
 +		/* fall through */
 +	case 'M': case 'm':
 +		str++;
 +		break;
 +	case 'K': case 'k':
 +		size /= 1024;
 +		str++;
 +		break;
 +	default:
 +		size /= 1024 * 1024;
 +		break;
 +	}
 +
 +	*megs = size;
 +	return(str);
 +}
 +
 +/*********************************************************************//**
 +Check if a file can be opened in read-write mode.
 +Logs an error and returns false if os_file_get_status() returns DB_FAIL, if
 +the path is not a regular file, or if stat.rw_perm is not set.
 +@return	true if it doesn't exist or can be opened in rw mode. */
 +static
 +bool
 +srv_file_check_mode(
 +/*================*/
 +	const char*	name)		/*!< in: filename to check */
 +{
 +	os_file_stat_t	stat;
 +
 +	memset(&stat, 0x0, sizeof(stat));
 +
 +	dberr_t		err = os_file_get_status(name, &stat, true);
 +
 +	if (err == DB_FAIL) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"os_file_get_status() failed on '%s'. Can't determine "
 +			"file permissions", name);
 +
 +		return(false);
 +
 +	} else if (err == DB_SUCCESS) {
 +
 +		/* Note: stat.rw_perm is only valid for files */
 +
 +		if (stat.type == OS_FILE_TYPE_FILE) {
 +
 +			if (!stat.rw_perm) {
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"%s can't be opened in %s mode",
 +					name,
 +					srv_read_only_mode
 +					? "read" : "read-write");
 +
 +				return(false);
 +			}
 +		} else {
 +			/* Not a regular file, bail out. */
 +
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"'%s' not a regular file.", name);
 +
 +			return(false);
 +		}
 +	} else {
 +
 +		/* This is OK. If the file create fails on RO media, there
 +		is nothing we can do. Any status other than DB_NOT_FOUND
 +		fails the assertion below. */
 +
 +		ut_a(err == DB_NOT_FOUND);
 +	}
 +
 +	return(true);
 +}
 +
 +/*********************************************************************//**
 +Reads the data files and their sizes from a character string given in
 +the .cnf file. The string is scanned twice: the first pass only counts the
 +file definitions and checks the syntax, the second pass stores the values
 +in srv_data_file_names[], srv_data_file_sizes[] and
 +srv_data_file_is_raw_partition[], which are allocated with malloc() in
 +between. The second pass writes '\0' over the ':' that ends each path, and
 +srv_data_file_names[] points into str, so str must stay allocated as long
 +as the names are in use. If the arrays cannot be allocated, an error is
 +logged, nothing is left allocated and FALSE is returned.
 +@return	TRUE if ok, FALSE on parse error or if out of memory */
 +UNIV_INTERN
 +ibool
 +srv_parse_data_file_paths_and_sizes(
 +/*================================*/
 +	char*	str)	/*!< in/out: the data file path string */
 +{
 +	char*	input_str;
 +	char*	path;
 +	ulint	size;
 +	ulint	i	= 0;
 +
 +	srv_auto_extend_last_data_file = FALSE;
 +	srv_last_file_size_max = 0;
 +	srv_data_file_names = NULL;
 +	srv_data_file_sizes = NULL;
 +	srv_data_file_is_raw_partition = NULL;
 +
 +	input_str = str;
 +
 +	/* First calculate the number of data files and check syntax:
 +	path:size[M | G];path:size[M | G]... . Note that a Windows path may
 +	contain a drive name and a ':'. */
 +
 +	while (*str != '\0') {
 +		path = str;
 +
 +		/* Find the ':' that separates the path from the size. A ':'
 +		that is followed by '\\', '/' or ':' is part of the path. */
 +		while ((*str != ':' && *str != '\0')
 +		       || (*str == ':'
 +			   && (*(str + 1) == '\\' || *(str + 1) == '/'
 +			       || *(str + 1) == ':'))) {
 +			str++;
 +		}
 +
 +		if (*str == '\0') {
 +			/* A path without a size */
 +			return(FALSE);
 +		}
 +
 +		str++;
 +
 +		str = srv_parse_megabytes(str, &size);
 +
 +		if (0 == strncmp(str, ":autoextend",
 +				 (sizeof ":autoextend") - 1)) {
 +
 +			str += (sizeof ":autoextend") - 1;
 +
 +			if (0 == strncmp(str, ":max:",
 +					 (sizeof ":max:") - 1)) {
 +
 +				str += (sizeof ":max:") - 1;
 +
 +				str = srv_parse_megabytes(str, &size);
 +			}
 +
 +			/* :autoextend is only accepted at the very end of
 +			the string, that is, for the last data file. */
 +			if (*str != '\0') {
 +
 +				return(FALSE);
 +			}
 +		}
 +
 +		/* Skip the "new" of a "newraw" suffix */
 +		if (strlen(str) >= 6
 +		    && *str == 'n'
 +		    && *(str + 1) == 'e'
 +		    && *(str + 2) == 'w') {
 +			str += 3;
 +		}
 +
 +		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
 +			str += 3;
 +		}
 +
 +		if (size == 0) {
 +			return(FALSE);
 +		}
 +
 +		i++;
 +
 +		if (*str == ';') {
 +			str++;
 +		} else if (*str != '\0') {
 +
 +			return(FALSE);
 +		}
 +	}
 +
 +	if (i == 0) {
 +		/* If innodb_data_file_path was defined it must contain
 +		at least one data file definition */
 +
 +		return(FALSE);
 +	}
 +
 +	srv_data_file_names = static_cast<char**>(
 +		malloc(i * sizeof *srv_data_file_names));
 +
 +	srv_data_file_sizes = static_cast<ulint*>(
 +		malloc(i * sizeof *srv_data_file_sizes));
 +
 +	srv_data_file_is_raw_partition = static_cast<ulint*>(
 +		malloc(i * sizeof *srv_data_file_is_raw_partition));
 +
 +	if (srv_data_file_names == NULL
 +	    || srv_data_file_sizes == NULL
 +	    || srv_data_file_is_raw_partition == NULL) {
 +
 +		/* The second pass below writes to all three arrays, so we
 +		must not continue with a failed allocation. Release whatever
 +		was allocated; free(NULL) is a no-op. */
 +
 +		free(srv_data_file_names);
 +		srv_data_file_names = NULL;
 +		free(srv_data_file_sizes);
 +		srv_data_file_sizes = NULL;
 +		free(srv_data_file_is_raw_partition);
 +		srv_data_file_is_raw_partition = NULL;
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Cannot allocate memory for %lu data file "
 +			"definitions", (ulong) i);
 +
 +		return(FALSE);
 +	}
 +
 +	srv_n_data_files = i;
 +
 +	/* Then store the actual values to our arrays */
 +
 +	str = input_str;
 +	i = 0;
 +
 +	while (*str != '\0') {
 +		path = str;
 +
 +		/* Note that we must step over the ':' in a Windows path;
 +		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
 +		a Windows raw partition may have a specification like
 +		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
 +
 +		while ((*str != ':' && *str != '\0')
 +		       || (*str == ':'
 +			   && (*(str + 1) == '\\' || *(str + 1) == '/'
 +			       || *(str + 1) == ':'))) {
 +			str++;
 +		}
 +
 +		if (*str == ':') {
 +			/* Make path a null-terminated string */
 +			*str = '\0';
 +			str++;
 +		}
 +
 +		str = srv_parse_megabytes(str, &size);
 +
 +		srv_data_file_names[i] = path;
 +		srv_data_file_sizes[i] = size;
 +
 +		if (0 == strncmp(str, ":autoextend",
 +				 (sizeof ":autoextend") - 1)) {
 +
 +			srv_auto_extend_last_data_file = TRUE;
 +
 +			str += (sizeof ":autoextend") - 1;
 +
 +			if (0 == strncmp(str, ":max:",
 +					 (sizeof ":max:") - 1)) {
 +
 +				str += (sizeof ":max:") - 1;
 +
 +				str = srv_parse_megabytes(
 +					str, &srv_last_file_size_max);
 +			}
 +
 +			if (*str != '\0') {
 +
 +				return(FALSE);
 +			}
 +		}
 +
 +		(srv_data_file_is_raw_partition)[i] = 0;
 +
 +		if (strlen(str) >= 6
 +		    && *str == 'n'
 +		    && *(str + 1) == 'e'
 +		    && *(str + 2) == 'w') {
 +			str += 3;
 +			/* Initialize new raw device only during bootstrap */
 +			(srv_data_file_is_raw_partition)[i] =
 +			opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
 +		}
 +
 +		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
 +			str += 3;
 +
 +			/* Initialize new raw device only during bootstrap */
 +			if ((srv_data_file_is_raw_partition)[i] == 0) {
 +				(srv_data_file_is_raw_partition)[i] =
 +				opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
 +			}
 +		}
 +
 +		i++;
 +
 +		if (*str == ';') {
 +			str++;
 +		}
 +	}
 +
 +	return(TRUE);
 +}
 +
 +/*********************************************************************//**
 +Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
 +and srv_parse_log_group_home_dirs(). The three array pointers are reset
 +to NULL afterwards, so calling this function again is harmless. */
 +UNIV_INTERN
 +void
 +srv_free_paths_and_sizes(void)
 +/*==========================*/
 +{
 +	/* Release the arrays first ... */
 +	free(srv_data_file_names);
 +	free(srv_data_file_sizes);
 +	free(srv_data_file_is_raw_partition);
 +
 +	/* ... and then forget the stale pointers. */
 +	srv_data_file_names = NULL;
 +	srv_data_file_sizes = NULL;
 +	srv_data_file_is_raw_partition = NULL;
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +
 +static ulint io_tid_i = 0;
 +
 +/********************************************************************//**
 +I/o-handler thread function. The thread records its thread id in the next
 +free slot of srv_io_tids[], sets its scheduling priority and then keeps
 +calling fil_aio_wait() for its segment until srv_shutdown_state becomes
 +SRV_SHUTDOWN_EXIT_THREADS.
 +@return	OS_THREAD_DUMMY_RETURN */
 +extern "C" UNIV_INTERN
 +os_thread_ret_t
 +DECLARE_THREAD(io_handler_thread)(
 +/*==============================*/
 +	void*	arg)	/*!< in: pointer to the number of the segment in
 +			the aio array */
 +{
 +	/* Each I/O thread takes the next slot of srv_io_tids[]. */
 +	const ulint	slot = os_atomic_increment_ulint(&io_tid_i, 1) - 1;
 +
 +	ut_ad(slot < srv_n_file_io_threads);
 +
 +	const ulint	segment = *static_cast<ulint*>(arg);
 +
 +	srv_io_tids[slot] = os_thread_get_tid();
 +	os_thread_set_priority(srv_io_tids[slot], srv_sched_priority_io);
 +
 +#ifdef UNIV_DEBUG_THREAD_CREATION
 +	fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment,
 +		os_thread_pf(os_thread_get_curr_id()));
 +#endif
 +
 +#ifdef UNIV_PFS_THREAD
 +	pfs_register_thread(io_handler_thread_key);
 +#endif /* UNIV_PFS_THREAD */
 +
 +	for (;;) {
 +		if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
 +			break;
 +		}
 +
 +		srv_current_thread_priority = srv_io_thread_priority;
 +		fil_aio_wait(segment);
 +	}
 +
 +	/* We count the number of threads in os_thread_exit(). A created
 +	thread should always use that to exit and not use return() to exit.
 +	The thread actually never comes here because it is exited in an
 +	os_event_wait(). */
 +
 +	os_thread_exit(NULL);
 +
 +	OS_THREAD_DUMMY_RETURN;
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +/*********************************************************************//**
 +Normalizes a directory path for Windows: converts slashes to backslashes.
 +The conversion is done in place. When __WIN__ is not defined the function
 +does nothing. */
 +UNIV_INTERN
 +void
 +srv_normalize_path_for_win(
 +/*=======================*/
 +	char*	str MY_ATTRIBUTE((unused)))	/*!< in/out: null-terminated
 +						character string */
 +{
 +#ifdef __WIN__
 +	while (*str != '\0') {
 +
 +		if (*str == '/') {
 +			*str = '\\';
 +		}
 +
 +		++str;
 +	}
 +#endif
 +}
 +
 +#ifndef UNIV_HOTBACKUP
 +/*********************************************************************//**
 +Creates a log file. The file is created with OS_FILE_CREATE, extended to
 +srv_log_file_size pages and closed again. The handle is closed on the
 +failure path of os_file_set_size() as well, so that it is not leaked.
 +@return	DB_SUCCESS or error code */
 +static MY_ATTRIBUTE((nonnull, warn_unused_result))
 +dberr_t
 +create_log_file(
 +/*============*/
 +	os_file_t*	file,	/*!< out: file handle */
 +	const char*	name)	/*!< in: log file name */
 +{
 +	ibool		ret;
 +
 +	*file = os_file_create(
 +		innodb_file_log_key, name,
 +		OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
 +		OS_LOG_FILE, &ret);
 +
 +	if (!ret) {
 +		ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
 +		return(DB_ERROR);
 +	}
 +
 +	/* srv_log_file_size is in pages: shift it to megabytes for the
 +	messages and to bytes for os_file_set_size(). */
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Setting log file %s size to %lu MB",
 +		name, (ulong) srv_log_file_size
 +		>> (20 - UNIV_PAGE_SIZE_SHIFT));
 +
 +	ret = os_file_set_size(name, *file,
 +			       (os_offset_t) srv_log_file_size
 +			       << UNIV_PAGE_SIZE_SHIFT);
 +	if (!ret) {
 +		ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
 +			" %s to size %lu MB", name, (ulong) srv_log_file_size
 +			>> (20 - UNIV_PAGE_SIZE_SHIFT));
 +
 +		/* The file was opened successfully above: close it so
 +		that the handle is not leaked. We are already reporting
 +		an error, so the result of the close is not checked. */
 +		os_file_close(*file);
 +
 +		return(DB_ERROR);
 +	}
 +
 +	ret = os_file_close(*file);
 +	ut_a(ret);
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/** Initial number of the first redo log file. The first log file is
 +created under this number and only later renamed to ib_logfile0; see
 +create_log_files() and create_log_files_rename(). */
 +#define INIT_LOG_FILE0	(SRV_N_LOG_FILES_MAX + 1)
 +
 +/*********************************************************************//**
 +Creates all log files. Unless a new database is being created, any old
 +ib_logfile<N> files are removed first. Then srv_n_log_files files are
 +created, the first one under the temporary number INIT_LOG_FILE0, they are
 +registered in the log tablespace SRV_LOG_SPACE_FIRST_ID, the log group is
 +initialized and the logs are reset to the given LSN. The handles are stored
 +in files[], which is not declared in this function. Fails with
 +DB_READ_ONLY if srv_read_only_mode is set.
 +@return	DB_SUCCESS or error code */
 +static
 +dberr_t
 +create_log_files(
 +/*=============*/
 +	bool	create_new_db,	/*!< in: TRUE if new database is being
 +				created */
 +	char*	logfilename,	/*!< in/out: buffer for log file name */
 +	size_t	dirnamelen,	/*!< in: length of the directory path */
 +	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
 +	char*&	logfile0)	/*!< out: name of the first log file */
 +{
 +	if (srv_read_only_mode) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Cannot create log files in read-only mode");
 +		return(DB_READ_ONLY);
 +	}
 +
 +	/* We prevent system tablespace creation with existing files in
 +	data directory. So we do not delete log files when creating new system
 +	tablespace */
 +	if (!create_new_db) {
 +		/* Remove any old log files. The range includes
 +		INIT_LOG_FILE0, the temporary name of the first file. */
 +		for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
 +			sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
 +
 +			/* Ignore errors about non-existent files or files
 +			that cannot be removed. The create_log_file() will
 +			return an error when the file exists. */
 +#ifdef __WIN__
 +			DeleteFile((LPCTSTR) logfilename);
 +#else
 +			unlink(logfilename);
 +#endif
 +			/* Crashing after deleting the first
 +			file should be recoverable. The buffer
 +			pool was clean, and we can simply create
 +			all log files from the scratch. */
 +			DBUG_EXECUTE_IF("innodb_log_abort_6",
 +					return(DB_ERROR););
 +		}
 +	}
 +
 +	ut_ad(!buf_pool_check_no_pending_io());
 +
 +	DBUG_EXECUTE_IF("innodb_log_abort_7", return(DB_ERROR););
 +
 +	/* Create the files. File 0 gets the number INIT_LOG_FILE0 instead
 +	of 0; the other files get their final names right away. */
 +	for (unsigned i = 0; i < srv_n_log_files; i++) {
 +		sprintf(logfilename + dirnamelen,
 +			"ib_logfile%u", i ? i : INIT_LOG_FILE0);
 +
 +		dberr_t err = create_log_file(&files[i], logfilename);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +	}
 +
 +	DBUG_EXECUTE_IF("innodb_log_abort_8", return(DB_ERROR););
 +
 +	/* We did not create the first log file initially as
 +	ib_logfile0, so that crash recovery cannot find it until it
 +	has been completed and renamed. */
 +	sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
 +
 +	fil_space_create(
 +		logfilename, SRV_LOG_SPACE_FIRST_ID,
 +		fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
 +		FIL_LOG);
 +	ut_a(fil_validate());
 +
 +	/* The value returned by fil_node_create() is handed back to the
 +	caller as the name of the first log file. */
 +	logfile0 = fil_node_create(
 +		logfilename, (ulint) srv_log_file_size,
 +		SRV_LOG_SPACE_FIRST_ID, FALSE);
 +	ut_a(logfile0);
 +
 +	for (unsigned i = 1; i < srv_n_log_files; i++) {
 +		sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
 +
 +		if (!fil_node_create(
 +			    logfilename,
 +			    (ulint) srv_log_file_size,
 +			    SRV_LOG_SPACE_FIRST_ID, FALSE)) {
 +			ut_error;
 +		}
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	/* Create the file space object for archived logs. */
 +	fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
 +			 0, FIL_LOG);
 +#endif
 +	log_group_init(0, srv_n_log_files,
 +		       srv_log_file_size * UNIV_PAGE_SIZE,
 +		       SRV_LOG_SPACE_FIRST_ID,
 +		       SRV_LOG_SPACE_FIRST_ID + 1);
 +
 +	fil_open_log_and_system_tablespace_files();
 +
 +	/* Create a log checkpoint. */
 +	mutex_enter(&log_sys->mutex);
 +	ut_d(recv_no_log_write = FALSE);
 +	recv_reset_logs(
 +#ifdef UNIV_LOG_ARCHIVE
 +		UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no,
 +		TRUE,
 +#endif
 +		lsn);
 +	mutex_exit(&log_sys->mutex);
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/** Rename the first redo log file.
 +The log files are flushed and closed, the file named by logfile0 is renamed
 +to ib_logfile0 in the directory held in logfilename, the new name is copied
 +over logfile0, and on success the log and system tablespace files are
 +opened again.
 +@param[in,out]	logfilename	buffer for the log file name
 +@param[in]	dirnamelen	length of the directory path
 +@param[in]	lsn		FIL_PAGE_FILE_FLUSH_LSN value
 +@param[in,out]	logfile0	name of the first log file
 +@return	error code
 +@retval	DB_SUCCESS	on successful operation */
 +MY_ATTRIBUTE((warn_unused_result, nonnull))
 +static
 +dberr_t
 +create_log_files_rename(
 +/*====================*/
 +	char*	logfilename,	/*!< in/out: buffer for log file name */
 +	size_t	dirnamelen,	/*!< in: length of the directory path */
 +	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
 +	char*	logfile0)	/*!< in/out: name of the first log file */
 +{
 +	/* If innodb_flush_method=O_DSYNC,
 +	we need to explicitly flush the log buffers. */
 +	fil_flush(SRV_LOG_SPACE_FIRST_ID);
 +
 +	DBUG_EXECUTE_IF("innodb_log_abort_9", return(DB_ERROR););
 +
 +	/* Close the log files, so that we can rename
 +	the first one. */
 +	fil_close_log_files(false);
 +
 +	/* Rename the first log file, now that a log
 +	checkpoint has been created. */
 +	sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Renaming log file %s to %s", logfile0, logfilename);
 +
 +	mutex_enter(&log_sys->mutex);
 +	/* The old name is asserted to be exactly two characters longer
 +	than the new one, so the strcpy() below fits into logfile0. */
 +	ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
 +	dberr_t err = os_file_rename(
 +		innodb_file_log_key, logfile0, logfilename)
 +		? DB_SUCCESS : DB_ERROR;
 +
 +	/* Replace the first file with ib_logfile0. Note that the name is
 +	replaced even if the rename failed. */
 +	strcpy(logfile0, logfilename);
 +	mutex_exit(&log_sys->mutex);
 +
 +	DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;);
 +
 +	if (err == DB_SUCCESS) {
 +		fil_open_log_and_system_tablespace_files();
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"New log files created, LSN=" LSN_PF, lsn);
 +	}
 +
 +	return(err);
 +}
 +
 +/*********************************************************************//**
 +Opens a log file. The file is opened only to read its size and is closed
 +again before returning; the handle stored in *file is therefore already
 +closed when the function returns.
 +@return	DB_SUCCESS or error code */
 +static MY_ATTRIBUTE((nonnull, warn_unused_result))
 +dberr_t
 +open_log_file(
 +/*==========*/
 +	os_file_t*	file,	/*!< out: file handle */
 +	const char*	name,	/*!< in: log file name */
 +	os_offset_t*	size)	/*!< out: file size */
 +{
 +	ibool	success;
 +
 +	*file = os_file_create(innodb_file_log_key, name,
 +			       OS_FILE_OPEN, OS_FILE_AIO,
 +			       OS_LOG_FILE, &success);
 +
 +	if (success) {
 +		*size = os_file_get_size(*file);
 +
 +		success = os_file_close(*file);
 +		ut_a(success);
 +
 +		return(DB_SUCCESS);
 +	}
 +
 +	ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
 +
 +	return(DB_ERROR);
 +}
 +
 +/*********************************************************************//**
 +Creates or opens database data files and closes them. For each of the
 +srv_n_data_files configured files the function first tries to create the
 +file (or opens the raw partition). A file that already exists is opened
 +instead, its size is checked against the configuration and its first page
 +is read; a file that was created is extended to its configured size. Every
 +file is closed again and registered in tablespace 0 with fil_node_create().
 +The handles are kept in files[], which is not declared in this function.
 +@return	DB_SUCCESS or error code */
 +static MY_ATTRIBUTE((nonnull, warn_unused_result))
 +dberr_t
 +open_or_create_data_files(
 +/*======================*/
 +	ibool*		create_new_db,	/*!< out: TRUE if new database should be
 +					created */
 +#ifdef UNIV_LOG_ARCHIVE
 +	lsn_t*		min_arch_log_no,/*!< out: min of archived log
 +					numbers in data files */
 +	lsn_t*		max_arch_log_no,/*!< out: max of archived log
 +					numbers in data files */
 +#endif /* UNIV_LOG_ARCHIVE */
 +	lsn_t*		min_flushed_lsn,/*!< out: min of flushed lsn
 +					values in data files */
 +	lsn_t*		max_flushed_lsn,/*!< out: max of flushed lsn
 +					values in data files */
 +	ulint*		sum_of_new_sizes)/*!< out: sum of sizes of the
 +					new files added */
 +{
 +	ibool		ret;
 +	ulint		i;
 +	ibool		one_opened	= FALSE;
 +	ibool		one_created	= FALSE;
 +	os_offset_t	size;
 +	ulint		flags;
 +	ulint		space;
 +	ulint		rounded_size_pages;
 +	char		name[10000];
 +
 +	if (srv_n_data_files >= 1000) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Can only have < 1000 data files, you have "
 +			"defined %lu", (ulong) srv_n_data_files);
 +
 +		return(DB_ERROR);
 +	}
 +
 +	*sum_of_new_sizes = 0;
 +
 +	*create_new_db = FALSE;
 +
 +	srv_normalize_path_for_win(srv_data_home);
 +
 +	for (i = 0; i < srv_n_data_files; i++) {
 +		ulint	dirnamelen;
 +
 +		srv_normalize_path_for_win(srv_data_file_names[i]);
 +		dirnamelen = strlen(srv_data_home);
 +
 +		/* The full path must fit in name[], leaving room for a
 +		path separator that may be added below. */
 +		ut_a(dirnamelen + strlen(srv_data_file_names[i])
 +		     < (sizeof name) - 1);
 +
 +		memcpy(name, srv_data_home, dirnamelen);
 +
 +		/* Add a path separator if needed. */
 +		if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
 +			name[dirnamelen++] = SRV_PATH_SEPARATOR;
 +		}
 +
 +		strcpy(name + dirnamelen, srv_data_file_names[i]);
 +
 +		/* Note: It will return true if the file doesn't exist. */
 +
 +		if (!srv_file_check_mode(name)) {
 +
 +			return(DB_FAIL);
 +
 +		} else if (srv_data_file_is_raw_partition[i] == 0) {
 +
 +			/* First we try to create the file: if it already
 +			exists, ret will get value FALSE */
 +
 +			files[i] = os_file_create(
 +				innodb_file_data_key, name, OS_FILE_CREATE,
 +				OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +
 +			if (srv_read_only_mode) {
 +
 +				/* In read-only mode a successful call goes
 +				straight to the size check of the branch
 +				that handles existing files. */
 +				if (ret) {
 +					goto size_check;
 +				}
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Opening %s failed!", name);
 +
 +				return(DB_ERROR);
 +
 +			} else if (!ret
 +				   && os_file_get_last_error(false)
 +				   != OS_FILE_ALREADY_EXISTS
 +#ifdef UNIV_AIX
 +			    	   /* AIX 5.1 after security patch ML7 may have
 +			           errno set to 0 here, which causes our
 +				   function to return 100; work around that
 +				   AIX problem */
 +				   && os_file_get_last_error(false) != 100
 +#endif /* UNIV_AIX */
 +			    ) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Creating or opening %s failed!",
 +					name);
 +
 +				return(DB_ERROR);
 +			}
 +
 +		} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
 +
 +			ut_a(!srv_read_only_mode);
 +
 +			/* The partition is opened, not created; then it is
 +			written over */
 +
 +			srv_start_raw_disk_in_use = TRUE;
 +			srv_created_new_raw = TRUE;
 +
 +			files[i] = os_file_create(
 +				innodb_file_data_key, name, OS_FILE_OPEN_RAW,
 +				OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +
 +			if (!ret) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Error in opening %s", name);
 +
 +				return(DB_ERROR);
 +			}
 +
 +			const char*	check_msg;
 +			check_msg = fil_read_first_page(
 +				files[i], FALSE, &flags, &space,
 +				min_flushed_lsn, max_flushed_lsn);
 +
 +			/* If first page is valid, don't overwrite DB.
 +			It prevents overwriting DB when mysql_install_db
 +			starts mysqld multiple times during bootstrap. */
 +			if (check_msg == NULL) {
 +
 +				srv_created_new_raw = FALSE;
 +				ret = FALSE;
 +			}
 +
 +		} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
 +			srv_start_raw_disk_in_use = TRUE;
 +
 +			/* Nothing was opened yet: ret = FALSE selects the
 +			branch below, which opens the partition. */
 +			ret = FALSE;
 +		} else {
 +			ut_a(0);
 +		}
 +
 +		/* From here on, ret == FALSE means that the file was not
 +		created above and must be opened and checked, while
 +		ret != FALSE means that it was just created. */
 +		if (ret == FALSE) {
 +			const char* check_msg;
 +			/* We open the data file */
 +
 +			if (one_created) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Data files can only be added at "
 +					"the end of a tablespace, but "
 +					"data file %s existed beforehand.",
 +					name);
 +				return(DB_ERROR);
 +			}
 +			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
 +				ut_a(!srv_read_only_mode);
 +				files[i] = os_file_create(
 +					innodb_file_data_key,
 +					name, OS_FILE_OPEN_RAW,
 +					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +			} else if (i == 0) {
 +				files[i] = os_file_create(
 +					innodb_file_data_key,
 +					name, OS_FILE_OPEN_RETRY,
 +					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +			} else {
 +				files[i] = os_file_create(
 +					innodb_file_data_key,
 +					name, OS_FILE_OPEN, OS_FILE_NORMAL,
 +					OS_DATA_FILE, &ret);
 +			}
 +
 +			if (!ret) {
 +
 +				os_file_get_last_error(true);
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Can't open '%s'", name);
 +
 +				return(DB_ERROR);
 +			}
 +
 +			/* The size of an old raw partition is not compared
 +			with the configured size. */
 +			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
 +
 +				goto skip_size_check;
 +			}
 +
 +size_check:
 +			size = os_file_get_size(files[i]);
 +			ut_a(size != (os_offset_t) -1);
 +
 +			/* Under some error conditions like disk full
 +			scenarios or file size reaching filesystem
 +			limit the data file could contain an incomplete
 +			extent at the end. When we extend a data file
 +			and if some failure happens, then also the data
 +			file could contain an incomplete extent.  So we
 +			need to round the size downward to a megabyte.*/
 +
 +			rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size);
 +
 +			/* An auto-extending last file may be larger than
 +			its configured initial size, but not smaller, and
 +			not larger than the configured maximum, if any. */
 +			if (i == srv_n_data_files - 1
 +			    && srv_auto_extend_last_data_file) {
 +
 +				if (srv_data_file_sizes[i] > rounded_size_pages
 +				    || (srv_last_file_size_max > 0
 +					&& srv_last_file_size_max
 +					< rounded_size_pages)) {
 +
 +					ib_logf(IB_LOG_LEVEL_ERROR,
 +						"auto-extending "
 +						"data file %s is "
 +						"of a different size "
 +						"%lu pages (rounded "
 +						"down to MB) than specified "
 +						"in the .cnf file: "
 +						"initial %lu pages, "
 +						"max %lu (relevant if "
 +						"non-zero) pages!",
 +						name,
 +						(ulong) rounded_size_pages,
 +						(ulong) srv_data_file_sizes[i],
 +						(ulong)
 +						srv_last_file_size_max);
 +
 +					return(DB_ERROR);
 +				}
 +
 +				/* Adopt the actual size, so that the check
 +				below passes for this file. */
 +				srv_data_file_sizes[i] = rounded_size_pages;
 +			}
 +
 +			if (rounded_size_pages != srv_data_file_sizes[i]) {
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Data file %s is of a different "
 +					"size %lu pages (rounded down to MB) "
 +					"than specified in the .cnf file "
 +					"%lu pages!",
 +					name,
 +					(ulong) rounded_size_pages,
 +					(ulong) srv_data_file_sizes[i]);
 +
 +				return(DB_ERROR);
 +			}
 +skip_size_check:
 +
 +			/* This is the earliest location where we can load
 +			the double write buffer. */
 +			if (i == 0) {
 +				buf_dblwr_init_or_load_pages(
 +					files[i], srv_data_file_names[i], true);
 +			}
 +
 +			/* If the first page does not pass the check, try
 +			once to restore it with
 +			fil_user_tablespace_restore_page() and check again. */
 +			bool retry = true;
 +check_first_page:
 +			check_msg = fil_read_first_page(
 +				files[i], one_opened, &flags, &space,
 +				min_flushed_lsn, max_flushed_lsn);
 +
 +			if (check_msg) {
 +
 +				if (retry) {
 +					fsp_open_info	fsp;
 +					const ulint	page_no = 0;
 +
 +					retry = false;
 +					fsp.id = 0;
 +					fsp.filepath = srv_data_file_names[i];
 +					fsp.file = files[i];
 +
 +					if (fil_user_tablespace_restore_page(
 +						&fsp, page_no)) {
 +						goto check_first_page;
 +					}
 +				}
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +						"%s in data file %s",
 +						check_msg, name);
 +				return(DB_ERROR);
 +			}
 +
 +			/* The first file of the system tablespace must
 +			have space ID = TRX_SYS_SPACE.  The FSP_SPACE_ID
 +			field in files greater than ibdata1 are unreliable. */
 +			ut_a(one_opened || space == TRX_SYS_SPACE);
 +
 +			/* Check the flags for the first system tablespace
 +			file only. */
 +			if (!one_opened
 +			    && UNIV_PAGE_SIZE
 +			       != fsp_flags_get_page_size(flags)) {
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Data file \"%s\" uses page size %lu,"
 +					"but the start-up parameter "
 +					"is --innodb-page-size=%lu",
 +					name,
 +					fsp_flags_get_page_size(flags),
 +					UNIV_PAGE_SIZE);
 +
 +				return(DB_ERROR);
 +			}
 +
 +			one_opened = TRUE;
 +		} else if (!srv_read_only_mode) {
 +			/* We created the data file and now write it full of
 +			zeros */
 +
 +			one_created = TRUE;
 +
 +			if (i > 0) {
 +				ib_logf(IB_LOG_LEVEL_INFO,
 +					"Data file %s did not"
 +					" exist: new to be created",
 +					name);
 +			} else {
 +				ib_logf(IB_LOG_LEVEL_INFO,
 +					"The first specified "
 +					"data file %s did not exist: "
 +					"a new database to be created!",
 +					name);
 +
 +				*create_new_db = TRUE;
 +			}
 +
 +			/* srv_data_file_sizes[] is in pages: shift it to
 +			megabytes for the message and to bytes for
 +			os_file_set_size(). */
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Setting file %s size to %lu MB",
 +				name,
 +				(ulong) (srv_data_file_sizes[i]
 +					 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
 +
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Database physically writes the"
 +				" file full: wait...");
 +
 +			ret = os_file_set_size(
 +				name, files[i],
 +				(os_offset_t) srv_data_file_sizes[i]
 +				<< UNIV_PAGE_SIZE_SHIFT);
 +
 +			if (!ret) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Error in creating %s: "
 +					"probably out of disk space",
 +					name);
 +
 +				return(DB_ERROR);
 +			}
 +
 +			*sum_of_new_sizes += srv_data_file_sizes[i];
 +		}
 +
 +		ret = os_file_close(files[i]);
 +		ut_a(ret);
 +
 +		/* The tablespace object (space id 0) is created together
 +		with the first file; every file is then added to it. */
 +		if (i == 0) {
 +			flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
 +			fil_space_create(name, 0, flags, FIL_TABLESPACE);
 +		}
 +
 +		ut_a(fil_validate());
 +
 +		if (!fil_node_create(name, srv_data_file_sizes[i], 0,
 +				     srv_data_file_is_raw_partition[i] != 0)) {
 +			return(DB_ERROR);
 +		}
 +	}
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/*********************************************************************//**
 +Create undo tablespace. Any missing subdirectories of the path are created
 +first. In read-only mode the file is only opened, not created. A newly
 +created file is extended to the requested size. The file handle is closed
 +again on every path on which the file was opened successfully. It is an
 +error if the file cannot be created, including when it already exists.
 +@return	DB_SUCCESS or error code */
 +static
 +dberr_t
 +srv_undo_tablespace_create(
 +/*=======================*/
 +	const char*	name,		/*!< in: tablespace name */
 +	ulint		size)		/*!< in: tablespace size in pages */
 +{
 +	os_file_t	fh;
 +	ibool		ret;
 +	dberr_t		err = DB_SUCCESS;
 +
 +	os_file_create_subdirs_if_needed(name);
 +
 +	fh = os_file_create(
 +		innodb_file_data_key,
 +		name,
 +		srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
 +		OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 +
 +	if (srv_read_only_mode && ret) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"%s opened in read-only mode", name);
 +
 +		/* The handle is not handed to anybody: close it so that
 +		it is not leaked. */
 +		os_file_close(fh);
 +	} else if (ret == FALSE) {
 +		if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
 +#ifdef UNIV_AIX
 +			/* AIX 5.1 after security patch ML7 may have
 +			errno set to 0 here, which causes our function
 +			to return 100; work around that AIX problem */
 +		    && os_file_get_last_error(false) != 100
 +#endif /* UNIV_AIX */
 +		) {
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Can't create UNDO tablespace %s", name);
 +		} else {
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Creating system tablespace with"
 +				" existing undo tablespaces is not"
 +				" supported. Please delete all undo"
 +				" tablespaces before creating new"
 +				" system tablespace.");
 +		}
 +		err = DB_ERROR;
 +	} else {
 +		ut_a(!srv_read_only_mode);
 +
 +		/* We created the data file and now write it full of zeros */
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Data file %s did not exist: new to be created",
 +			name);
 +
 +		/* size is in pages: shift it to megabytes for the message
 +		and to bytes for os_file_set_size(). */
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Setting file %s size to %lu MB",
 +			name, (ulong) (size >> (20 - UNIV_PAGE_SIZE_SHIFT)));
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Database physically writes the file full: wait...");
 +
 +		/* Widen to os_offset_t before shifting, as is done for the
 +		system tablespace files, so that the size in bytes cannot
 +		overflow a ulint. */
 +		ret = os_file_set_size(
 +			name, fh, (os_offset_t) size << UNIV_PAGE_SIZE_SHIFT);
 +
 +		if (!ret) {
 +			/* This is a failure, so report it as an error. */
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Error in creating %s: probably out of "
 +				"disk space", name);
 +
 +			err = DB_ERROR;
 +		}
 +
 +		os_file_close(fh);
 +	}
 +
 +	return(err);
 +}
 +
 +/*********************************************************************//**
 +Open an undo tablespace. The file is opened only to determine its size and
 +is closed again. Then the tablespace is registered with fil_space_create()
 +under the given space id, and the file is added to it with
 +fil_node_create().
 +@return	DB_SUCCESS or error code; DB_ERROR if the file fails the
 +permission check, cannot be opened or cannot be registered */
 +static
 +dberr_t
 +srv_undo_tablespace_open(
 +/*=====================*/
 +	const char*	name,		/*!< in: tablespace name */
 +	ulint		space)		/*!< in: tablespace id */
 +{
 +	os_file_t	fh;
 +	dberr_t		err	= DB_ERROR;
 +	ibool		ret;
 +	ulint		flags;
 +
 +	if (!srv_file_check_mode(name)) {
 +		/* srv_file_check_mode() reports a lack of "read" access
 +		in read-only mode and of "read-write" access otherwise;
 +		state the matching requirement here. */
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"UNDO tablespaces must be %s!",
 +			srv_read_only_mode ? "readable" : "writable");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	fh = os_file_create(
 +		innodb_file_data_key, name,
 +		OS_FILE_OPEN_RETRY
 +		| OS_FILE_ON_ERROR_NO_EXIT
 +		| OS_FILE_ON_ERROR_SILENT,
 +		OS_FILE_NORMAL,
 +		OS_DATA_FILE,
 +		&ret);
 +
 +	/* If the file open was successful then load the tablespace. */
 +
 +	if (ret) {
 +		os_offset_t	size;
 +
 +		size = os_file_get_size(fh);
 +		ut_a(size != (os_offset_t) -1);
 +
 +		ret = os_file_close(fh);
 +		ut_a(ret);
 +
 +		/* Load the tablespace into InnoDB's internal
 +		data structures. */
 +
 +		/* We set the biggest space id to the undo tablespace
 +		because InnoDB hasn't opened any other tablespace apart
 +		from the system tablespace. */
 +
 +		fil_set_max_space_id_if_bigger(space);
 +
 +		/* Set the compressed page size to 0 (non-compressed) */
 +		flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
 +		fil_space_create(name, space, flags, FIL_TABLESPACE);
 +
 +		ut_a(fil_validate());
 +
 +		os_offset_t	n_pages = size / UNIV_PAGE_SIZE;
 +
 +		/* On 64 bit Windows ulint can be 32 bit and os_offset_t
 +		is 64 bit. It is OK to cast the n_pages to ulint because
 +		the unit has been scaled to pages and they are always
 +		32 bit. */
 +		if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
 +			err = DB_SUCCESS;
 +		}
 +	}
 +
 +	return(err);
 +}
 +
 +/********************************************************************
 +Opens the configured number of undo tablespaces.
 + at return	DB_SUCCESS or error code */
 +static
 +dberr_t
 +srv_undo_tablespaces_init(
 +/*======================*/
 +	ibool		create_new_db,		/*!< in: TRUE if new db being
 +						created */
 +	const ulint	n_conf_tablespaces,	/*!< in: configured undo
 +						tablespaces */
 +	ulint*		n_opened)		/*!< out: number of UNDO
 +						tablespaces successfully
 +						discovered and opened */
 +{
 +	ulint		i;
 +	dberr_t		err = DB_SUCCESS;
 +	ulint		prev_space_id = 0;
 +	ulint		n_undo_tablespaces;
 +	ulint		undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
 +
 +	*n_opened = 0;
 +
 +	ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
 +
 +	memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
 +
 +	/* Create the undo spaces only if we are creating a new
 +	instance. We don't allow creating of new undo tablespaces
 +	in an existing instance (yet).  This restriction exists because
 +	we check in several places for SYSTEM tablespaces to be less than
 +	the min of user defined tablespace ids. Once we implement saving
 +	the location of the undo tablespaces and their space ids this
 +	restriction will/should be lifted. */
 +
 +	for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
 +		char	name[OS_FILE_MAX_PATH];
 +
 +		ut_snprintf(
 +			name, sizeof(name),
 +			"%s%cundo%03lu",
 +			srv_undo_dir, SRV_PATH_SEPARATOR, i + 1);
 +
 +		/* Undo space ids start from 1. */
 +		err = srv_undo_tablespace_create(
 +			name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
 +
 +		if (err != DB_SUCCESS) {
 +
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Could not create undo tablespace '%s'.",
 +				name);
 +
 +			return(err);
 +		}
 +	}
 +
 +	/* Get the tablespace ids of all the undo segments excluding
 +	the system tablespace (0). If we are creating a new instance then
 +	we build the undo_tablespace_ids ourselves since they don't
 +	already exist. */
 +
 +	if (!create_new_db) {
 +		n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
 +			undo_tablespace_ids);
 +	} else {
 +		n_undo_tablespaces = n_conf_tablespaces;
 +
 +		for (i = 1; i <= n_undo_tablespaces; ++i) {
 +			undo_tablespace_ids[i - 1] = i;
 +		}
 +
 +		undo_tablespace_ids[i] = ULINT_UNDEFINED;
 +	}
 +
 +	/* Open all the undo tablespaces that are currently in use. If we
 +	fail to open any of these it is a fatal error. The tablespace ids
 +	should be contiguous. It is a fatal error because they are required
 +	for recovery and are referenced by the UNDO logs (a.k.a RBS). */
 +
 +	for (i = 0; i < n_undo_tablespaces; ++i) {
 +		char	name[OS_FILE_MAX_PATH];
 +
 +		ut_snprintf(
 +			name, sizeof(name),
 +			"%s%cundo%03lu",
 +			srv_undo_dir, SRV_PATH_SEPARATOR,
 +			undo_tablespace_ids[i]);
 +
 +		/* Should be no gaps in undo tablespace ids. */
 +		ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
 +
 +		/* The system space id should not be in this array. */
 +		ut_a(undo_tablespace_ids[i] != 0);
 +		ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
 +
 +		/* Undo space ids start from 1. */
 +
 +		err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
 +
 +		if (err != DB_SUCCESS) {
 +
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Unable to open undo tablespace '%s'.", name);
 +
 +			return(err);
 +		}
 +
 +		prev_space_id = undo_tablespace_ids[i];
 +
 +		++*n_opened;
 +	}
 +
 +	/* Open any extra unused undo tablespaces. These must be contiguous.
 +	We stop at the first failure. These are undo tablespaces that are
 +	not in use and therefore not required by recovery. We only check
 +	that there are no gaps. */
 +
 +	for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
 +		char	name[OS_FILE_MAX_PATH];
 +
 +		ut_snprintf(
 +			name, sizeof(name),
 +			"%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
 +
 +		/* Undo space ids start from 1. */
 +		err = srv_undo_tablespace_open(name, i);
 +
 +		if (err != DB_SUCCESS) {
 +			break;
 +		}
 +
 +		++n_undo_tablespaces;
 +
 +		++*n_opened;
 +	}
 +
 +	/* If the user says that there are fewer than what we find we
 +	tolerate that discrepancy but not the inverse. Because there could
 +	be unused undo tablespaces for future use. */
 +
 +	if (n_conf_tablespaces > n_undo_tablespaces) {
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: Expected to open %lu undo "
 +			"tablespaces but was able\n",
 +			n_conf_tablespaces);
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: to find only %lu undo "
 +			"tablespaces.\n", n_undo_tablespaces);
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: Set the "
 +			"innodb_undo_tablespaces parameter to "
 +			"the\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr,
 +			" InnoDB: correct value and retry. Suggested "
 +			"value is %lu\n", n_undo_tablespaces);
 +
 +		return(err != DB_SUCCESS ? err : DB_ERROR);
 +
 +	} else  if (n_undo_tablespaces > 0) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
 +			n_undo_tablespaces);
 +
 +		if (n_conf_tablespaces == 0) {
 +			ib_logf(IB_LOG_LEVEL_WARN,
 +				"Using the system tablespace for all UNDO "
 +				"logging because innodb_undo_tablespaces=0");
 +		}
 +	}
 +
 +	if (create_new_db) {
 +		mtr_t	mtr;
 +
 +		mtr_start(&mtr);
 +
 +		/* The undo log tablespace */
 +		for (i = 1; i <= n_undo_tablespaces; ++i) {
 +
 +			fsp_header_init(
 +				i, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
 +		}
 +
 +		mtr_commit(&mtr);
 +	}
 +
 +	return(DB_SUCCESS);
 +}
 +
 +/********************************************************************
 +Wait for the purge thread(s) to start up.
 +
 +Polls trx_purge_state() for as long as it keeps reporting
 +PURGE_STATE_INIT, logging an informational message and calling
 +os_thread_sleep() between polls. The wait is abandoned as soon as
 +srv_shutdown_state is no longer SRV_SHUTDOWN_NONE or srv_force_recovery
 +is at least SRV_FORCE_NO_BACKGROUND. On entry the purge state is asserted
 +(ut_a) not to be PURGE_STATE_DISABLED; observing PURGE_STATE_EXIT or
 +PURGE_STATE_DISABLED while waiting reaches ut_error. */
 +static
 +void
 +srv_start_wait_for_purge_to_start()
 +/*===============================*/
 +{
 +	/* Wait for the purge coordinator and master thread to startup. */
 +
 +	purge_state_t	state = trx_purge_state();
 +
 +	ut_a(state != PURGE_STATE_DISABLED);
 +
 +	while (srv_shutdown_state == SRV_SHUTDOWN_NONE
 +	       && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
 +	       && state == PURGE_STATE_INIT) {
 +
 +		switch (state = trx_purge_state()) { /* re-read the state on every pass */
 +		case PURGE_STATE_RUN:
 +		case PURGE_STATE_STOP:
 +			break; /* state != INIT, so the while condition ends the loop */
 +
 +		case PURGE_STATE_INIT:
 +			ib_logf(IB_LOG_LEVEL_INFO,
 +				"Waiting for purge to start");
 +
 +			os_thread_sleep(50000); /* presumably microseconds (50 ms) - TODO confirm */
 +			break;
 +
 +		case PURGE_STATE_EXIT:
 +		case PURGE_STATE_DISABLED:
 +			ut_error; /* these states are not expected while waiting for startup */
 +		}
 +	}
 +}
 +
 +/*********************************************************************//**
 +Initializes the log tracking subsystem and starts its thread.
 +
 +If srv_force_recovery is greater than 0 or srv_read_only_mode is set,
 +tracking is switched off by clearing srv_track_changed_pages and nothing
 +else is done. Otherwise, when srv_track_changed_pages is set, this calls
 +log_online_read_init(), creates the srv_redo_log_follow_thread thread,
 +stores its handle in srv_redo_log_follow_thread_handle and sets
 +srv_redo_log_follow_thread_started. */
 +static
 +void
 +init_log_online(void)
 +/*=================*/
 +{
 +	if (UNIV_UNLIKELY(srv_force_recovery > 0 || srv_read_only_mode)) {
 +		srv_track_changed_pages = FALSE; /* force tracking off in these modes */
 +		return;
 +	}
 +
 +	if (srv_track_changed_pages) {
 +
 +		log_online_read_init();
 +
 +		/* Create the thread that follows the redo log to output the
 +		   changed page bitmap */
 +		srv_redo_log_follow_thread_handle = os_thread_create(&srv_redo_log_follow_thread, NULL,
 +				 thread_ids + 5 + SRV_MAX_N_IO_THREADS); /* thread id goes into slot 5 + SRV_MAX_N_IO_THREADS of thread_ids */
 +		srv_redo_log_follow_thread_started = true;
 +	}
 +}
 +
 +/********************************************************************
 +Starts InnoDB and creates a new database if database files
 +are not found and the user wants.
 +@return	DB_SUCCESS or error code */
 +UNIV_INTERN
 +dberr_t
 +innobase_start_or_create_for_mysql(void)
 +/*====================================*/
 +{
 +	ibool		create_new_db;
 +	lsn_t		min_flushed_lsn;
 +	lsn_t		max_flushed_lsn;
 +#ifdef UNIV_LOG_ARCHIVE
 +	lsn_t		min_arch_log_no	= LSN_MAX;
 +	lsn_t		max_arch_log_no	= LSN_MAX;
 +#endif /* UNIV_LOG_ARCHIVE */
 +	ulint		sum_of_new_sizes;
 +	dberr_t		err;
 +	unsigned	i;
 +	ulint		srv_n_log_files_found = srv_n_log_files;
 +	ulint		io_limit;
 +	mtr_t		mtr;
 +	ib_bh_t*	ib_bh;
 +	ulint		n_recovered_trx;
 +	char		logfilename[10000];
 +	char*		logfile0	= NULL;
 +	size_t		dirnamelen;
 +	bool		sys_datafiles_created = false;
 +
 +	/* Check that os_fast_mutexes work as expected */
 +	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
 +
 +	ut_a(0 == os_fast_mutex_trylock(&srv_os_test_mutex));
 +
 +	os_fast_mutex_unlock(&srv_os_test_mutex);
 +
 +	os_fast_mutex_lock(&srv_os_test_mutex);
 +
 +	os_fast_mutex_unlock(&srv_os_test_mutex);
 +
 +	os_fast_mutex_free(&srv_os_test_mutex);
 +
 +	high_level_read_only = srv_read_only_mode
 +		|| srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
 +
 +	if (srv_read_only_mode) {
 +		ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
 +	}
 +
 +#ifdef HAVE_DARWIN_THREADS
 +# ifdef F_FULLFSYNC
 +	/* This executable has been compiled on Mac OS X 10.3 or later.
 +	Assume that F_FULLFSYNC is available at run-time. */
 +	srv_have_fullfsync = TRUE;
 +# else /* F_FULLFSYNC */
 +	/* This executable has been compiled on Mac OS X 10.2
 +	or earlier.  Determine if the executable is running
 +	on Mac OS X 10.3 or later. */
 +	struct utsname utsname;
 +	if (uname(&utsname)) {
 +		ut_print_timestamp(stderr);
 +		fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
 +	} else {
 +		srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
 +	}
 +	if (!srv_have_fullfsync) {
 +		ut_print_timestamp(stderr);
 +		fputs(" InnoDB: On Mac OS X, fsync() may be "
 +		      "broken on internal drives,\n", stderr);
 +		ut_print_timestamp(stderr);
 +		fputs(" InnoDB: making transactions unsafe!\n", stderr);
 +	}
 +# endif /* F_FULLFSYNC */
 +#endif /* HAVE_DARWIN_THREADS */
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Using %s to ref count buffer pool pages",
 +#ifdef PAGE_ATOMIC_REF_COUNT
 +		"atomics"
 +#else
 +		"mutexes"
 +#endif /* PAGE_ATOMIC_REF_COUNT */
 +	);
 +
 +	compile_time_assert(sizeof(ulint) == sizeof(void*));
 +
 +	/* If stacktrace is used we set up signal handler for SIGUSR2 signal
 +	here. If signal handler set fails we report that and disable
 +	stacktrace feature. */
 +
 +	if (srv_use_stacktrace) {
 +#if defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS
 +		 struct sigaction sigact;
 +
 +		 sigact.sa_sigaction = os_stacktrace_print;
 +		 sigact.sa_flags = SA_RESTART | SA_SIGINFO;
 +
 +		 if (sigaction(SIGUSR2, &sigact, (struct sigaction *)NULL) != 0)
 +		 {
 +			 fprintf(stderr, " InnoDB:error setting signal handler for %d (%s)\n",
 +				 SIGUSR2, strsignal(SIGUSR2));
 +			 srv_use_stacktrace = FALSE;
 +
 +		 }
 +#endif /* defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS */
 +	}
 +
 +#ifdef UNIV_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
 +#endif
 +
 +#ifdef UNIV_IBUF_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n");
 +# ifdef UNIV_IBUF_COUNT_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
 +		"!!!!!!!!!\n");
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n");
 +# endif
 +#endif
 +
 +#ifdef UNIV_BLOB_DEBUG
 +	fprintf(stderr,
 +		"InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
 +		"InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
 +#endif /* UNIV_BLOB_DEBUG */
 +
 +#ifdef UNIV_SYNC_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
 +#endif
 +
 +#ifdef UNIV_SEARCH_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
 +#endif
 +
 +#ifdef UNIV_LOG_LSN_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
 +#endif /* UNIV_LOG_LSN_DEBUG */
 +#ifdef UNIV_MEM_DEBUG
 +	ut_print_timestamp(stderr);
 +	fprintf(stderr,
 +		" InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
 +#endif
 +
 +	if (srv_use_sys_malloc) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"The InnoDB memory heap is disabled");
 +	}
 +
 +#if defined(COMPILER_HINTS_ENABLED)
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		" InnoDB: Compiler hints enabled.");
 +#endif /* defined(COMPILER_HINTS_ENABLED) */
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"" IB_ATOMICS_STARTUP_MSG "");
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"" IB_MEMORY_BARRIER_STARTUP_MSG "");
 +
 +#ifndef HAVE_MEMORY_BARRIER
 +#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
 +#else
 +	ib_logf(IB_LOG_LEVEL_WARN,
 +		"MySQL was built without a memory barrier capability on this"
 +		" architecture, which might allow a mutex/rw_lock violation"
 +		" under high thread concurrency. This may cause a hang.");
 +#endif /* IA32 or AMD64 */
 +#endif /* HAVE_MEMORY_BARRIER */
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Compressed tables use zlib " ZLIB_VERSION
 +#ifdef UNIV_ZIP_DEBUG
 +	      " with validation"
 +#endif /* UNIV_ZIP_DEBUG */
 +	      );
 +#ifdef UNIV_ZIP_COPY
 +	ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
 +#endif /* UNIV_ZIP_COPY */
 +
 +
 +	/* Since InnoDB does not currently clean up all its internal data
 +	structures in MySQL Embedded Server Library server_end(), we
 +	print an error message if someone tries to start up InnoDB a
 +	second time during the process lifetime. */
 +
 +	if (srv_start_has_been_called) {
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: Error: startup called second time "
 +			"during the process\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
 +			"Server Library you\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: cannot call server_init() more "
 +			"than once during the\n");
 +		ut_print_timestamp(stderr);
 +		fprintf(stderr, " InnoDB: process lifetime.\n");
 +	}
 +
 +	srv_start_has_been_called = TRUE;
 +
 +#ifdef UNIV_DEBUG
 +	log_do_write = TRUE;
 +#endif /* UNIV_DEBUG */
 +	/*	yydebug = TRUE; */
 +
 +	srv_is_being_started = TRUE;
 +	srv_startup_is_before_trx_rollback_phase = TRUE;
 +
 +#ifdef __WIN__
 +	switch (os_get_os_version()) {
 +	case OS_WIN95:
 +	case OS_WIN31:
 +	case OS_WINNT:
 +		srv_use_native_conditions = FALSE;
 +		/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
 +		and NT use simulated aio. In NT Windows provides async i/o,
 +		but when run in conjunction with InnoDB Hot Backup, it seemed
 +		to corrupt the data files. */
 +
 +		srv_use_native_aio = FALSE;
 +		break;
 +
 +	case OS_WIN2000:
 +	case OS_WINXP:
 +		/* On 2000 and XP, async IO is available, but no condition variables. */
 +		srv_use_native_aio = TRUE;
 +		srv_use_native_conditions = FALSE;
 + 		break;
 +
 +	default:
 +		/* Vista and later have both async IO and condition variables */
 +		srv_use_native_aio = TRUE;
 +		srv_use_native_conditions = TRUE;
 +		break;
 +	}
 +
 +#elif defined(LINUX_NATIVE_AIO)
 +
 +	if (srv_use_native_aio) {
 +		ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
 +	}
 +#else
 +	/* Currently native AIO is supported only on windows and linux
 +	and that also when the support is compiled in. In all other
 +	cases, we ignore the setting of innodb_use_native_aio. */
 +	srv_use_native_aio = FALSE;
 +#endif /* __WIN__ */
 +
 +	if (srv_file_flush_method_str == NULL) {
 +		/* These are the default options */
 +
 +		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
 +
 +		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
 +		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
 +		srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
 +		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
 +		srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
 +		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
 +		srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
 +		srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
 +#ifdef _WIN32
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
 +		srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
 +		srv_use_native_aio = FALSE;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
 +		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
 +		srv_use_native_aio = FALSE;
 +
 +	} else if (0 == ut_strcmp(srv_file_flush_method_str,
 +				  "async_unbuffered")) {
 +		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
 +		srv_use_native_aio = TRUE;
 +#endif /* __WIN__ */
 +	} else {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Unrecognized value %s for innodb_flush_method",
 +			srv_file_flush_method_str);
 +		return(DB_ERROR);
 +	}
 +
 +	/* Note that the call srv_boot() also changes the values of
 +	some variables to the units used by InnoDB internally */
 +
 +	/* Set the maximum number of threads which can wait for a semaphore
 +	inside InnoDB: this is the 'sync wait array' size, as well as the
 +	maximum number of threads that can wait in the 'srv_conc array' for
 +	their time to enter InnoDB. */
 +
 +#define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
 +	srv_max_n_threads = 1   /* io_ibuf_thread */
 +			    + 1 /* io_log_thread */
 +			    + 1 /* lock_wait_timeout_thread */
 +			    + 1 /* srv_error_monitor_thread */
 +			    + 1 /* srv_monitor_thread */
 +			    + 1 /* srv_master_thread */
 +			    + 1 /* srv_redo_log_follow_thread */
 +			    + 1 /* srv_purge_coordinator_thread */
 +			    + 1 /* buf_dump_thread */
 +			    + 1 /* dict_stats_thread */
 +			    + 1 /* fts_optimize_thread */
 +			    + 1 /* recv_writer_thread */
 +			    + 1 /* buf_flush_page_cleaner_thread */
 +			    + 1 /* trx_rollback_or_clean_all_recovered */
 +			    + 128 /* added as margin, for use of
 +				  InnoDB Memcached etc. */
 +			    + max_connections
 +			    + srv_n_read_io_threads
 +			    + srv_n_write_io_threads
 +			    + srv_n_purge_threads
 +			    /* FTS Parallel Sort */
 +			    + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
 +			      * max_connections;
 +
 +	if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) {
 +		/* If buffer pool is less than 1 GB,
 +		use only one buffer pool instance */
 +		srv_buf_pool_instances = 1;
 +	}
 +
 +	srv_boot();
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"%s CPU crc32 instructions",
 +		ut_crc32_sse2_enabled ? "Using" : "Not using");
 +
 +	if (!srv_read_only_mode) {
 +
 +		mutex_create(srv_monitor_file_mutex_key,
 +			     &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
 +
 +		if (srv_innodb_status) {
 +
 +			srv_monitor_file_name = static_cast<char*>(
 +				mem_alloc(
 +					strlen(fil_path_to_mysql_datadir)
 +					+ 20 + sizeof "/innodb_status."));
 +
 +			sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
 +				fil_path_to_mysql_datadir,
 +				os_proc_get_number());
 +
 +			srv_monitor_file = fopen(srv_monitor_file_name, "w+");
 +
 +			if (!srv_monitor_file) {
 +
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Unable to create %s: %s",
 +					srv_monitor_file_name,
 +					strerror(errno));
 +
 +				return(DB_ERROR);
 +			}
 +		} else {
 +			srv_monitor_file_name = NULL;
 +			srv_monitor_file = os_file_create_tmpfile(NULL);
 +
 +			if (!srv_monitor_file) {
 +				return(DB_ERROR);
 +			}
 +		}
 +
 +		mutex_create(srv_dict_tmpfile_mutex_key,
 +			     &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
 +
 +		srv_dict_tmpfile = os_file_create_tmpfile(NULL);
 +
 +		if (!srv_dict_tmpfile) {
 +			return(DB_ERROR);
 +		}
 +
 +		mutex_create(srv_misc_tmpfile_mutex_key,
 +			     &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
 +
 +		srv_misc_tmpfile = os_file_create_tmpfile(NULL);
 +
 +		if (!srv_misc_tmpfile) {
 +			return(DB_ERROR);
 +		}
 +	}
 +
 +	/* If user has set the value of innodb_file_io_threads then
 +	we'll emit a message telling the user that this parameter
 +	is now deprecated. */
 +	if (srv_n_file_io_threads != 4) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"innodb_file_io_threads is deprecated. Please use "
 +			"innodb_read_io_threads and innodb_write_io_threads "
 +			"instead");
 +	}
 +
 +	/* Now overwrite the value on srv_n_file_io_threads */
 +	srv_n_file_io_threads = srv_n_read_io_threads;
 +
 +	if (!srv_read_only_mode) {
 +		/* Add the log and ibuf IO threads. */
 +		srv_n_file_io_threads += 2;
 +		srv_n_file_io_threads += srv_n_write_io_threads;
 +	} else {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Disabling background IO write threads.");
 +
 +		srv_n_write_io_threads = 0;
 +	}
 +
 +	ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
 +
 +	io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
 +
 +	/* On Windows when using native aio the number of aio requests
 +	that a thread can handle at a given time is limited to 32
 +	i.e.: SRV_N_PENDING_IOS_PER_THREAD */
 +# ifdef __WIN__
 +	if (srv_use_native_aio) {
 +		io_limit = SRV_N_PENDING_IOS_PER_THREAD;
 +	}
 +# endif /* __WIN__ */
 +
 +	if (!os_aio_init(io_limit,
 +			 srv_n_read_io_threads,
 +			 srv_n_write_io_threads,
 +			 SRV_MAX_N_PENDING_SYNC_IOS)) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Fatal : Cannot initialize AIO sub-system");
 +#if defined(LINUX_NATIVE_AIO)
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +                        "You can try increasing system fs.aio-max-nr to 1048576 "
 +                        "or larger or setting innodb_use_native_aio = 0 in my.cnf");
 +#endif
 +
 +		return(DB_ERROR);
 +	}
 +
 +	fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
 +
 +	double	size;
 +	char	unit;
 +
 +	if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
 +		size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
 +		unit = 'G';
 +	} else {
 +		size = ((double) srv_buf_pool_size) / (1024 * 1024);
 +		unit = 'M';
 +	}
 +
 +	/* Print time to initialize the buffer pool */
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Initializing buffer pool, size = %.1f%c", size, unit);
 +
 +	err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
 +
 +	if (err != DB_SUCCESS) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Cannot allocate memory for the buffer pool");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	ib_logf(IB_LOG_LEVEL_INFO,
 +		"Completed initialization of buffer pool");
 +
 +#ifdef UNIV_DEBUG
 +	/* We have observed deadlocks with a 5MB buffer pool but
 +	the actual lower limit could very well be a little higher. */
 +
 +	if (srv_buf_pool_size <= 5 * 1024 * 1024) {
 +
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Small buffer pool size (%luM), the flst_validate() "
 +			"debug function can cause a deadlock if the "
 +			"buffer pool fills up.",
 +			srv_buf_pool_size / 1024 / 1024);
 +	}
 +#endif /* UNIV_DEBUG */
 +
 +	fsp_init();
 +	log_init();
 +	log_online_init();
 +
 +	lock_sys_create(srv_lock_table_size);
 +
 +	/* Create i/o-handler threads: */
 +
 +	for (i = 0; i < srv_n_file_io_threads; ++i) {
 +
 +		n[i] = i;
 +
 +		thread_handles[i] = os_thread_create(io_handler_thread, n + i, thread_ids + i);
 +		thread_started[i] = true;
 +	}
 +
 +	if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
 +	    >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
 +		/* log_block_convert_lsn_to_no() limits the returned block
 +		number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
 +		bytes, then we have a limit of 512 GB. If that limit is to
 +		be raised, then log_block_convert_lsn_to_no() must be
 +		modified. */
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Combined size of log files must be < 512 GB");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
 +		/* fil_io() takes ulint as an argument and we are passing
 +		(next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
 +		So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
 +		So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
 +		means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
 +		is 64 TB on 32 bit systems. */
 +		fprintf(stderr,
 +			" InnoDB: Error: combined size of log files"
 +			" must be < %lu GB\n",
 +			ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
 +
 +		return(DB_ERROR);
 +	}
 +
 +	sum_of_new_sizes = 0;
 +
 +	for (i = 0; i < srv_n_data_files; i++) {
 +#ifndef __WIN__
 +		if (sizeof(off_t) < 5
 +		    && srv_data_file_sizes[i]
 +		    >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
 +			ut_print_timestamp(stderr);
 +			fprintf(stderr,
 +				" InnoDB: Error: file size must be < 4 GB"
 +				" with this MySQL binary\n");
 +			ut_print_timestamp(stderr);
 +			fprintf(stderr,
 +				" InnoDB: and operating system combination,"
 +				" in some OS's < 2 GB\n");
 +
 +			return(DB_ERROR);
 +		}
 +#endif
 +		sum_of_new_sizes += srv_data_file_sizes[i];
 +	}
 +
 +	if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Tablespace size must be at least 10 MB");
 +
 +		return(DB_ERROR);
 +	}
 +
 +	recv_sys_create();
 +	recv_sys_init(buf_pool_get_curr_size());
 +
 +	err = open_or_create_data_files(&create_new_db,
 +#ifdef UNIV_LOG_ARCHIVE
 +					&min_arch_log_no, &max_arch_log_no,
 +#endif /* UNIV_LOG_ARCHIVE */
 +					&min_flushed_lsn, &max_flushed_lsn,
 +					&sum_of_new_sizes);
 +	if (err == DB_FAIL) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"The system tablespace must be writable!");
 +
 +		return(DB_ERROR);
 +
 +	} else if (err != DB_SUCCESS) {
 +
 +		ib_logf(IB_LOG_LEVEL_ERROR,
 +			"Could not open or create the system tablespace. If "
 +			"you tried to add new data files to the system "
 +			"tablespace, and it failed here, you should now "
 +			"edit innodb_data_file_path in my.cnf back to what "
 +			"it was, and remove the new ibdata files InnoDB "
 +			"created in this failed attempt. InnoDB only wrote "
 +			"those files full of zeros, but did not yet use "
 +			"them in any way. But be careful: do not remove "
 +			"old data files which contain your precious data!");
 +
 +		return(err);
 +	}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	srv_normalize_path_for_win(srv_arch_dir);
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	dirnamelen = strlen(srv_log_group_home_dir);
 +	ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
 +	memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
 +
 +	/* Add a path separator if needed. */
 +	if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
 +		logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
 +	}
 +
 +	srv_log_file_size_requested = srv_log_file_size;
 +
 +	if (create_new_db) {
 +		bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
 +		ut_a(success);
 +
 +		min_flushed_lsn = max_flushed_lsn = log_get_lsn();
 +
 +		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +		err = create_log_files(create_new_db, logfilename, dirnamelen,
 +				       max_flushed_lsn, logfile0);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +	} else {
 +		for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
 +			os_offset_t	size;
 +			os_file_stat_t	stat_info;
 +
 +			sprintf(logfilename + dirnamelen,
 +				"ib_logfile%u", i);
 +
 +			err = os_file_get_status(
 +				logfilename, &stat_info, false);
 +
 +			if (err == DB_NOT_FOUND) {
 +				if (i == 0) {
 +					if (max_flushed_lsn
 +					    != min_flushed_lsn) {
 +						ib_logf(IB_LOG_LEVEL_ERROR,
 +							"Cannot create"
 +							" log files because"
 +							" data files are"
 +							" corrupt or"
 +							" not in sync"
 +							" with each other");
 +						return(DB_ERROR);
 +					}
 +
 +					if (max_flushed_lsn < (lsn_t) 1000) {
 +						ib_logf(IB_LOG_LEVEL_ERROR,
 +							"Cannot create"
 +							" log files because"
 +							" data files are"
 +							" corrupt or the"
 +							" database was not"
 +							" shut down cleanly"
 +							" after creating"
 +							" the data files.");
 +						return(DB_ERROR);
 +					}
 +
 +					err = create_log_files(
 +						create_new_db, logfilename,
 +						dirnamelen, max_flushed_lsn,
 +						logfile0);
 +
 +					if (err == DB_SUCCESS) {
 +						err = create_log_files_rename(
 +							logfilename,
 +							dirnamelen,
 +							max_flushed_lsn,
 +							logfile0);
 +					}
 +
 +					if (err != DB_SUCCESS) {
 +						return(err);
 +					}
 +
 +					/* Suppress the message about
 +					crash recovery. */
 +					max_flushed_lsn = min_flushed_lsn
 +						= log_get_lsn();
 +					goto files_checked;
 +				} else if (i < 2) {
 +					/* must have at least 2 log files */
 +					ib_logf(IB_LOG_LEVEL_ERROR,
 +						"Only one log file found.");
 +					return(err);
 +				}
 +
 +				/* opened all files */
 +				break;
 +			}
 +
 +			if (!srv_file_check_mode(logfilename)) {
 +				return(DB_ERROR);
 +			}
 +
 +			err = open_log_file(&files[i], logfilename, &size);
 +
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +
 +			ut_a(size != (os_offset_t) -1);
 +
 +			if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Log file %s size "
 +					UINT64PF " is not a multiple of"
 +					" innodb_page_size",
 +					logfilename, size);
 +				return(DB_ERROR);
 +			}
 +
 +			size >>= UNIV_PAGE_SIZE_SHIFT;
 +
 +			if (i == 0) {
 +				srv_log_file_size = size;
 +			} else if (size != srv_log_file_size) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Log file %s is"
 +					" of different size " UINT64PF " bytes"
 +					" than other log"
 +					" files " UINT64PF " bytes!",
 +					logfilename,
 +					size << UNIV_PAGE_SIZE_SHIFT,
 +					(os_offset_t) srv_log_file_size
 +					<< UNIV_PAGE_SIZE_SHIFT);
 +				return(DB_ERROR);
 +			}
 +		}
 +
 +		srv_n_log_files_found = i;
 +
 +		/* Create the in-memory file space objects. */
 +
 +		sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
 +
 +		fil_space_create(logfilename,
 +				 SRV_LOG_SPACE_FIRST_ID,
 +				 fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
 +				 FIL_LOG);
 +
 +		ut_a(fil_validate());
 +
 +		/* srv_log_file_size is measured in pages; if page size is 16KB,
 +		then we have a limit of 64TB on 32 bit systems */
 +		ut_a(srv_log_file_size <= ULINT_MAX);
 +
 +		for (unsigned j = 0; j < i; j++) {
 +			sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
 +
 +			if (!fil_node_create(logfilename,
 +					     (ulint) srv_log_file_size,
 +					     SRV_LOG_SPACE_FIRST_ID, FALSE)) {
 +				return(DB_ERROR);
 +			}
 +		}
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +		/* Create the file space object for archived logs. Under
 +		MySQL, no archiving ever done. */
 +		fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
 +				 0, FIL_LOG);
 +#endif /* UNIV_LOG_ARCHIVE */
 +		log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
 +			       SRV_LOG_SPACE_FIRST_ID,
 +			       SRV_LOG_SPACE_FIRST_ID + 1);
 +	}
 +
 +files_checked:
 +	/* Open all log files and data files in the system
 +	tablespace: we keep them open until database
 +	shutdown */
 +
 +	fil_open_log_and_system_tablespace_files();
 +
 +	err = srv_undo_tablespaces_init(
 +		create_new_db,
 +		srv_undo_tablespaces,
 +		&srv_undo_tablespaces_open);
 +
 +	/* If the force recovery is set very high then we carry on regardless
 +	of all errors. Basically this is fingers crossed mode. */
 +
 +	if (err != DB_SUCCESS
 +	    && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
 +
 +		return(err);
 +	}
 +
 +	/* Initialize objects used by dict stats gathering thread, which
 +	can also be used by recovery if it tries to drop some table */
 +	if (!srv_read_only_mode) {
 +		dict_stats_thread_init();
 +	}
 +
 +	trx_sys_file_format_init();
 +
 +	trx_sys_create();
 +
 +	bool srv_monitor_thread_started = false;
 +
 +	if (create_new_db) {
 +		ut_a(!srv_read_only_mode);
 +		init_log_online();
 +
 +		mtr_start(&mtr);
 +
 +		fsp_header_init(0, sum_of_new_sizes, &mtr);
 +
 +		mtr_commit(&mtr);
 +
 +		/* To maintain backward compatibility we create only
 +		the first rollback segment before the double write buffer.
 +		All the remaining rollback segments will be created later,
 +		after the double write buffer has been created. */
 +		trx_sys_create_sys_pages();
 +
 +		ib_bh = trx_sys_init_at_db_start();
 +		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
 +
 +		/* The purge system needs to create the purge view and
 +		therefore requires that the trx_sys is inited. */
 +
 +		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
 +
 +		err = dict_create();
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +
 +		srv_startup_is_before_trx_rollback_phase = FALSE;
 +
 +		bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
 +		ut_a(success);
 +
 +		min_flushed_lsn = max_flushed_lsn = log_get_lsn();
 +
 +		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +		/* Stamp the LSN to the data files. */
 +		fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
 +
 +		fil_flush_file_spaces(FIL_TABLESPACE);
 +
 +		err = create_log_files_rename(logfilename, dirnamelen,
 +					      max_flushed_lsn, logfile0);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +	} else {
 +
 +		/* Check if we support the max format that is stamped
 +		on the system tablespace.
 +		Note:  We are NOT allowed to make any modifications to
 +		the TRX_SYS_PAGE_NO page before recovery  because this
 +		page also contains the max_trx_id etc. important system
 +		variables that are required for recovery.  We need to
 +		ensure that we return the system to a state where normal
 +		recovery is guaranteed to work. We do this by
 +		invalidating the buffer cache, this will force the
 +		reread of the page and restoration to its last known
 +		consistent state, this is REQUIRED for the recovery
 +		process to work. */
 +		err = trx_sys_file_format_max_check(
 +			srv_max_file_format_at_startup);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +
 +		/* Invalidate the buffer pool to ensure that we reread
 +		the page that we read above, during recovery.
 +		Note that this is not as heavy weight as it seems. At
 +		this point there will be only ONE page in the buf_LRU
 +		and there must be no page in the buf_flush list. */
 +		buf_pool_invalidate();
 +
 +		/* Start monitor thread early enough so that e.g. crash
 +		recovery failing to find free pages in the buffer pool is
 +		diagnosed. */
 +		if (!srv_read_only_mode)
 +		{
 +			/* Create the thread which prints InnoDB monitor
 +			info */
 +			thread_handles[4 + SRV_MAX_N_IO_THREADS] =
 +				os_thread_create(
 +					srv_monitor_thread,
 +					NULL,
 +					thread_ids + 4 + SRV_MAX_N_IO_THREADS);
 +
 +			thread_started[4 + SRV_MAX_N_IO_THREADS] = true;
 +		}
 +
 +		/* We always try to do a recovery, even if the database had
 +		been shut down normally: this is the normal startup path */
 +
 +		err = recv_recovery_from_checkpoint_start(
 +			LOG_CHECKPOINT, LSN_MAX,
 +			min_flushed_lsn, max_flushed_lsn);
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +
 +		init_log_online();
 +
 +		/* Initialize the change buffer. */
 +		err = dict_boot();
 +
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +
 +		if (!srv_read_only_mode) {
 +			if (sum_of_new_sizes > 0) {
 +				/* New data file(s) were added */
 +				mtr_start(&mtr);
 +				fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
 +				mtr_commit(&mtr);
 +				/* Immediately write the log record about
 +				increased tablespace size to disk, so that it
 +				is durable even if mysqld would crash
 +				quickly */
 +				log_buffer_flush_to_disk();
 +			}
 +		}
 +
 +		const ulint	tablespace_size_in_header
 +			= fsp_header_get_tablespace_size();
 +
 +#ifdef UNIV_DEBUG
 +		/* buf_debug_prints = TRUE; */
 +#endif /* UNIV_DEBUG */
 +		ulint	sum_of_data_file_sizes = 0;
 +
 +		for (ulint d = 0; d < srv_n_data_files; d++) {
 +			sum_of_data_file_sizes += srv_data_file_sizes[d];
 +		}
 +
 +		/* Compare the system tablespace file size to what is
 +		stored in FSP_SIZE. In open_or_create_data_files()
 +		we already checked that the file sizes match the
 +		innodb_data_file_path specification. */
 +		if (srv_read_only_mode
 +		    || sum_of_data_file_sizes == tablespace_size_in_header) {
 +			/* Do not complain about the size. */
 +		} else if (!srv_auto_extend_last_data_file
 +			   || sum_of_data_file_sizes
 +			   < tablespace_size_in_header) {
 +			ib_logf(IB_LOG_LEVEL_ERROR,
 +				"Tablespace size stored in header is " ULINTPF
 +				" pages, but the sum of data file sizes is "
 +				ULINTPF " pages",
 +				tablespace_size_in_header,
 +				sum_of_data_file_sizes);
 +
 +			if (srv_force_recovery == 0
 +			    && sum_of_data_file_sizes
 +			    < tablespace_size_in_header) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Cannot start InnoDB. The tail of"
 +					" the system tablespace is"
 +					" missing. Have you edited"
 +					" innodb_data_file_path in my.cnf"
 +					" in an inappropriate way, removing"
 +					" data files from there?"
 +					" You can set innodb_force_recovery=1"
 +					" in my.cnf to force"
 +					" a startup if you are trying to"
 +					" recover a badly corrupt database.");
 +
 +				return(DB_ERROR);
 +			}
 +		}
 +
- 		/* This must precede recv_apply_hashed_log_recs(TRUE). */
++		/* This must precede recv_apply_hashed_log_recs(true). */
 +		ib_bh = trx_sys_init_at_db_start();
 +
 +		if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
 +			/* Apply the hashed log records to the
 +			respective file pages, for the last batch of
 +			recv_group_scan_log_recs(). */
 +
- 			recv_apply_hashed_log_recs(TRUE);
++			recv_apply_hashed_log_recs(true);
 +			DBUG_PRINT("ib_log", ("apply completed"));
 +		}
 +
 +		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
 +
 +		/* The purge system needs to create the purge view and
 +		therefore requires that the trx_sys is inited. */
 +
 +		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
 +
 +		/* recv_recovery_from_checkpoint_finish needs trx lists which
 +		are initialized in trx_sys_init_at_db_start(). */
 +
 +		recv_recovery_from_checkpoint_finish();
 +
 +		if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
 +			/* The following call is necessary for the insert
 +			buffer to work with multiple tablespaces. We must
 +			know the mapping between space id's and .ibd file
 +			names.
 +
 +			In a crash recovery, we check that the info in data
 +			dictionary is consistent with what we already know
 +			about space id's from the call of
 +			fil_load_single_table_tablespaces().
 +
 +			In a normal startup, we create the space objects for
 +			every table in the InnoDB data dictionary that has
 +			an .ibd file.
 +
 +			We also determine the maximum tablespace id used. */
 +			dict_check_t	dict_check;
 +
 +			if (recv_needed_recovery) {
 +				dict_check = DICT_CHECK_ALL_LOADED;
 +			} else if (n_recovered_trx) {
 +				dict_check = DICT_CHECK_SOME_LOADED;
 +			} else {
 +				dict_check = DICT_CHECK_NONE_LOADED;
 +			}
 +
 +			/* Create the SYS_TABLESPACES and SYS_DATAFILES system table */
 +			err = dict_create_or_check_sys_tablespace();
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +
 +			sys_datafiles_created = true;
 +
 +			/* This function assumes that SYS_DATAFILES exists */
 +			dict_check_tablespaces_and_store_max_id(dict_check);
 +		}
 +
 +		if (!srv_force_recovery
 +		    && !recv_sys->found_corrupt_log
 +		    && (srv_log_file_size_requested != srv_log_file_size
 +			|| srv_n_log_files_found != srv_n_log_files)) {
 +			/* Prepare to replace the redo log files. */
 +
 +			if (srv_read_only_mode) {
 +				ib_logf(IB_LOG_LEVEL_ERROR,
 +					"Cannot resize log files "
 +					"in read-only mode.");
 +				return(DB_READ_ONLY);
 +			}
 +
 +			/* Clean the buffer pool. */
 +			bool success = buf_flush_list(
 +				ULINT_MAX, LSN_MAX, NULL);
 +			ut_a(success);
 +
 +			DBUG_EXECUTE_IF("innodb_log_abort_1",
 +					return(DB_ERROR););
 +
 +			min_flushed_lsn = max_flushed_lsn = log_get_lsn();
 +
 +			ib_logf(IB_LOG_LEVEL_WARN,
 +				"Resizing redo log from %u*%u to %u*%u pages"
 +				", LSN=" LSN_PF,
 +				(unsigned) i,
 +				(unsigned) srv_log_file_size,
 +				(unsigned) srv_n_log_files,
 +				(unsigned) srv_log_file_size_requested,
 +				max_flushed_lsn);
 +
 +			buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
 +
 +			/* Flush the old log files. */
 +			log_buffer_flush_to_disk();
 +			/* If innodb_flush_method=O_DSYNC,
 +			we need to explicitly flush the log buffers. */
 +			fil_flush(SRV_LOG_SPACE_FIRST_ID);
 +
 +			ut_ad(max_flushed_lsn == log_get_lsn());
 +
 +			/* Prohibit redo log writes from any other
 +			threads until creating a log checkpoint at the
 +			end of create_log_files(). */
 +			ut_d(recv_no_log_write = TRUE);
 +			ut_ad(!buf_pool_check_no_pending_io());
 +
 +			DBUG_EXECUTE_IF("innodb_log_abort_3",
 +					return(DB_ERROR););
 +
 +			/* Stamp the LSN to the data files. */
 +			fil_write_flushed_lsn_to_data_files(
 +				max_flushed_lsn, 0);
 +
 +			DBUG_EXECUTE_IF("innodb_log_abort_4", err = DB_ERROR;);
 +
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +
 +			fil_flush_file_spaces(FIL_TABLESPACE);
 +
 +			/* Close and free the redo log files, so that
 +			we can replace them. */
 +			fil_close_log_files(true);
 +
 +			DBUG_EXECUTE_IF("innodb_log_abort_5",
 +					return(DB_ERROR););
 +
 +			/* Free the old log file space. */
 +			log_group_close_all();
 +
 +			ib_logf(IB_LOG_LEVEL_WARN,
 +				"Starting to delete and rewrite log files.");
 +
 +			srv_log_file_size = srv_log_file_size_requested;
 +
 +			err = create_log_files(create_new_db, logfilename,
 +					       dirnamelen, max_flushed_lsn,
 +					       logfile0);
 +
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +
 +			/* create_log_files() can increase system lsn that is
 +			why FIL_PAGE_FILE_FLUSH_LSN have to be updated */
 +			min_flushed_lsn = max_flushed_lsn = log_get_lsn();
 +			fil_write_flushed_lsn_to_data_files(min_flushed_lsn, 0);
 +			fil_flush_file_spaces(FIL_TABLESPACE);
 +
 +			err = create_log_files_rename(logfilename, dirnamelen,
 +						      log_get_lsn(), logfile0);
 +			if (err != DB_SUCCESS) {
 +				return(err);
 +			}
 +		}
 +
 +		srv_startup_is_before_trx_rollback_phase = FALSE;
 +		recv_recovery_rollback_active();
 +
 +		/* It is possible that file_format tag has never
 +		been set. In this case we initialize it to minimum
 +		value.  Important to note that we can do it ONLY after
 +		we have finished the recovery process so that the
 +		image of TRX_SYS_PAGE_NO is not stale. */
 +		trx_sys_file_format_tag_init();
 +	}
 +
 +	ut_ad(err == DB_SUCCESS);
 +	ut_a(sum_of_new_sizes != ULINT_UNDEFINED);
 +
 +#ifdef UNIV_LOG_ARCHIVE
 +	if (!srv_read_only_mode) {
 +		if (!srv_log_archive_on) {
 +			ut_a(DB_SUCCESS == log_archive_noarchivelog());
 +		} else {
 +			bool	start_archive;
 +
 +			mutex_enter(&(log_sys->mutex));
 +
 +			start_archive = false;
 +
 +			if (log_sys->archiving_state == LOG_ARCH_OFF) {
 +				start_archive = true;
 +			}
 +
 +			mutex_exit(&(log_sys->mutex));
 +
 +			if (start_archive) {
 +				ut_a(DB_SUCCESS == log_archive_archivelog());
 +			}
 +		}
 +	}
 +#endif /* UNIV_LOG_ARCHIVE */
 +
 +	/* fprintf(stderr, "Max allowed record size %lu\n",
 +	page_get_free_space_of_empty() / 2); */
 +
 +	if (buf_dblwr == NULL) {
 +		/* Create the doublewrite buffer to a new tablespace */
 +
 +		buf_dblwr_create();
 +	}
 +
 +	/* Here the double write buffer has already been created and so
 +	any new rollback segments will be allocated after the double
 +	write buffer. The default segment should already exist.
 +	We create the new segments only if it's a new database or
 +	the database was shutdown cleanly. */
 +
 +	/* Note: When creating the extra rollback segments during an upgrade
 +	we violate the latching order, even if the change buffer is empty.
 +	We make an exception in sync0sync.cc and check srv_is_being_started
 +	for that violation. It cannot create a deadlock because we are still
 +	running in single threaded mode essentially. Only the IO threads
 +	should be running at this stage. */
 +
 +	ut_a(srv_undo_logs > 0);
 +	ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
 +
 +	/* The number of rsegs that exist in InnoDB is given by status
 +	variable srv_available_undo_logs. The number of rsegs to use can
 +	be set using the dynamic global variable srv_undo_logs. */
 +
 +	srv_available_undo_logs = trx_sys_create_rsegs(
 +		srv_undo_tablespaces, srv_undo_logs);
 +
 +	if (srv_available_undo_logs == ULINT_UNDEFINED) {
 +		/* Can only happen if server is read only. */
 +		ut_a(srv_read_only_mode);
 +		srv_undo_logs = ULONG_UNDEFINED;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		/* Create the thread which watches the timeouts
 +		for lock waits */
 +		thread_handles[2 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			lock_wait_timeout_thread,
 +			NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
 +		thread_started[2 + SRV_MAX_N_IO_THREADS] = true;
 +
 +		/* Create the thread which warns of long semaphore waits */
 +		thread_handles[3 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			srv_error_monitor_thread,
 +			NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
 +		thread_started[3 + SRV_MAX_N_IO_THREADS] = true;
 +
 +		/* Create the thread which prints InnoDB monitor info */
 +		if (!thread_started[4 + SRV_MAX_N_IO_THREADS]) {
 +			/* srv_monitor_thread not yet started */
 +			thread_handles[4 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +				srv_monitor_thread,
 +				NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
 +			thread_started[4 + SRV_MAX_N_IO_THREADS] = true;
 +		}
 +	}
 +
 +	/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
 +	err = dict_create_or_check_foreign_constraint_tables();
 +	if (err != DB_SUCCESS) {
 +		return(err);
 +	}
 +
 +	/* Create the SYS_TABLESPACES and SYS_DATAFILES system tables if we
 +	have not done that already on crash recovery. */
 +	if (sys_datafiles_created == false) {
 +		err = dict_create_or_check_sys_tablespace();
 +		if (err != DB_SUCCESS) {
 +			return(err);
 +		}
 +	}
 +
 +	srv_is_being_started = FALSE;
 +
 +	ut_a(trx_purge_state() == PURGE_STATE_INIT);
 +
 +	/* Create the master thread which does purge and other utility
 +	operations */
 +
 +	if (!srv_read_only_mode) {
 +
 +		thread_handles[1 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			srv_master_thread,
 +			NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
 +		thread_started[1 + SRV_MAX_N_IO_THREADS] = true;
 +	}
 +
 +	if (!srv_read_only_mode
 +	    && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
 +
 +		thread_handles[6 + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +			srv_purge_coordinator_thread,
 +			NULL, thread_ids + 6 + SRV_MAX_N_IO_THREADS);
 +
 +		thread_started[6 + SRV_MAX_N_IO_THREADS] = true;
 +
 +		ut_a(UT_ARR_SIZE(thread_ids)
 +		     > 6 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
 +
 +		/* We've already created the purge coordinator thread above. */
 +		for (i = 1; i < srv_n_purge_threads; ++i) {
 +			thread_handles[6 + i + SRV_MAX_N_IO_THREADS] = os_thread_create(
 +				srv_worker_thread, NULL,
 +				thread_ids + 6 + i + SRV_MAX_N_IO_THREADS);
 +			thread_started[6 + i + SRV_MAX_N_IO_THREADS] = true;
 +		}
 +
 +		srv_start_wait_for_purge_to_start();
 +
 +	} else {
 +		purge_sys->state = PURGE_STATE_DISABLED;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		buf_flush_page_cleaner_thread_handle = os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
 +		buf_flush_page_cleaner_thread_started = true;
 +	}
 +
 +	buf_flush_lru_manager_thread_handle = os_thread_create(buf_flush_lru_manager_thread, NULL, NULL);
 +	buf_flush_lru_manager_thread_started = true;
 +
 +	if (!srv_file_per_table && srv_pass_corrupt_table) {
 +		fprintf(stderr, "InnoDB: Warning:"
 +			" The option innodb_file_per_table is disabled,"
 +			" so using the option innodb_pass_corrupt_table doesn't make sense.\n");
 +	}
 +
 +	if (srv_print_verbose_log) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			" Percona XtraDB (http://www.percona.com) %s started; "
 +			"log sequence number " LSN_PF "",
 +			INNODB_VERSION_STR, srv_start_lsn);
 +	}
 +
 +	if (srv_force_recovery > 0) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"!!! innodb_force_recovery is set to %lu !!!",
 +			(ulong) srv_force_recovery);
 +	}
 +
 +	if (srv_force_recovery == 0) {
 +		/* In the insert buffer we may have even bigger tablespace
 +		id's, because we may have dropped those tablespaces, but
 +		insert buffer merge has not had time to clean the records from
 +		the ibuf tree. */
 +
 +		ibuf_update_max_tablespace_id();
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		/* Create the buffer pool dump/load thread */
 +		buf_dump_thread_handle = os_thread_create(buf_dump_thread, NULL, NULL);
 +		buf_dump_thread_started = true;
 +
 +		/* Create the dict stats gathering thread */
 +		dict_stats_thread_handle = os_thread_create(dict_stats_thread, NULL, NULL);
 +		dict_stats_thread_started = true;
 +
 +		/* Create the thread that will optimize the FTS sub-system. */
 +		fts_optimize_init();
 +	}
 +
 +	srv_was_started = TRUE;
 +
 +	return(DB_SUCCESS);
 +}
 +
 +#if 0
 +/********************************************************************
 +Sync the FTS cache of every table on both dict_sys lists (compiled out). */
 +static
 +void
 +srv_fts_close(void)
 +/*===============*/
 +{
 +	dict_table_t*	table;
 +
 +	for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
 +	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
 +		fts_t*          fts = table->fts;
 +
 +		if (fts != NULL) {
 +			fts_sync_table(table);
 +		}
 +	}
 +
 +	for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
 +	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
 +		fts_t*          fts = table->fts;
 +
 +		if (fts != NULL) {
 +			fts_sync_table(table);
 +		}
 +	}
 +}
 +#endif
 +
 +/****************************************************************//**
 +Shuts down the InnoDB database.
 +@return	DB_SUCCESS or error code */
 +UNIV_INTERN
 +dberr_t
 +innobase_shutdown_for_mysql(void)
 +/*=============================*/
 +{
 +	ulint	i;
 +
 +	if (!srv_was_started) {
 +		if (srv_is_being_started) {
 +			ib_logf(IB_LOG_LEVEL_WARN,
 +				"Shutting down an improperly started, "
 +				"or created database!");
 +		}
 +
 +		return(DB_SUCCESS);
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		/* Shutdown the FTS optimize sub system. */
 +		fts_optimize_start_shutdown();
 +
 +		fts_optimize_end();
 +	}
 +
 +	/* 1. Flush the buffer pool to disk, write the current lsn to
 +	the tablespace header(s), and copy all log data to archive.
 +	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
 +	just free data structures after the shutdown. */
 +
 +	logs_empty_and_mark_files_at_shutdown();
 +
 +	if (srv_conc_get_active_threads() != 0) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"Query counter shows %ld queries still "
 +			"inside InnoDB at shutdown",
 +			srv_conc_get_active_threads());
 +	}
 +
 +	/* 2. Make all threads created by InnoDB exit */
 +
 +	srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
 +
 +	/* All threads end up waiting for certain events. Put those events
 +	to the signaled state. Then the threads will exit themselves after
 +	os_event_wait(). */
 +
 +	for (i = 0; i < 1000; i++) {
 +		/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
 +		HERE OR EARLIER */
 +
 +		if (!srv_read_only_mode) {
 +			/* a. Let the lock timeout thread exit */
 +			os_event_set(lock_sys->timeout_event);
 +
 +			/* b. srv error monitor thread exits automatically,
 +			no need to do anything here */
 +
 +			/* c. We wake the master thread so that it exits */
 +			srv_wake_master_thread();
 +
 +			/* d. Wakeup purge threads. */
 +			srv_purge_wakeup();
 +		}
 +
 +		/* e. Exit the i/o threads */
 +
 +		os_aio_wake_all_threads_at_shutdown();
 +
 +		/* f. dict_stats_thread is signaled from
 +		logs_empty_and_mark_files_at_shutdown() and should have
 +		already quit or is quitting right now. */
 +
 +		os_rmb;
 +		if (os_thread_count == 0) {
 +			/* All the threads have exited or are just exiting;
 +			NOTE that the threads may not have completed their
 +			exit yet. Should we use pthread_join() to make sure
 +			they have exited? If we did, we would have to
 +			remove the pthread_detach() from
 +			os_thread_exit().  Now we just sleep 0.1
 +			seconds and hope that is enough! */
 +
 +			os_thread_sleep(100000);
 +
 +			break;
 +		}
 +
 +		os_thread_sleep(100000);
 +	}
 +
 +	if (i == 1000) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"%lu threads created by InnoDB"
 +			" had not exited at shutdown!",
 +			(ulong) os_thread_count);
 +	}
 +
 +	if (srv_monitor_file) {
 +		fclose(srv_monitor_file);
 +		srv_monitor_file = 0;
 +		if (srv_monitor_file_name) {
 +			unlink(srv_monitor_file_name);
 +			mem_free(srv_monitor_file_name);
 +		}
 +	}
 +
 +	if (srv_dict_tmpfile) {
 +		fclose(srv_dict_tmpfile);
 +		srv_dict_tmpfile = 0;
 +	}
 +
 +	if (srv_misc_tmpfile) {
 +		fclose(srv_misc_tmpfile);
 +		srv_misc_tmpfile = 0;
 +	}
 +
 +	if (!srv_read_only_mode) {
 +		dict_stats_thread_deinit();
 +	}
 +
 +#ifdef __WIN__
 +	/* MDEV-361: ha_innodb.dll leaks handles on Windows
 +	MDEV-7403: should not pass recv_writer_thread_handle to
 +	CloseHandle().
 +
 +	On Windows we should call CloseHandle() for all
 +	open thread handles. */
 +	if (os_thread_count == 0) {
 +		for (i = 0; i < SRV_MAX_N_IO_THREADS + 6 + 32; ++i) {
 +			if (thread_started[i]) {
 +				CloseHandle(thread_handles[i]);
 +			}
 +		}
 +
 +		if (buf_flush_page_cleaner_thread_started) {
 +			CloseHandle(buf_flush_page_cleaner_thread_handle);
 +		}
 +
 +		if (buf_dump_thread_started) {
 +			CloseHandle(buf_dump_thread_handle);
 +		}
 +
 +		if (dict_stats_thread_started) {
 +			CloseHandle(dict_stats_thread_handle);
 +		}
 +
 +		if (buf_flush_lru_manager_thread_started) {
 +			CloseHandle(buf_flush_lru_manager_thread_handle);
 +		}
 +
 +		if (srv_redo_log_follow_thread_started) {
 +			CloseHandle(srv_redo_log_follow_thread_handle);
 +		}
 +	}
 +#endif /* __WIN__ */
 +
 +	/* This must be disabled before closing the buffer pool
 +	and closing the data dictionary.  */
 +	btr_search_disable();
 +
 +	ibuf_close();
 +	log_online_shutdown();
 +	log_shutdown();
 +	trx_sys_file_format_close();
 +	trx_sys_close();
 +	lock_sys_close();
 +
 +	/* We don't create these mutexes in RO mode because we don't create
 +	the temp files that they cover. */
 +	if (!srv_read_only_mode) {
 +		mutex_free(&srv_monitor_file_mutex);
 +		mutex_free(&srv_dict_tmpfile_mutex);
 +		mutex_free(&srv_misc_tmpfile_mutex);
 +	}
 +
 +	dict_close();
 +	btr_search_sys_free();
 +
 +	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
 +	them */
 +	os_aio_free();
 +	que_close();
 +	row_mysql_close();
 +	srv_mon_free();
 +	srv_free();
 +	fil_close();
 +
 +	/* 4. Free all allocated memory */
 +
 +	pars_lexer_close();
 +	log_mem_free();
 +	buf_pool_free(srv_buf_pool_instances);
 +	mem_close();
 +	sync_close();
 +
 +	/* ut_free_all_mem() frees all allocated memory not freed yet
 +	in shutdown, and it will also free the ut_list_mutex, so it
 +	should be the last of all the cleanup operations */
 +	ut_free_all_mem();
 +
 +	os_rmb;
 +	if (os_thread_count != 0
 +	    || os_event_count != 0
 +	    || os_mutex_count != 0
 +	    || os_fast_mutex_count != 0) {
 +		ib_logf(IB_LOG_LEVEL_WARN,
 +			"Some resources were not cleaned up in shutdown: "
 +			"threads %lu, events %lu, os_mutexes %lu, "
 +			"os_fast_mutexes %lu",
 +			(ulong) os_thread_count, (ulong) os_event_count,
 +			(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
 +	}
 +
 +	if (dict_foreign_err_file) {
 +		fclose(dict_foreign_err_file);
 +	}
 +
 +	if (srv_print_verbose_log) {
 +		ib_logf(IB_LOG_LEVEL_INFO,
 +			"Shutdown completed; log sequence number " LSN_PF "",
 +			srv_shutdown_lsn);
 +	}
 +
 +	srv_was_started = FALSE;
 +	srv_start_has_been_called = FALSE;
 +
 +	return(DB_SUCCESS);
 +}
 +#endif /* !UNIV_HOTBACKUP */
 +
 +
 +/********************************************************************
 +Signal the per-table background threads of every table in dict_sys->table_LRU
 +to shut down, and wait for them to do so. */
 +UNIV_INTERN
 +void
 +srv_shutdown_table_bg_threads(void)
 +/*===============================*/
 +{
 +	dict_table_t*	table;
 +	dict_table_t*	first;
 +	dict_table_t*	last = NULL;
 +
 +	mutex_enter(&dict_sys->mutex);
 +
 +	/* Signal all threads that they should stop. */
 +	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
 +	first = table;
 +	while (table) {
 +		dict_table_t*	next;
 +		fts_t*		fts = table->fts;
 +
 +		if (fts != NULL) {
 +			fts_start_shutdown(table, fts);
 +		}
 +
 +		next = UT_LIST_GET_NEXT(table_LRU, table);
 +
 +		if (!next) {
 +			last = table;
 +		}
 +
 +		table = next;
 +	}
 +
 +	/* We must release dict_sys->mutex here; if we hold on to it in the
 +	loop below, we will deadlock if any of the background threads try to
 +	acquire it (for example, the FTS thread by calling que_eval_sql).
 +
 +	Releasing it here and going through dict_sys->table_LRU without
 +	holding it is safe because:
 +
 +	 a) MySQL only starts the shutdown procedure after all client
 +	 threads have been disconnected and no new ones are accepted, so no
 +	 new tables are added or old ones dropped.
 +
 +	 b) Despite its name, the list is not LRU, and the order stays
 +	 fixed.
 +
 +	To safeguard against the above assumptions ever changing, we store
 +	the first and last items in the list above, and then check that
 +	they've stayed the same below. */
 +
 +	mutex_exit(&dict_sys->mutex);
 +
 +	/* Wait for the threads of each table to stop. This is not inside
 +	the above loop, because by signaling all the threads first we can
 +	overlap their shutting down delays. */
 +	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
 +	ut_a(first == table);
 +	while (table) {
 +		dict_table_t*	next;
 +		fts_t*		fts = table->fts;
 +
 +		if (fts != NULL) {
 +			fts_shutdown(table, fts);
 +		}
 +
 +		next = UT_LIST_GET_NEXT(table_LRU, table);
 +
 +		if (table == last) {
 +			ut_a(!next);
 +		}
 +
 +		table = next;
 +	}
 +}
 +
 +/*****************************************************************//**
 +Build the meta-data (.cfg) file name for a table and copy it to filename. */
 +UNIV_INTERN
 +void
 +srv_get_meta_data_filename(
 +/*=======================*/
 +	dict_table_t*	table,		/*!< in: table */
 +	char*			filename,	/*!< out: buffer for the .cfg file name */
 +	ulint			max_len)	/*!< in: max length; asserted >= path length */
 +{
 +	ulint			len;
 +	char*			path;
 +	char*			suffix;
 +	static const ulint	suffix_len = strlen(".cfg");
 +
 +	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
 +		dict_get_and_save_data_dir_path(table, false);
 +		ut_a(table->data_dir_path);
 +
 +		path = os_file_make_remote_pathname(
 +			table->data_dir_path, table->name, "cfg");
 +	} else {
 +		path = fil_make_ibd_name(table->name, false);
 +	}
 +
 +	ut_a(path);
 +	len = ut_strlen(path);
 +	ut_a(max_len >= len);
 +
 +	suffix = path + (len - suffix_len);
 +	if (strncmp(suffix, ".cfg", suffix_len) == 0) {
 +		strcpy(filename, path);
 +	} else {
 +		ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
 +
 +		strncpy(filename, path, len - suffix_len);
 +		suffix = filename + (len - suffix_len);
 +		strcpy(suffix, ".cfg");
 +	}
 +
 +	mem_free(path);
 +
 +	srv_normalize_path_for_win(filename);
 +}


More information about the commits mailing list