[Commits] fcc0430: MySQL 5.7.9 InnoDB

Jan Lindström jan.lindstrom at mariadb.com
Thu Nov 19 11:52:28 EET 2015

revision-id: fcc0430d14f5872e6bc865a6f1e18e6a8783fdad ()
parent(s): 86ff4da14dc53659e88ee8cd66412045dcb26e31
committer: Jan Lindström
timestamp: 2015-11-19 11:52:15 +0200
message:

MySQL 5.7.9 InnoDB

---
 storage/innobase/CMakeLists.txt           |   452 -
 storage/innobase/COPYING.Google           |    30 -
 storage/innobase/COPYING.Percona          |    30 -
 storage/innobase/Doxyfile                 |  1419 ---
 storage/innobase/api/api0api.cc           |   774 +-
 storage/innobase/api/api0misc.cc          |   124 +-
 storage/innobase/btr/btr0btr.cc           |  2693 +++---
 storage/innobase/btr/btr0bulk.cc          |   988 +++
 storage/innobase/btr/btr0cur.cc           |  4296 +++++++---
 storage/innobase/btr/btr0pcur.cc          |   241 +-
 storage/innobase/btr/btr0sea.cc           |  1330 +--
 storage/innobase/buf/buf0buddy.cc         |   255 +-
 storage/innobase/buf/buf0buf.cc           |  4367 +++++++---
 storage/innobase/buf/buf0checksum.cc      |    69 +-
 storage/innobase/buf/buf0dblwr.cc         |   692 +-
 storage/innobase/buf/buf0dump.cc          |   397 +-
 storage/innobase/buf/buf0flu.cc           |  2631 ++++--
 storage/innobase/buf/buf0lru.cc           |  1093 +--
 storage/innobase/buf/buf0rea.cc           |   567 +-
 storage/innobase/compile-innodb           |    25 -
 storage/innobase/data/data0data.cc        |   272 +-
 storage/innobase/data/data0type.cc        |    38 +-
 storage/innobase/dict/dict0boot.cc        |   135 +-
 storage/innobase/dict/dict0crea.cc        |  1264 ++-
 storage/innobase/dict/dict0dict.cc        |  2874 ++++---
 storage/innobase/dict/dict0load.cc        |  2384 ++++--
 storage/innobase/dict/dict0mem.cc         |   390 +-
 storage/innobase/dict/dict0stats.cc       |   724 +-
 storage/innobase/dict/dict0stats_bg.cc    |    80 +-
 storage/innobase/dyn/dyn0dyn.cc           |    66 -
 storage/innobase/eval/eval0eval.cc        |   140 +-
 storage/innobase/eval/eval0proc.cc        |    20 +-
 storage/innobase/fil/fil0fil.cc           |  7549 ++++++++--------
 storage/innobase/fsp/fsp0file.cc          |  1110 +++
 storage/innobase/fsp/fsp0fsp.cc           |  3015 +++----
 storage/innobase/fsp/fsp0space.cc         |   291 +
 storage/innobase/fsp/fsp0sysspace.cc      |  1033 +++
 storage/innobase/fts/fts0ast.cc           |   153 +-
 storage/innobase/fts/fts0blex.cc          |    21 +-
 storage/innobase/fts/fts0blex.l           |     1 +
 storage/innobase/fts/fts0config.cc        |    84 +-
 storage/innobase/fts/fts0fts.cc           |  1946 +++--
 storage/innobase/fts/fts0opt.cc           |   408 +-
 storage/innobase/fts/fts0pars.cc          |    13 +-
 storage/innobase/fts/fts0pars.y           |    17 +-
 storage/innobase/fts/fts0plugin.cc        |   295 +
 storage/innobase/fts/fts0que.cc           |   920 +-
 storage/innobase/fts/fts0sql.cc           |    90 +-
 storage/innobase/fts/fts0tlex.cc          |   389 +-
 storage/innobase/fts/fts0tlex.l           |     1 +
 storage/innobase/fut/fut0lst.cc           |   212 +-
 storage/innobase/ha/ha0ha.cc              |   178 +-
 storage/innobase/ha/ha0storage.cc         |    12 +-
 storage/innobase/ha/hash0hash.cc          |   144 +-
 storage/innobase/ha_innodb.def            |     4 -
 storage/innobase/handler/ha_innodb.cc     | 12842 +++++++++++++++++-----------
 storage/innobase/handler/ha_innodb.h      |  1114 ++-
 storage/innobase/handler/ha_innopart.cc   |  4438 ++++++++++
 storage/innobase/handler/ha_innopart.h    |  1326 +++
 storage/innobase/handler/handler0alter.cc |  4505 ++++++++--
 storage/innobase/handler/i_s.cc           |  1565 +++-
 storage/innobase/handler/i_s.h            |    15 +-
 storage/innobase/ibuf/ibuf0ibuf.cc        |  1537 ++--
 storage/innobase/include/api0api.h        |   413 +-
 storage/innobase/include/api0misc.h       |     8 +-
 storage/innobase/include/btr0btr.h        |   448 +-
 storage/innobase/include/btr0btr.ic       |   101 +-
 storage/innobase/include/btr0bulk.h       |   383 +
 storage/innobase/include/btr0cur.h        |   421 +-
 storage/innobase/include/btr0cur.ic       |    58 +-
 storage/innobase/include/btr0pcur.h       |   154 +-
 storage/innobase/include/btr0pcur.ic      |   139 +-
 storage/innobase/include/btr0sea.h        |   308 +-
 storage/innobase/include/btr0sea.ic       |   155 +-
 storage/innobase/include/btr0types.h      |   197 +-
 storage/innobase/include/buf0buddy.h      |    22 +-
 storage/innobase/include/buf0buddy.ic     |    12 +-
 storage/innobase/include/buf0buf.h        |  1372 +--
 storage/innobase/include/buf0buf.ic       |   633 +-
 storage/innobase/include/buf0checksum.h   |    36 +-
 storage/innobase/include/buf0dblwr.h      |    36 +-
 storage/innobase/include/buf0dump.h       |     7 +-
 storage/innobase/include/buf0flu.h        |   274 +-
 storage/innobase/include/buf0flu.ic       |    96 +-
 storage/innobase/include/buf0lru.h        |    63 +-
 storage/innobase/include/buf0rea.h        |   118 +-
 storage/innobase/include/buf0types.h      |    41 +-
 storage/innobase/include/data0data.h      |   278 +-
 storage/innobase/include/data0data.ic     |   242 +-
 storage/innobase/include/data0type.h      |   121 +-
 storage/innobase/include/data0type.ic     |    77 +-
 storage/innobase/include/db0err.h         |    55 +-
 storage/innobase/include/dict0boot.h      |    52 +-
 storage/innobase/include/dict0boot.ic     |    10 +-
 storage/innobase/include/dict0crea.h      |   335 +-
 storage/innobase/include/dict0crea.ic     |    56 +-
 storage/innobase/include/dict0dict.h      |  1024 ++-
 storage/innobase/include/dict0dict.ic     |   752 +-
 storage/innobase/include/dict0load.h      |   247 +-
 storage/innobase/include/dict0mem.h       |  1212 ++-
 storage/innobase/include/dict0mem.ic      |    33 +-
 storage/innobase/include/dict0priv.h      |     8 +-
 storage/innobase/include/dict0priv.ic     |    32 +-
 storage/innobase/include/dict0stats.h     |    27 +-
 storage/innobase/include/dict0stats.ic    |    13 +-
 storage/innobase/include/dict0stats_bg.h  |    21 +-
 storage/innobase/include/dict0types.h     |    16 +-
 storage/innobase/include/dyn0buf.h        |   505 ++
 storage/innobase/include/dyn0dyn.h        |   199 -
 storage/innobase/include/dyn0dyn.ic       |   306 -
 storage/innobase/include/dyn0types.h      |    39 +
 storage/innobase/include/eval0eval.h      |    10 +-
 storage/innobase/include/eval0eval.ic     |    12 +-
 storage/innobase/include/eval0proc.h      |    24 +-
 storage/innobase/include/eval0proc.ic     |     6 +-
 storage/innobase/include/fil0fil.h        |  1609 ++--
 storage/innobase/include/fsp0file.h       |   576 ++
 storage/innobase/include/fsp0fsp.h        |   476 +-
 storage/innobase/include/fsp0fsp.ic       |   345 +-
 storage/innobase/include/fsp0space.h      |   229 +
 storage/innobase/include/fsp0sysspace.h   |   322 +
 storage/innobase/include/fsp0types.h      |   260 +-
 storage/innobase/include/fts0ast.h        |    53 +-
 storage/innobase/include/fts0blex.h       |     2 +-
 storage/innobase/include/fts0fts.h        |   302 +-
 storage/innobase/include/fts0opt.h        |     3 +-
 storage/innobase/include/fts0plugin.h     |    50 +
 storage/innobase/include/fts0priv.h       |   215 +-
 storage/innobase/include/fts0priv.ic      |    20 +-
 storage/innobase/include/fts0tlex.h       |     2 +-
 storage/innobase/include/fts0tokenize.h   |   188 +
 storage/innobase/include/fts0types.h      |   116 +-
 storage/innobase/include/fts0types.ic     |   314 +-
 storage/innobase/include/fut0fut.h        |    30 +-
 storage/innobase/include/fut0fut.ic       |    36 +-
 storage/innobase/include/fut0lst.h        |    40 +-
 storage/innobase/include/fut0lst.ic       |    32 +-
 storage/innobase/include/gis0geo.h        |   162 +
 storage/innobase/include/gis0rtree.h      |   572 ++
 storage/innobase/include/gis0rtree.ic     |   274 +
 storage/innobase/include/gis0type.h       |   168 +
 storage/innobase/include/ha0ha.h          |   106 +-
 storage/innobase/include/ha0ha.ic         |    46 +-
 storage/innobase/include/ha0storage.h     |    31 +-
 storage/innobase/include/ha0storage.ic    |     6 +-
 storage/innobase/include/ha_prototypes.h  |   387 +-
 storage/innobase/include/handler0alter.h  |    15 +-
 storage/innobase/include/hash0hash.h      |    92 +-
 storage/innobase/include/hash0hash.ic     |    76 +-
 storage/innobase/include/ib0mutex.h       |  1166 +++
 storage/innobase/include/ibuf0ibuf.h      |   217 +-
 storage/innobase/include/ibuf0ibuf.ic     |   131 +-
 storage/innobase/include/lock0iter.h      |     6 +-
 storage/innobase/include/lock0lock.h      |   393 +-
 storage/innobase/include/lock0lock.ic     |    49 +-
 storage/innobase/include/lock0prdt.h      |   224 +
 storage/innobase/include/lock0priv.h      |  1052 ++-
 storage/innobase/include/lock0priv.ic     |   364 +-
 storage/innobase/include/lock0types.h     |    35 +-
 storage/innobase/include/log0log.h        |   704 +-
 storage/innobase/include/log0log.ic       |   194 +-
 storage/innobase/include/log0recv.h       |   249 +-
 storage/innobase/include/log0recv.ic      |    25 +-
 storage/innobase/include/log0types.h      |    50 +
 storage/innobase/include/mach0data.h      |   168 +-
 storage/innobase/include/mach0data.ic     |   400 +-
 storage/innobase/include/mem0dbg.h        |   150 -
 storage/innobase/include/mem0dbg.ic       |   109 -
 storage/innobase/include/mem0mem.h        |   463 +-
 storage/innobase/include/mem0mem.ic       |   410 +-
 storage/innobase/include/mem0pool.h       |   121 -
 storage/innobase/include/mem0pool.ic      |    24 -
 storage/innobase/include/mtr0log.h        |   130 +-
 storage/innobase/include/mtr0log.ic       |   211 +-
 storage/innobase/include/mtr0mtr.h        |   907 +-
 storage/innobase/include/mtr0mtr.ic       |   391 +-
 storage/innobase/include/mtr0types.h      |   261 +-
 storage/innobase/include/os0atomic.h      |   320 +
 storage/innobase/include/os0atomic.ic     |   215 +
 storage/innobase/include/os0event.h       |   135 +
 storage/innobase/include/os0file.h        |  2256 +++--
 storage/innobase/include/os0file.ic       |   513 +-
 storage/innobase/include/os0once.h        |     4 +-
 storage/innobase/include/os0proc.h        |    51 +-
 storage/innobase/include/os0sync.h        |   858 --
 storage/innobase/include/os0sync.ic       |   234 -
 storage/innobase/include/os0thread.h      |    57 +-
 storage/innobase/include/page0cur.h       |   168 +-
 storage/innobase/include/page0cur.ic      |   127 +-
 storage/innobase/include/page0page.h      |   317 +-
 storage/innobase/include/page0page.ic     |   276 +-
 storage/innobase/include/page0size.h      |   202 +
 storage/innobase/include/page0types.h     |    65 +-
 storage/innobase/include/page0zip.h       |   215 +-
 storage/innobase/include/page0zip.ic      |    52 +-
 storage/innobase/include/pars0opt.h       |     7 +-
 storage/innobase/include/pars0pars.h      |   135 +-
 storage/innobase/include/pars0sym.h       |    25 +-
 storage/innobase/include/que0que.h        |    99 +-
 storage/innobase/include/que0que.ic       |    21 +-
 storage/innobase/include/read0read.h      |   252 +-
 storage/innobase/include/read0read.ic     |   148 -
 storage/innobase/include/read0types.h     |   305 +-
 storage/innobase/include/rem0cmp.h        |   355 +-
 storage/innobase/include/rem0cmp.ic       |   216 +-
 storage/innobase/include/rem0rec.h        |   386 +-
 storage/innobase/include/rem0rec.ic       |   196 +-
 storage/innobase/include/row0ext.h        |     9 +-
 storage/innobase/include/row0ftsort.h     |    36 +-
 storage/innobase/include/row0import.h     |     8 +-
 storage/innobase/include/row0ins.h        |    63 +-
 storage/innobase/include/row0log.h        |    88 +-
 storage/innobase/include/row0log.ic       |    18 +-
 storage/innobase/include/row0merge.h      |   170 +-
 storage/innobase/include/row0mysql.h      |   382 +-
 storage/innobase/include/row0purge.h      |    29 +-
 storage/innobase/include/row0quiesce.h    |     5 +-
 storage/innobase/include/row0row.h        |    90 +-
 storage/innobase/include/row0row.ic       |    11 +-
 storage/innobase/include/row0sel.h        |   200 +-
 storage/innobase/include/row0sel.ic       |    46 +-
 storage/innobase/include/row0trunc.h      |   429 +
 storage/innobase/include/row0uins.h       |     8 +-
 storage/innobase/include/row0umod.h       |     7 +-
 storage/innobase/include/row0undo.h       |    16 +-
 storage/innobase/include/row0upd.h        |   216 +-
 storage/innobase/include/row0upd.ic       |    67 +-
 storage/innobase/include/row0vers.h       |    50 +-
 storage/innobase/include/sess0sess.h      |   146 +
 storage/innobase/include/srv0conc.h       |    27 +-
 storage/innobase/include/srv0mon.h        |   148 +-
 storage/innobase/include/srv0mon.ic       |    12 +-
 storage/innobase/include/srv0srv.h        |   327 +-
 storage/innobase/include/srv0start.h      |    97 +-
 storage/innobase/include/sync0arr.h       |    69 +-
 storage/innobase/include/sync0arr.ic      |    51 +-
 storage/innobase/include/sync0debug.h     |   105 +
 storage/innobase/include/sync0policy.h    |   550 ++
 storage/innobase/include/sync0policy.ic   |   100 +
 storage/innobase/include/sync0rw.h        |   572 +-
 storage/innobase/include/sync0rw.ic       |   483 +-
 storage/innobase/include/sync0sync.h      |   790 +-
 storage/innobase/include/sync0sync.ic     |   403 -
 storage/innobase/include/sync0types.h     |  1225 ++-
 storage/innobase/include/trx0i_s.h        |    35 +-
 storage/innobase/include/trx0purge.h      |   345 +-
 storage/innobase/include/trx0purge.ic     |    27 +-
 storage/innobase/include/trx0rec.h        |   130 +-
 storage/innobase/include/trx0rec.ic       |    26 +-
 storage/innobase/include/trx0roll.h       |   123 +-
 storage/innobase/include/trx0roll.ic      |    46 +-
 storage/innobase/include/trx0rseg.h       |   187 +-
 storage/innobase/include/trx0rseg.ic      |    94 +-
 storage/innobase/include/trx0sys.h        |   327 +-
 storage/innobase/include/trx0sys.ic       |   211 +-
 storage/innobase/include/trx0trx.h        |  1055 ++-
 storage/innobase/include/trx0trx.ic       |   264 +-
 storage/innobase/include/trx0types.h      |   221 +-
 storage/innobase/include/trx0undo.h       |   266 +-
 storage/innobase/include/trx0undo.ic      |    61 +-
 storage/innobase/include/trx0xa.h         |    15 +-
 storage/innobase/include/univ.i           |   264 +-
 storage/innobase/include/usr0sess.h       |     9 +-
 storage/innobase/include/ut0bh.h          |   152 -
 storage/innobase/include/ut0bh.ic         |   125 -
 storage/innobase/include/ut0byte.h        |    18 +-
 storage/innobase/include/ut0byte.ic       |    18 +-
 storage/innobase/include/ut0counter.h     |   104 +-
 storage/innobase/include/ut0crc32.h       |    25 +-
 storage/innobase/include/ut0dbg.h         |   162 +-
 storage/innobase/include/ut0list.h        |    23 +-
 storage/innobase/include/ut0list.ic       |     6 +-
 storage/innobase/include/ut0lst.h         |   544 +-
 storage/innobase/include/ut0mem.h         |   146 +-
 storage/innobase/include/ut0mem.ic        |    58 +-
 storage/innobase/include/ut0mutex.h       |   225 +
 storage/innobase/include/ut0mutex.ic      |   108 +
 storage/innobase/include/ut0new.h         |   918 ++
 storage/innobase/include/ut0pool.h        |   366 +
 storage/innobase/include/ut0rbt.h         |    59 +-
 storage/innobase/include/ut0rnd.h         |    31 +-
 storage/innobase/include/ut0rnd.ic        |    38 +-
 storage/innobase/include/ut0stage.h       |   573 ++
 storage/innobase/include/ut0ut.h          |   478 +-
 storage/innobase/include/ut0ut.ic         |   105 +-
 storage/innobase/include/ut0vec.h         |    55 +-
 storage/innobase/include/ut0vec.ic        |    97 +-
 storage/innobase/include/ut0wqueue.h      |    41 +-
 storage/innobase/lock/lock0iter.cc        |     9 +-
 storage/innobase/lock/lock0lock.cc        |  7450 ++++++++--------
 storage/innobase/lock/lock0prdt.cc        |  1056 +++
 storage/innobase/lock/lock0wait.cc        |    67 +-
 storage/innobase/log/log0log.cc           |  3126 ++-----
 storage/innobase/log/log0recv.cc          |  3502 ++++----
 storage/innobase/mach/mach0data.cc        |   102 +-
 storage/innobase/mem/mem0dbg.cc           |  1050 ---
 storage/innobase/mem/mem0mem.cc           |   231 +-
 storage/innobase/mem/mem0pool.cc          |   727 --
 storage/innobase/mtr/mtr0log.cc           |   149 +-
 storage/innobase/mtr/mtr0mtr.cc           |  1131 ++-
 storage/innobase/os/os0event.cc           |   549 ++
 storage/innobase/os/os0file.cc            | 10983 +++++++++++++++---------
 storage/innobase/os/os0proc.cc            |   146 +-
 storage/innobase/os/os0sync.cc            |   916 --
 storage/innobase/os/os0thread.cc          |   246 +-
 storage/innobase/page/page0cur.cc         |  1149 ++-
 storage/innobase/page/page0page.cc        |   996 ++-
 storage/innobase/page/page0zip.cc         |   826 +-
 storage/innobase/pars/lexyy.cc            |   156 +-
 storage/innobase/pars/make_bison.sh       |     3 +-
 storage/innobase/pars/make_flex.sh        |     3 +-
 storage/innobase/pars/pars0grm.cc         |     5 +-
 storage/innobase/pars/pars0grm.y          |     5 +-
 storage/innobase/pars/pars0lex.l          |     5 +-
 storage/innobase/pars/pars0opt.cc         |    59 +-
 storage/innobase/pars/pars0pars.cc        |   286 +-
 storage/innobase/pars/pars0sym.cc         |    43 +-
 storage/innobase/que/que0que.cc           |   259 +-
 storage/innobase/read/read0read.cc        |   793 +-
 storage/innobase/rem/rem0cmp.cc           |  1657 ++--
 storage/innobase/rem/rem0rec.cc           |   595 +-
 storage/innobase/row/row0ext.cc           |    31 +-
 storage/innobase/row/row0ftsort.cc        |   491 +-
 storage/innobase/row/row0import.cc        |  1004 +--
 storage/innobase/row/row0ins.cc           |  1344 ++-
 storage/innobase/row/row0log.cc           |   856 +-
 storage/innobase/row/row0merge.cc         |  2270 +++--
 storage/innobase/row/row0mysql.cc         |  3875 +++++----
 storage/innobase/row/row0purge.cc         |   275 +-
 storage/innobase/row/row0quiesce.cc       |    98 +-
 storage/innobase/row/row0row.cc           |   489 +-
 storage/innobase/row/row0sel.cc           |  2061 +++--
 storage/innobase/row/row0trunc.cc         |  3051 +++++++
 storage/innobase/row/row0uins.cc          |   122 +-
 storage/innobase/row/row0umod.cc          |   291 +-
 storage/innobase/row/row0undo.cc          |   123 +-
 storage/innobase/row/row0upd.cc           |  1192 ++-
 storage/innobase/row/row0vers.cc          |   728 +-
 storage/innobase/srv/srv0conc.cc          |   363 +-
 storage/innobase/srv/srv0mon.cc           |   269 +-
 storage/innobase/srv/srv0srv.cc           |   956 ++-
 storage/innobase/srv/srv0start.cc         |  2946 +++----
 storage/innobase/sync/sync0arr.cc         |   971 ++-
 storage/innobase/sync/sync0debug.cc       |  1799 ++++
 storage/innobase/sync/sync0rw.cc          |  1061 ++-
 storage/innobase/sync/sync0sync.cc        |  1699 +---
 storage/innobase/trx/trx0i_s.cc           |   322 +-
 storage/innobase/trx/trx0purge.cc         |  1123 ++-
 storage/innobase/trx/trx0rec.cc           |  1271 ++-
 storage/innobase/trx/trx0roll.cc          |   644 +-
 storage/innobase/trx/trx0rseg.cc          |   276 +-
 storage/innobase/trx/trx0sys.cc           |   704 +-
 storage/innobase/trx/trx0trx.cc           |  2389 ++++--
 storage/innobase/trx/trx0undo.cc          |   821 +-
 storage/innobase/usr/usr0sess.cc          |    14 +-
 storage/innobase/ut/ut0bh.cc              |   159 -
 storage/innobase/ut/ut0crc32.cc           |   680 +-
 storage/innobase/ut/ut0dbg.cc             |    83 +-
 storage/innobase/ut/ut0list.cc            |    23 +-
 storage/innobase/ut/ut0mem.cc             |   424 +-
 storage/innobase/ut/ut0new.cc             |   227 +
 storage/innobase/ut/ut0rbt.cc             |    87 +-
 storage/innobase/ut/ut0rnd.cc             |     7 +-
 storage/innobase/ut/ut0ut.cc              |   431 +-
 storage/innobase/ut/ut0vec.cc             |     4 +-
 storage/innobase/ut/ut0wqueue.cc          |    35 +-
 366 files changed, 125870 insertions(+), 76997 deletions(-)

diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
deleted file mode 100644
index 2e93989..0000000
--- a/storage/innobase/CMakeLists.txt
+++ /dev/null
@@ -1,452 +0,0 @@
-# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
-
-# This is the CMakeLists for InnoDB
-
-INCLUDE(CheckFunctionExists)
-INCLUDE(CheckCSourceCompiles)
-INCLUDE(CheckCSourceRuns)
-
-# OS tests
-IF(UNIX)
-  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
-    CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
-    CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
-    ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1")
-    IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
-      ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
-      LINK_LIBRARIES(aio)
-    ENDIF()
-    IF(HAVE_LIBNUMA)
-      LINK_LIBRARIES(numa)
-    ENDIF()
-  ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*")
-    ADD_DEFINITIONS("-DUNIV_HPUX")
-  ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX")
-    ADD_DEFINITIONS("-DUNIV_AIX")
-  ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
-    ADD_DEFINITIONS("-DUNIV_SOLARIS")
-  ENDIF()
-ENDIF()
-
-IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-# After: WL#5825 Using C++ Standard Library with MySQL code
-#       we no longer use -fno-exceptions
-#	SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
-ENDIF()
-
-# Enable InnoDB's UNIV_DEBUG and UNIV_SYNC_DEBUG in debug builds
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEBUG")
-
-# Add -Wconversion if compiling with GCC
-## As of Mar 15 2011 this flag causes 3573+ warnings. If you are reading this
-## please fix them and enable the following code:
-#IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion")
-#ENDIF()
-
-CHECK_FUNCTION_EXISTS(sched_getcpu  HAVE_SCHED_GETCPU)
-
-IF(NOT MSVC)
-# either define HAVE_IB_GCC_ATOMIC_BUILTINS or not
-IF(NOT CMAKE_CROSSCOMPILING)
-  CHECK_C_SOURCE_RUNS(
-  "
-  int main()
-  {
-    long	x;
-    long	y;
-    long	res;
-
-    x = 10;
-    y = 123;
-    res = __sync_bool_compare_and_swap(&x, x, y);
-    if (!res || x != y) {
-      return(1);
-    }
-
-    x = 10;
-    y = 123;
-    res = __sync_bool_compare_and_swap(&x, x + 1, y);
-    if (res || x != 10) {
-      return(1);
-    }
-    x = 10;
-    y = 123;
-    res = __sync_add_and_fetch(&x, y);
-    if (res != 123 + 10 || x != 123 + 10) {
-      return(1);
-    }
-    return(0);
-  }"
-  HAVE_IB_GCC_ATOMIC_BUILTINS
-  )
-  CHECK_C_SOURCE_RUNS(
-  "
-  int main()
-  {
-    long	res;
-    char	c;
-
-    c = 10;
-    res = __sync_lock_test_and_set(&c, 123);
-    if (res != 10 || c != 123) {
-      return(1);
-    }
-    return(0);
-  }"
-  HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE
-  )
-  CHECK_C_SOURCE_RUNS(
-  "#include<stdint.h>
-  int main()
-  {
-    int64_t	x,y,res;
-
-    x = 10;
-    y = 123;
-    res = __sync_sub_and_fetch(&y, x);
-    if (res != y || y != 113) {
-      return(1);
-    }
-    res = __sync_add_and_fetch(&y, x);
-    if (res != y || y != 123) {
-      return(1);
-    }
-    return(0);
-  }"
-  HAVE_IB_GCC_ATOMIC_BUILTINS_64
-  )
-  CHECK_C_SOURCE_RUNS(
-  "#include<stdint.h>
-  int main()
-  {
-    __sync_synchronize();
-    return(0);
-  }"
-  HAVE_IB_GCC_SYNC_SYNCHRONISE
-  )
-  CHECK_C_SOURCE_RUNS(
-  "#include<stdint.h>
-  int main()
-  {
-    __atomic_thread_fence(__ATOMIC_ACQUIRE);
-    __atomic_thread_fence(__ATOMIC_RELEASE);
-    return(0);
-  }"
-  HAVE_IB_GCC_ATOMIC_THREAD_FENCE
-  )
-  CHECK_C_SOURCE_RUNS(
-  "#include<stdint.h>
-  int main()
-  {
-    unsigned char	c;
-
-    __atomic_test_and_set(&c, __ATOMIC_ACQUIRE);
-    __atomic_clear(&c, __ATOMIC_RELEASE);
-    return(0);
-  }"
-  HAVE_IB_GCC_ATOMIC_TEST_AND_SET
-  )
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_BYTE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS_64)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_SYNC_SYNCHRONISE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1)
-ENDIF()
-
- # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
-IF(NOT CMAKE_CROSSCOMPILING)
-  CHECK_C_SOURCE_RUNS(
-  "
-  #include <pthread.h>
-  #include <string.h>
-
-  int main() {
-    pthread_t       x1;
-    pthread_t       x2;
-    pthread_t       x3;
-
-    memset(&x1, 0x0, sizeof(x1));
-    memset(&x2, 0x0, sizeof(x2));
-    memset(&x3, 0x0, sizeof(x3));
-
-    __sync_bool_compare_and_swap(&x1, x2, x3);
-
-    return(0);
-  }"
-  HAVE_IB_ATOMIC_PTHREAD_T_GCC)
-ENDIF()
-IF(HAVE_IB_ATOMIC_PTHREAD_T_GCC)
-  ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_GCC=1)
-ENDIF()
-
-ENDIF(NOT MSVC)
-
-CHECK_FUNCTION_EXISTS(asprintf  HAVE_ASPRINTF)
-CHECK_FUNCTION_EXISTS(vasprintf  HAVE_VASPRINTF)
-
-# Solaris atomics
-IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
-  CHECK_FUNCTION_EXISTS(atomic_cas_ulong  HAVE_ATOMIC_CAS_ULONG)
-  CHECK_FUNCTION_EXISTS(atomic_cas_32 HAVE_ATOMIC_CAS_32)
-  CHECK_FUNCTION_EXISTS(atomic_cas_64 HAVE_ATOMIC_CAS_64)
-  CHECK_FUNCTION_EXISTS(atomic_add_long_nv HAVE_ATOMIC_ADD_LONG_NV)
-  CHECK_FUNCTION_EXISTS(atomic_swap_uchar HAVE_ATOMIC_SWAP_UCHAR)
-  IF(HAVE_ATOMIC_CAS_ULONG AND
-     HAVE_ATOMIC_CAS_32 AND
-     HAVE_ATOMIC_CAS_64 AND
-     HAVE_ATOMIC_ADD_LONG_NV AND
-     HAVE_ATOMIC_SWAP_UCHAR)
-    SET(HAVE_IB_SOLARIS_ATOMICS 1)
-  ENDIF()
-
-  IF(HAVE_IB_SOLARIS_ATOMICS)
-    ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1)
-  ENDIF()
-
-  IF(NOT CMAKE_CROSSCOMPILING)
-  # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not
-  CHECK_C_SOURCE_COMPILES(
-  "   #include <pthread.h>
-      #include <string.h>
-
-      int main(int argc, char** argv) {
-        pthread_t       x1;
-        pthread_t       x2;
-        pthread_t       x3;
-
-        memset(&x1, 0x0, sizeof(x1));
-        memset(&x2, 0x0, sizeof(x2));
-        memset(&x3, 0x0, sizeof(x3));
-
-        if (sizeof(pthread_t) == 4) {
-
-          atomic_cas_32(&x1, x2, x3);
-
-        } else if (sizeof(pthread_t) == 8) {
-
-          atomic_cas_64(&x1, x2, x3);
-
-        } else {
-
-          return(1);
-        }
-
-      return(0);
-    }
-  " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS)
-  CHECK_C_SOURCE_COMPILES(
-  "#include <mbarrier.h>
-  int main() {
-    __machine_r_barrier();
-    __machine_w_barrier();
-    return(0);
-  }"
-  HAVE_IB_MACHINE_BARRIER_SOLARIS)
-  ENDIF()
-  IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS)
-    ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1)
-  ENDIF()
-  IF(HAVE_IB_MACHINE_BARRIER_SOLARIS)
-    ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1)
-  ENDIF()
-ENDIF()
-
-
-IF(UNIX)
-# this is needed to know which one of atomic_cas_32() or atomic_cas_64()
-# to use in the source
-SET(CMAKE_EXTRA_INCLUDE_FILES pthread.h)
-CHECK_TYPE_SIZE(pthread_t SIZEOF_PTHREAD_T)
-SET(CMAKE_EXTRA_INCLUDE_FILES)
-ENDIF()
-
-IF(SIZEOF_PTHREAD_T)
-  ADD_DEFINITIONS(-DSIZEOF_PTHREAD_T=${SIZEOF_PTHREAD_T})
-ENDIF()
-
-IF(MSVC)
-  ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS)
-  ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE)
-ENDIF()
-
-
-# Include directories under innobase
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include
-		    ${CMAKE_SOURCE_DIR}/storage/innobase/handler)
-
-# Sun Studio bug with -xO2
-IF(CMAKE_CXX_COMPILER_ID MATCHES "SunPro"
-	AND CMAKE_CXX_FLAGS_RELEASE MATCHES "O2"
-	AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
-	# Sun Studio 12 crashes with -xO2 flag, but not with higher optimization
-	# -xO3
-	SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0rec.cc
-    PROPERTIES COMPILE_FLAGS -xO3)
-ENDIF()
-
-# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows
-# due to 64-bit compiler error, See MySQL Bug #19424, #36366, #34297
-IF (MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8)
-	SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.cc mem/mem0pool.cc
-				    PROPERTIES COMPILE_FLAGS -Od)
-ENDIF()
-
-SET(INNOBASE_SOURCES
-	api/api0api.cc
-	api/api0misc.cc
-	btr/btr0btr.cc
-	btr/btr0cur.cc
-	btr/btr0pcur.cc
-	btr/btr0sea.cc
-	buf/buf0buddy.cc
-	buf/buf0buf.cc
-	buf/buf0dblwr.cc
-	buf/buf0checksum.cc
-	buf/buf0dump.cc
-	buf/buf0flu.cc
-	buf/buf0lru.cc
-	buf/buf0rea.cc
-	data/data0data.cc
-	data/data0type.cc
-	dict/dict0boot.cc
-	dict/dict0crea.cc
-	dict/dict0dict.cc
-	dict/dict0load.cc
-	dict/dict0mem.cc
-	dict/dict0stats.cc
-	dict/dict0stats_bg.cc
-	dyn/dyn0dyn.cc
-	eval/eval0eval.cc
-	eval/eval0proc.cc
-	fil/fil0fil.cc
-	fsp/fsp0fsp.cc
-	fut/fut0fut.cc
-	fut/fut0lst.cc
-	ha/ha0ha.cc
-	ha/ha0storage.cc
-	ha/hash0hash.cc
-	fts/fts0fts.cc
-	fts/fts0ast.cc
-	fts/fts0blex.cc
-	fts/fts0config.cc
-	fts/fts0opt.cc
-	fts/fts0pars.cc
-	fts/fts0que.cc
-	fts/fts0sql.cc
-	fts/fts0tlex.cc
-	handler/ha_innodb.cc
-	handler/handler0alter.cc
-	handler/i_s.cc
-	ibuf/ibuf0ibuf.cc
-	lock/lock0iter.cc
-	lock/lock0lock.cc
-	lock/lock0wait.cc
-	log/log0log.cc
-	log/log0recv.cc
-	mach/mach0data.cc
-	mem/mem0mem.cc
-	mem/mem0pool.cc
-	mtr/mtr0log.cc
-	mtr/mtr0mtr.cc
-	os/os0file.cc
-	os/os0proc.cc
-	os/os0sync.cc
-	os/os0thread.cc
-	page/page0cur.cc
-	page/page0page.cc
-	page/page0zip.cc
-	pars/lexyy.cc
-	pars/pars0grm.cc
-	pars/pars0opt.cc
-	pars/pars0pars.cc
-	pars/pars0sym.cc
-	que/que0que.cc
-	read/read0read.cc
-	rem/rem0cmp.cc
-	rem/rem0rec.cc
-	row/row0ext.cc
-	row/row0ftsort.cc
-	row/row0import.cc
-	row/row0ins.cc
-	row/row0merge.cc
-	row/row0mysql.cc
-	row/row0log.cc
-	row/row0purge.cc
-	row/row0row.cc
-	row/row0sel.cc
-	row/row0uins.cc
-	row/row0umod.cc
-	row/row0undo.cc
-	row/row0upd.cc
-	row/row0quiesce.cc
-	row/row0vers.cc
-	srv/srv0conc.cc
-	srv/srv0mon.cc
-	srv/srv0srv.cc
-	srv/srv0start.cc
-	sync/sync0arr.cc
-	sync/sync0rw.cc
-	sync/sync0sync.cc
-	trx/trx0i_s.cc
-	trx/trx0purge.cc
-	trx/trx0rec.cc
-	trx/trx0roll.cc
-	trx/trx0rseg.cc
-	trx/trx0sys.cc
-	trx/trx0trx.cc
-	trx/trx0undo.cc
-	usr/usr0sess.cc
-	ut/ut0bh.cc
-	ut/ut0byte.cc
-	ut/ut0crc32.cc
-	ut/ut0dbg.cc
-	ut/ut0list.cc
-	ut/ut0mem.cc
-	ut/ut0rbt.cc
-	ut/ut0rnd.cc
-	ut/ut0ut.cc
-	ut/ut0vec.cc
-	ut/ut0wqueue.cc)
-
-IF(WITH_INNODB)
-  # Legacy option
-  SET(WITH_INNOBASE_STORAGE_ENGINE TRUE)
-ENDIF()
-
-MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
-  DEFAULT
-  MODULE_OUTPUT_NAME ha_innodb
-  LINK_LIBRARIES ${ZLIB_LIBRARY})
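
The removed CMakeLists.txt above discovered GCC atomic builtins at configure
time by compiling and running small probe programs via CHECK_C_SOURCE_RUNS.
For reference, here is a minimal standalone sketch of what the
HAVE_IB_GCC_ATOMIC_BUILTINS probe tests, assuming a GCC-compatible compiler
(the __sync_* builtins are compiler extensions, not standard C++):

    #include <cstdlib>

    int main()
    {
        long x = 10;
        long y = 123;

        /* CAS with a matching comparand must succeed and store y. */
        if (!__sync_bool_compare_and_swap(&x, x, y) || x != y) {
            return EXIT_FAILURE;
        }

        /* CAS with a stale comparand must fail and leave x untouched. */
        x = 10;
        if (__sync_bool_compare_and_swap(&x, x + 1, y) || x != 10) {
            return EXIT_FAILURE;
        }

        /* Atomic add-and-fetch must return the updated value (10 + 123). */
        if (__sync_add_and_fetch(&x, y) != 133 || x != 133) {
            return EXIT_FAILURE;
        }

        return EXIT_SUCCESS;
    }

If the probe compiles and exits 0, cmake records the symbol and the
IF(HAVE_IB_GCC_ATOMIC_BUILTINS) block above passes
-DHAVE_IB_GCC_ATOMIC_BUILTINS=1 to the InnoDB build; the byte, 64-bit,
fence, and pthread_t probes that follow it work the same way.
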
diff --git a/storage/innobase/COPYING.Google b/storage/innobase/COPYING.Google
deleted file mode 100644
index 5ade2b0..0000000
--- a/storage/innobase/COPYING.Google
+++ /dev/null
@@ -1,30 +0,0 @@
-Portions of this software contain modifications contributed by Google, Inc.
-These contributions are used with the following license:
-
-Copyright (c) 2008, Google Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-      * Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimer.
-      * Redistributions in binary form must reproduce the above
-        copyright notice, this list of conditions and the following
-        disclaimer in the documentation and/or other materials
-        provided with the distribution.
-      * Neither the name of the Google Inc. nor the names of its
-        contributors may be used to endorse or promote products
-        derived from this software without specific prior written
-        permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/storage/innobase/COPYING.Percona b/storage/innobase/COPYING.Percona
deleted file mode 100644
index 8c78681..0000000
--- a/storage/innobase/COPYING.Percona
+++ /dev/null
@@ -1,30 +0,0 @@
-Portions of this software contain modifications contributed by Percona, Inc.
-These contributions are used with the following license:
-
-Copyright (c) 2008, 2009, Percona Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-      * Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimer.
-      * Redistributions in binary form must reproduce the above
-        copyright notice, this list of conditions and the following
-        disclaimer in the documentation and/or other materials
-        provided with the distribution.
-      * Neither the name of the Percona Inc. nor the names of its
-        contributors may be used to endorse or promote products
-        derived from this software without specific prior written
-        permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/storage/innobase/Doxyfile b/storage/innobase/Doxyfile
deleted file mode 100644
index 7cf5048..0000000
--- a/storage/innobase/Doxyfile
+++ /dev/null
@@ -1,1419 +0,0 @@
-# Doxyfile 1.5.6
-
-# Usage: SVNVERSION=-r$(svnversion) doxygen
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project
-#
-# All text after a hash (#) is considered a comment and will be ignored
-# The format is:
-#       TAG = value [value, ...]
-# For lists items can also be appended using:
-#       TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (" ")
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all
-# text before the first occurrence of this tag. Doxygen uses libiconv (or the
-# iconv built into libc) for the transcoding. See
-# http://www.gnu.org/software/libiconv for the list of possible encodings.
-
-DOXYFILE_ENCODING      = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
-# by quotes) that should identify the project.
-
-PROJECT_NAME           = "InnoDB Plugin"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number.
-# This could be handy for archiving the generated documentation or
-# if some version control system is used.
-
-PROJECT_NUMBER         = 1.0$(SVNVERSION)
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
-# base path where the generated documentation will be put.
-# If a relative path is entered, it will be relative to the location
-# where doxygen was started. If left blank the current directory will be used.
-
-OUTPUT_DIRECTORY       = dox
-
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
-# 4096 sub-directories (in 2 levels) under the output directory of each output
-# format and will distribute the generated files over these directories.
-# Enabling this option can be useful when feeding doxygen a huge amount of
-# source files, where putting all generated files in the same directory would
-# otherwise cause performance problems for the file system.
-
-CREATE_SUBDIRS         = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# The default language is English, other supported languages are:
-# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
-# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek,
-# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish,
-# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish,
-# and Ukrainian.
-
-OUTPUT_LANGUAGE        = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
-# include brief member descriptions after the members that are listed in
-# the file and class documentation (similar to JavaDoc).
-# Set to NO to disable this.
-
-BRIEF_MEMBER_DESC      = YES
-
-# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
-# the brief description of a member or function before the detailed description.
-# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-
-REPEAT_BRIEF           = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator
-# that is used to form the text in various listings. Each string
-# in this list, if found as the leading text of the brief description, will be
-# stripped from the text and the result after processing the whole list, is
-# used as the annotated text. Otherwise, the brief description is used as-is.
-# If left blank, the following values are used ("$name" is automatically
-# replaced with the name of the entity): "The $name class" "The $name widget"
-# "The $name file" "is" "provides" "specifies" "contains"
-# "represents" "a" "an" "the"
-
-ABBREVIATE_BRIEF       =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# Doxygen will generate a detailed section even if there is only a brief
-# description.
-
-ALWAYS_DETAILED_SEC    = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-
-INLINE_INHERITED_MEMB  = NO
-
-# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
-# path before files name in the file list and in the header files. If set
-# to NO the shortest path that makes the file name unique will be used.
-
-FULL_PATH_NAMES        = YES
-
-# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
-# can be used to strip a user-defined part of the path. Stripping is
-# only done if one of the specified strings matches the left-hand part of
-# the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the
-# path to strip.
-
-STRIP_FROM_PATH        =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
-# the path mentioned in the documentation of a class, which tells
-# the reader which header file to include in order to use a class.
-# If left blank only the name of the header file containing the class
-# definition is used. Otherwise one should specify the include paths that
-# are normally passed to the compiler using the -I flag.
-
-STRIP_FROM_INC_PATH    =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
-# (but less readable) file names. This can be useful is your file systems
-# doesn't support long names like on DOS, Mac, or CD-ROM.
-
-SHORT_NAMES            = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
-# will interpret the first line (until the first dot) of a JavaDoc-style
-# comment as the brief description. If set to NO, the JavaDoc
-# comments will behave just like regular Qt-style comments
-# (thus requiring an explicit @brief command for a brief description.)
-
-JAVADOC_AUTOBRIEF      = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
-# interpret the first line (until the first dot) of a Qt-style
-# comment as the brief description. If set to NO, the comments
-# will behave just like regular Qt-style comments (thus requiring
-# an explicit \brief command for a brief description.)
-
-QT_AUTOBRIEF           = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
-# treat a multi-line C++ special comment block (i.e. a block of //! or ///
-# comments) as a brief description. This used to be the default behaviour.
-# The new default is to treat a multi-line C++ comment block as a detailed
-# description. Set this tag to YES if you prefer the old behaviour instead.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the DETAILS_AT_TOP tag is set to YES then Doxygen
-# will output the detailed description near the top, like JavaDoc.
-# If set to NO, the detailed description appears after the member
-# documentation.
-
-DETAILS_AT_TOP         = NO
-
-# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
-# member inherits the documentation from any documented member that it
-# re-implements.
-
-INHERIT_DOCS           = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
-# a new page for each member. If set to NO, the documentation of a member will
-# be part of the file/class/namespace that contains it.
-
-SEPARATE_MEMBER_PAGES  = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab.
-# Doxygen uses this value to replace tabs by spaces in code fragments.
-
-TAB_SIZE               = 8
-
-# This tag can be used to specify a number of aliases that acts
-# as commands in the documentation. An alias has the form "name=value".
-# For example adding "sideeffect=\par Side Effects:\n" will allow you to
-# put the command \sideeffect (or @sideeffect) in the documentation, which
-# will result in a user-defined paragraph with heading "Side Effects:".
-# You can put \n's in the value part of an alias to insert newlines.
-
-ALIASES                =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
-# sources only. Doxygen will then generate output that is more tailored for C.
-# For instance, some of the names that are used will be different. The list
-# of all members will be omitted, etc.
-
-OPTIMIZE_OUTPUT_FOR_C  = YES
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
-# sources only. Doxygen will then generate output that is more tailored for
-# Java. For instance, namespaces will be presented as packages, qualified
-# scopes will look different, etc.
-
-OPTIMIZE_OUTPUT_JAVA   = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources only. Doxygen will then generate output that is more tailored for
-# Fortran.
-
-OPTIMIZE_FOR_FORTRAN   = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for
-# VHDL.
-
-OPTIMIZE_OUTPUT_VHDL   = NO
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should
-# set this tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
-# func(std::string) {}). This also make the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-
-BUILTIN_STL_SUPPORT    = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-
-CPP_CLI_SUPPORT        = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
-# Doxygen will parse them like normal C++ but will assume all classes use public
-# instead of private inheritance when no explicit protection keyword is present.
-
-SIP_SUPPORT            = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate getter
-# and setter methods for a property. Setting this option to YES (the default)
-# will make doxygen to replace the get and set methods by a property in the
-# documentation. This will only work if the methods are indeed getting or
-# setting a simple type. If this is not the case, or you want to show the
-# methods anyway, you should set this option to NO.
-
-IDL_PROPERTY_SUPPORT   = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-
-DISTRIBUTE_GROUP_DOC   = NO
-
-# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
-# the same type (for instance a group of public functions) to be put as a
-# subgroup of that type (e.g. under the Public Functions section). Set it to
-# NO to prevent subgrouping. Alternatively, this can be done per class using
-# the \nosubgrouping command.
-
-SUBGROUPING            = YES
-
-# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
-# is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically
-# be useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-
-TYPEDEF_HIDES_STRUCT   = NO
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
-# documentation are documented, even if no documentation was available.
-# Private class members and static file members will be hidden unless
-# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
-
-EXTRACT_ALL            = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
-# will be included in the documentation.
-
-EXTRACT_PRIVATE        = YES
-
-# If the EXTRACT_STATIC tag is set to YES all static members of a file
-# will be included in the documentation.
-
-EXTRACT_STATIC         = YES
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
-# defined locally in source files will be included in the documentation.
-# If set to NO only classes defined in header files are included.
-
-EXTRACT_LOCAL_CLASSES  = YES
-
-# This flag is only useful for Objective-C code. When set to YES local
-# methods, which are defined in the implementation section but not in
-# the interface are included in the documentation.
-# If set to NO (the default) only methods in the interface are included.
-
-EXTRACT_LOCAL_METHODS  = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base
-# name of the file that contains the anonymous namespace. By default
-# anonymous namespace are hidden.
-
-EXTRACT_ANON_NSPACES   = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
-# undocumented members of documented classes, files or namespaces.
-# If set to NO (the default) these members will be included in the
-# various overviews, but no documentation section is generated.
-# This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_MEMBERS     = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy.
-# If set to NO (the default) these classes will be included in the various
-# overviews. This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_CLASSES     = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
-# friend (class|struct|union) declarations.
-# If set to NO (the default) these declarations will be included in the
-# documentation.
-
-HIDE_FRIEND_COMPOUNDS  = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
-# documentation blocks found inside the body of a function.
-# If set to NO (the default) these blocks will be appended to the
-# function's detailed documentation block.
-
-HIDE_IN_BODY_DOCS      = NO
-
-# The INTERNAL_DOCS tag determines if documentation
-# that is typed after a \internal command is included. If the tag is set
-# to NO (the default) then the documentation will be excluded.
-# Set it to YES to include the internal documentation.
-
-INTERNAL_DOCS          = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
-# file names in lower-case letters. If set to YES upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-
-CASE_SENSE_NAMES       = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
-# will show members with their full class and namespace scopes in the
-# documentation. If set to YES the scope will be hidden.
-
-HIDE_SCOPE_NAMES       = NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
-# will put a list of the files that are included by a file in the documentation
-# of that file.
-
-SHOW_INCLUDE_FILES     = YES
-
-# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
-# is inserted in the documentation for inline members.
-
-INLINE_INFO            = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
-# will sort the (detailed) documentation of file and class members
-# alphabetically by member name. If set to NO the members will appear in
-# declaration order.
-
-SORT_MEMBER_DOCS       = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
-# brief documentation of file, namespace and class members alphabetically
-# by member name. If set to NO (the default) the members will appear in
-# declaration order.
-
-SORT_BRIEF_DOCS        = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
-# hierarchy of group names into alphabetical order. If set to NO (the default)
-# the group names will appear in their defined order.
-
-SORT_GROUP_NAMES       = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
-# sorted by fully-qualified names, including namespaces. If set to
-# NO (the default), the class list will be sorted only by class name,
-# not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the
-# alphabetical list.
-
-SORT_BY_SCOPE_NAME     = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or
-# disable (NO) the todo list. This list is created by putting \todo
-# commands in the documentation.
-
-GENERATE_TODOLIST      = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or
-# disable (NO) the test list. This list is created by putting \test
-# commands in the documentation.
-
-GENERATE_TESTLIST      = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or
-# disable (NO) the bug list. This list is created by putting \bug
-# commands in the documentation.
-
-GENERATE_BUGLIST       = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
-# disable (NO) the deprecated list. This list is created by putting
-# \deprecated commands in the documentation.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional
-# documentation sections, marked by \if sectionname ... \endif.
-
-ENABLED_SECTIONS       =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
-# the initial value of a variable or define consists of for it to appear in
-# the documentation. If the initializer consists of more lines than specified
-# here it will be hidden. Use a value of 0 to hide initializers completely.
-# The appearance of the initializer of individual variables and defines in the
-# documentation can be controlled using \showinitializer or \hideinitializer
-# command in the documentation regardless of this setting.
-
-MAX_INITIALIZER_LINES  = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
-# at the bottom of the documentation of classes and structs. If set to YES the
-# list will mention the files that were used to generate the documentation.
-
-SHOW_USED_FILES        = YES
-
-# If the sources in your project are distributed over multiple directories
-# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
-# in the documentation. The default is NO.
-
-SHOW_DIRECTORIES       = NO
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
-# This will remove the Files entry from the Quick Index and from the
-# Folder Tree View (if specified). The default is YES.
-
-SHOW_FILES             = YES
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
-# Namespaces page.  This will remove the Namespaces entry from the Quick Index
-# and from the Folder Tree View (if specified). The default is YES.
-
-SHOW_NAMESPACES        = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command <command> <input-file>, where <command> is the value of
-# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
-# provided by doxygen. Whatever the program writes to standard output
-# is used as the file version. See the manual for examples.
-
-FILE_VERSION_FILTER    =
-
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated
-# by doxygen. Possible values are YES and NO. If left blank NO is used.
-
-QUIET                  = YES
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated by doxygen. Possible values are YES and NO. If left blank
-# NO is used.
-
-WARNINGS               = YES
-
-# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
-# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
-# automatically be disabled.
-
-WARN_IF_UNDOCUMENTED   = YES
-
-# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some
-# parameters in a documented function, or documenting parameters that
-# don't exist or using markup commands wrongly.
-
-WARN_IF_DOC_ERROR      = YES
-
-# The WARN_NO_PARAMDOC option can be enabled to get warnings for
-# functions that are documented, but have no documentation for their parameters
-# or return value. If set to NO (the default) doxygen will only warn about
-# wrong or incomplete parameter documentation, but not about the absence of
-# documentation.
-
-WARN_NO_PARAMDOC       = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that
-# doxygen can produce. The string should contain the $file, $line, and $text
-# tags, which will be replaced by the file and line number from which the
-# warning originated and the warning text. Optionally the format may contain
-# $version, which will be replaced by the version of the file (if it could
-# be obtained via FILE_VERSION_FILTER)
-
-WARN_FORMAT            = "$file:$line: $text"
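
(Since the format is free-form, an IDE-oriented variant is possible; a
hypothetical Visual Studio style value would be

    WARN_FORMAT = "$file($line): $text"

so warnings become clickable in that IDE's output window.)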
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning
-# and error messages should be written. If left blank the output is written
-# to stderr.
-
-WARN_LOGFILE           =
-
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag can be used to specify the files and/or directories that contain
-# documented source files. You may enter file names like "myfile.cpp" or
-# directories like "/usr/src/myproject". Separate the files or directories
-# with spaces.
-
-INPUT                  = . include/univ.i
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
-# also the default input encoding. Doxygen uses libiconv (or the iconv built
-# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
-# the list of possible encodings.
-
-INPUT_ENCODING         = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank the following patterns are tested:
-# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
-# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
-
-FILE_PATTERNS          = *.c *.ic *.h
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories
-# should be searched for input files as well. Possible values are YES and NO.
-# If left blank NO is used.
-
-RECURSIVE              = YES
-
-# The EXCLUDE tag can be used to specify files and/or directories that should
-# be excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-
-EXCLUDE                =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix filesystem feature) are excluded
-# from the input.
-
-EXCLUDE_SYMLINKS       = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories. Note that the wildcards are matched
-# against the file with absolute path, so to exclude all test directories
-# for example use the pattern */test/*
-
-EXCLUDE_PATTERNS       =
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-
-EXCLUDE_SYMBOLS        =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or
-# directories that contain example code fragments that are included (see
-# the \include command).
-
-EXAMPLE_PATH           =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank all files are included.
-
-EXAMPLE_PATTERNS       =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude
-# commands irrespective of the value of the RECURSIVE tag.
-# Possible values are YES and NO. If left blank NO is used.
-
-EXAMPLE_RECURSIVE      = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or
-# directories that contain images that are included in the documentation (see
-# the \image command).
-
-IMAGE_PATH             =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command <filter> <input-file>, where <filter>
-# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
-# input file. Doxygen will then use the output that the filter program writes
-# to standard output.  If FILTER_PATTERNS is specified, this tag will be
-# ignored.
-
-INPUT_FILTER           =
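
(INPUT_FILTER is run as "<filter> <input-file>" and doxygen parses whatever
the filter prints to stdout. A minimal hypothetical filter that strips DOS
line endings before parsing would be:

    INPUT_FILTER = "sed -e 's/\r$//'"

Any transformation works, as long as the complete file is echoed back.)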
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis.  Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match.  The filters are a list of the form:
-# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
-# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
-# is applied to all files.
-
-FILTER_PATTERNS        =
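
(The per-pattern form pairs a wildcard with a filter command; a hypothetical
example, with my_ic_filter being any executable found on the PATH:

    FILTER_PATTERNS = *.ic=my_ic_filter
)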
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will be used to filter the input files when producing source
-# files to browse (i.e. when SOURCE_BROWSER is set to YES).
-
-FILTER_SOURCE_FILES    = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will
-# be generated. Documented entities will be cross-referenced with these sources.
-# Note: To get rid of all source code in the generated output, make sure also
-# VERBATIM_HEADERS is set to NO.
-
-SOURCE_BROWSER         = NO
-
-# Setting the INLINE_SOURCES tag to YES will include the body
-# of functions and classes directly in the documentation.
-
-INLINE_SOURCES         = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
-# doxygen to hide any special comment blocks from generated source code
-# fragments. Normal C and C++ comments will always remain visible.
-
-STRIP_CODE_COMMENTS    = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES
-# then for each documented function all documented
-# functions referencing it will be listed.
-
-REFERENCED_BY_RELATION = NO
-
-# If the REFERENCES_RELATION tag is set to YES
-# then for each documented function all documented entities
-# called/used by that function will be listed.
-
-REFERENCES_RELATION    = NO
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
-# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
-# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
-# link to the source code.  Otherwise they will link to the documentation.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code
-# will point to the HTML generated by the htags(1) tool instead of doxygen's
-# built-in source browser. The htags tool is part of GNU's global source
-# tagging system (see http://www.gnu.org/software/global/global.html). You
-# will need version 4.8.6 or higher.
-
-USE_HTAGS              = NO
-
-# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
-# will generate a verbatim copy of the header file for each class for
-# which an include is specified. Set to NO to disable this.
-
-VERBATIM_HEADERS       = YES
-
-#---------------------------------------------------------------------------
-# configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
-# of all compounds will be generated. Enable this if the project
-# contains a lot of classes, structs, unions or interfaces.
-
-ALPHABETICAL_INDEX     = NO
-
-# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
-# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
-# in which this list will be split (can be a number in the range [1..20])
-
-COLS_IN_ALPHA_INDEX    = 5
-
-# In case all classes in a project start with a common prefix, all
-# classes will be put under the same header in the alphabetical index.
-# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
-# should be ignored while generating the index headers.
-
-IGNORE_PREFIX          =
-
-#---------------------------------------------------------------------------
-# configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
-# generate HTML output.
-
-GENERATE_HTML          = YES
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `html' will be used as the default path.
-
-HTML_OUTPUT            = html
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
-# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
-# doxygen will generate files with .html extension.
-
-HTML_FILE_EXTENSION    = .html
-
-# The HTML_HEADER tag can be used to specify a personal HTML header for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard header.
-
-HTML_HEADER            =
-
-# The HTML_FOOTER tag can be used to specify a personal HTML footer for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard footer.
-
-HTML_FOOTER            =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
-# style sheet that is used by each HTML page. It can be used to
-# fine-tune the look of the HTML output. If the tag is left blank doxygen
-# will generate a default style sheet. Note that doxygen will try to copy
-# the style sheet file to the HTML output directory, so don't put your own
-# stylesheet in the HTML output directory as well, or it will be erased!
-
-HTML_STYLESHEET        =
-
-# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
-# files or namespaces will be aligned in HTML using tables. If set to
-# NO a bullet list will be used.
-
-HTML_ALIGN_MEMBERS     = YES
-
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files
-# will be generated that can be used as input for tools like the
-# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
-# of the generated HTML documentation.
-
-GENERATE_HTMLHELP      = NO
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files
-# will be generated that can be used as input for Apple's Xcode 3
-# integrated development environment, introduced with OSX 10.5 (Leopard).
-# To create a documentation set, doxygen will generate a Makefile in the
-# HTML output directory. Running make will produce the docset in that
-# directory and running "make install" will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
-# it at startup.
-
-GENERATE_DOCSET        = NO
-
-# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
-# feed. A documentation feed provides an umbrella under which multiple
-# documentation sets from a single provider (such as a company or product suite)
-# can be grouped.
-
-DOCSET_FEEDNAME        = "Doxygen generated docs"
-
-# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
-# should uniquely identify the documentation set bundle. This should be a
-# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
-# will append .docset to the name.
-
-DOCSET_BUNDLE_ID       = org.doxygen.Project
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded. For this to work a browser that supports
-# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
-# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
-
-HTML_DYNAMIC_SECTIONS  = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
-# be used to specify the file name of the resulting .chm file. You
-# can add a path in front of the file if the result should not be
-# written to the html output directory.
-
-CHM_FILE               =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
-# be used to specify the location (absolute path including file name) of
-# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
-# the HTML help compiler on the generated index.hhp.
-
-HHC_LOCATION           =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
-# controls whether a separate .chi index file is generated (YES) or whether
-# it should be included in the master .chm file (NO).
-
-GENERATE_CHI           = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
-# is used to encode HtmlHelp index (hhk), content (hhc) and project file
-# content.
-
-CHM_INDEX_ENCODING     =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
-# controls whether a binary table of contents is generated (YES) or a
-# normal table of contents (NO) in the .chm file.
-
-BINARY_TOC             = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members
-# to the contents of the HTML help documentation and to the tree view.
-
-TOC_EXPAND             = NO
-
-# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
-# top of each HTML page. The value NO (the default) enables the index and
-# the value YES disables it.
-
-DISABLE_INDEX          = NO
-
-# This tag can be used to set the number of enum values (range [1..20])
-# that doxygen will group on one line in the generated HTML documentation.
-
-ENUM_VALUES_PER_LINE   = 4
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information.
-# If the tag value is set to FRAME, a side panel will be generated
-# containing a tree-like index structure (just like the one that
-# is generated for HTML Help). For this to work a browser that supports
-# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
-# Netscape 6.0+, Internet Explorer 5.0+, or Konqueror). Windows users are
-# probably better off using the HTML help feature. Other possible values
-# for this tag are: HIERARCHIES, which will generate the Groups, Directories,
-# and Class Hierarchy pages using a tree view instead of an ordered list;
-# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which
-# disables this behavior completely. For backwards compatibility with previous
-# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE
-# respectively.
-
-GENERATE_TREEVIEW      = NONE
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
-# used to set the initial width (in pixels) of the frame in which the tree
-# is shown.
-
-TREEVIEW_WIDTH         = 250
-
-# Use this tag to change the font size of Latex formulas included
-# as images in the HTML documentation. The default is 10. Note that
-# when you change the font size after a successful doxygen run you need
-# to manually remove any form_*.png images from the HTML output directory
-# to force them to be regenerated.
-
-FORMULA_FONTSIZE       = 10
-
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
-# generate Latex output.
-
-GENERATE_LATEX         = NO
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `latex' will be used as the default path.
-
-LATEX_OUTPUT           = latex
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked. If left blank `latex' will be used as the default command name.
-
-LATEX_CMD_NAME         = latex
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
-# generate index for LaTeX. If left blank `makeindex' will be used as the
-# default command name.
-
-MAKEINDEX_CMD_NAME     = makeindex
-
-# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
-# LaTeX documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_LATEX          = NO
-
-# The PAPER_TYPE tag can be used to set the paper type that is used
-# by the printer. Possible values are: a4, a4wide, letter, legal and
-# executive. If left blank a4wide will be used.
-
-PAPER_TYPE             = a4wide
-
-# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
-# packages that should be included in the LaTeX output.
-
-EXTRA_PACKAGES         =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
-# the generated latex document. The header should contain everything until
-# the first chapter. If it is left blank doxygen will generate a
-# standard header. Notice: only use this tag if you know what you are doing!
-
-LATEX_HEADER           =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
-# is prepared for conversion to pdf (using ps2pdf). The pdf file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using a pdf viewer.
-
-PDF_HYPERLINKS         = YES
-
-# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
-# plain latex in the generated Makefile. Set this option to YES to get a
-# higher quality PDF documentation.
-
-USE_PDFLATEX           = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep
-# running if errors occur, instead of asking the user for help.
-# This option is also used when generating formulas in HTML.
-
-LATEX_BATCHMODE        = NO
-
-# If LATEX_HIDE_INDICES is set to YES then doxygen will not
-# include the index chapters (such as File Index, Compound Index, etc.)
-# in the output.
-
-LATEX_HIDE_INDICES     = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
-# The RTF output is optimized for Word 97 and may not look very pretty with
-# other RTF readers or editors.
-
-GENERATE_RTF           = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `rtf' will be used as the default path.
-
-RTF_OUTPUT             = rtf
-
-# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
-# RTF documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_RTF            = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
-# will contain hyperlink fields. The RTF file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using WORD or other
-# programs which support those fields.
-# Note: wordpad (write) and others do not support links.
-
-RTF_HYPERLINKS         = NO
-
-# Load stylesheet definitions from file. Syntax is similar to doxygen's
-# config file, i.e. a series of assignments. You only have to provide
-# replacements; missing definitions are set to their default value.
-
-RTF_STYLESHEET_FILE    =
-
-# Set optional variables used in the generation of an rtf document.
-# Syntax is similar to doxygen's config file.
-
-RTF_EXTENSIONS_FILE    =
-
-#---------------------------------------------------------------------------
-# configuration options related to the man page output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
-# generate man pages
-
-GENERATE_MAN           = NO
-
-# The MAN_OUTPUT tag is used to specify where the man pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `man' will be used as the default path.
-
-MAN_OUTPUT             = man
-
-# The MAN_EXTENSION tag determines the extension that is added to
-# the generated man pages (default is the subroutine's section .3)
-
-MAN_EXTENSION          = .3
-
-# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
-# then it will generate one additional man file for each entity
-# documented in the real man page(s). These additional files
-# only source the real man page, but without them the man command
-# would be unable to find the correct page. The default is NO.
-
-MAN_LINKS              = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the XML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_XML tag is set to YES Doxygen will
-# generate an XML file that captures the structure of
-# the code including all documentation.
-
-GENERATE_XML           = NO
-
-# The XML_OUTPUT tag is used to specify where the XML pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `xml' will be used as the default path.
-
-XML_OUTPUT             = xml
-
-# The XML_SCHEMA tag can be used to specify an XML schema,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_SCHEMA             =
-
-# The XML_DTD tag can be used to specify an XML DTD,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_DTD                =
-
-# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
-# dump the program listings (including syntax highlighting
-# and cross-referencing information) to the XML output. Note that
-# enabling this will significantly increase the size of the XML output.
-
-XML_PROGRAMLISTING     = YES
-
-#---------------------------------------------------------------------------
-# configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
-# generate an AutoGen Definitions (see autogen.sf.net) file
-# that captures the structure of the code including all
-# documentation. Note that this feature is still experimental
-# and incomplete at the moment.
-
-GENERATE_AUTOGEN_DEF   = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_PERLMOD tag is set to YES Doxygen will
-# generate a Perl module file that captures the structure of
-# the code including all documentation. Note that this
-# feature is still experimental and incomplete at the
-# moment.
-
-GENERATE_PERLMOD       = NO
-
-# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
-# the necessary Makefile rules, Perl scripts and LaTeX code to be able
-# to generate PDF and DVI output from the Perl module output.
-
-PERLMOD_LATEX          = NO
-
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
-# nicely formatted so it can be parsed by a human reader.  This is useful
-# if you want to understand what is going on.  On the other hand, if this
-# tag is set to NO the size of the Perl module output will be much smaller
-# and Perl will parse it just the same.
-
-PERLMOD_PRETTY         = YES
-
-# The names of the make variables in the generated doxyrules.make file
-# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
-# This is useful so different doxyrules.make files included by the same
-# Makefile don't overwrite each other's variables.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
-# evaluate all C-preprocessor directives found in the sources and include
-# files.
-
-ENABLE_PREPROCESSING   = YES
-
-# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
-# names in the source code. If set to NO (the default) only conditional
-# compilation will be performed. Macro expansion can be done in a controlled
-# way by setting EXPAND_ONLY_PREDEF to YES.
-
-MACRO_EXPANSION        = YES
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
-# then the macro expansion is limited to the macros specified with the
-# PREDEFINED and EXPAND_AS_DEFINED tags.
-
-EXPAND_ONLY_PREDEF     = YES
-
-# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
-# in the INCLUDE_PATH (see below) will be searched if a #include is found.
-
-SEARCH_INCLUDES        = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by
-# the preprocessor.
-
-INCLUDE_PATH           =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will
-# be used.
-
-INCLUDE_FILE_PATTERNS  =
-
-# The PREDEFINED tag can be used to specify one or more macro names that
-# are defined before the preprocessor is started (similar to the -D option of
-# gcc). The argument of the tag is a list of macros of the form: name
-# or name=definition (no spaces). If the definition and the = are
-# omitted =1 is assumed. To prevent a macro definition from being
-# undefined via #undef or recursively expanded use the := operator
-# instead of the = operator.
-
-PREDEFINED             = DOXYGEN UNIV_DEBUG UNIV_SYNC_DEBUG __attribute__()=
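
(The last entry is the interesting one: "__attribute__()=" is meant to make
doxygen's preprocessor swallow GCC __attribute__((...)) annotations so the
declarations parse cleanly; a definition that must survive #undef and
recursive expansion would use the := form instead, e.g. the hypothetical
MY_FLAG:=1.)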
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
-# this tag can be used to specify a list of macro names that should be expanded.
-# The macro definition that is found in the sources will be used.
-# Use the PREDEFINED tag if you want to use a different macro definition.
-
-EXPAND_AS_DEFINED      = UT_LIST_BASE_NODE_T UT_LIST_NODE_T
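
(These two are the intrusive-list macros from ut0lst.h; letting doxygen
expand them, while all other expansion stays limited to PREDEFINED, means
the list base and node fields they generate still show up in the structs.)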
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
-# doxygen's preprocessor will remove all function-like macros that are alone
-# on a line, have an all uppercase name, and do not end with a semicolon. Such
-# function macros are typically used for boiler-plate code, and will confuse
-# the parser if not removed.
-
-SKIP_FUNCTION_MACROS   = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES option can be used to specify one or more tagfiles.
-# Optionally an initial location of the external documentation
-# can be added for each tagfile. The format of a tag file without
-# this location is as follows:
-#   TAGFILES = file1 file2 ...
-# Adding location for the tag files is done as follows:
-#   TAGFILES = file1=loc1 "file2 = loc2" ...
-# where "loc1" and "loc2" can be relative or absolute paths or
-# URLs. If a location is present for each tag, the installdox tool
-# does not have to be run to correct the links.
-# Note that each tag file must have a unique name
-# (where the name does NOT include the path)
-# If a tag file is not located in the directory in which doxygen
-# is run, you must also specify the path to the tagfile here.
-
-TAGFILES               =
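
(A populated value pairs each tag file with the location of the matching
HTML, e.g. the hypothetical

    TAGFILES = mysys.tag=../mysys/html

so cross-links into the external documentation resolve without installdox.)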
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create
-# a tag file that is based on the input files it reads.
-
-GENERATE_TAGFILE       =
-
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed
-# in the class index. If set to NO only the inherited external classes
-# will be listed.
-
-ALLEXTERNALS           = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will
-# be listed.
-
-EXTERNAL_GROUPS        = NO
-
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of `which perl').
-
-PERL_PATH              = /usr/bin/perl
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
-# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
-# or super classes. Setting the tag to NO turns the diagrams off. Note that
-# this option is superseded by the HAVE_DOT option below. This is only a
-# fallback. It is recommended to install and use dot, since it yields more
-# powerful graphs.
-
-CLASS_DIAGRAMS         = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see
-# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH            =
-
-# If set to YES, the inheritance and collaboration graphs will hide
-# inheritance and usage relations if the target is undocumented
-# or is not a class.
-
-HIDE_UNDOC_RELATIONS   = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz, a graph visualization
-# toolkit from AT&T and Lucent Bell Labs. The other options in this section
-# have no effect if this option is set to NO (the default)
-
-HAVE_DOT               = YES
-
-# By default doxygen will write a font called FreeSans.ttf to the output
-# directory and reference it in all dot files that doxygen generates. This
-# font does not include all possible unicode characters however, so when you need
-# these (or just want a differently looking font) you can specify the font name
-# using DOT_FONTNAME. You need to make sure dot is able to find the font,
-# which can be done by putting it in a standard location or by setting the
-# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
-# containing the font.
-
-DOT_FONTNAME           = FreeSans
-
-# By default doxygen will tell dot to use the output directory to look for the
-# FreeSans.ttf font (which doxygen will put there itself). If you specify a
-# different font using DOT_FONTNAME you can set the path where dot
-# can find it using this tag.
-
-DOT_FONTPATH           =
-
-# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect inheritance relations. Setting this tag to YES will force the
-# CLASS_DIAGRAMS tag to NO.
-
-CLASS_GRAPH            = YES
-
-# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect implementation dependencies (inheritance, containment, and
-# class references variables) of the class with other documented classes.
-
-COLLABORATION_GRAPH    = YES
-
-# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for groups, showing the direct group dependencies
-
-GROUP_GRAPHS           = NO
-
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-
-UML_LOOK               = NO
-
-# If set to YES, the inheritance and collaboration graphs will show the
-# relations between templates and their instances.
-
-TEMPLATE_RELATIONS     = NO
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
-# tags are set to YES then doxygen will generate a graph for each documented
-# file showing the direct and indirect include dependencies of the file with
-# other documented files.
-
-INCLUDE_GRAPH          = YES
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
-# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
-# documented header file showing the documented files that directly or
-# indirectly include this file.
-
-INCLUDED_BY_GRAPH      = YES
-
-# If the CALL_GRAPH and HAVE_DOT options are set to YES then
-# doxygen will generate a call dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable call graphs
-# for selected functions only using the \callgraph command.
-
-CALL_GRAPH             = NO
-
-# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
-# doxygen will generate a caller dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable caller
-# graphs for selected functions only using the \callergraph command.
-
-CALLER_GRAPH           = NO
-
-# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
-# will show a graphical hierarchy of all classes instead of a textual one.
-
-GRAPHICAL_HIERARCHY    = YES
-
-# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
-# then doxygen will show the dependencies a directory has on other directories
-# in a graphical way. The dependency relations are determined by the #include
-# relations between the files in the directories.
-
-DIRECTORY_GRAPH        = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot. Possible values are png, jpg, or gif
-# If left blank png will be used.
-
-DOT_IMAGE_FORMAT       = png
-
-# The tag DOT_PATH can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-
-DOT_PATH               =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the
-# \dotfile command).
-
-DOTFILE_DIRS           =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
-# nodes that will be shown in the graph. If the number of nodes in a graph
-# becomes larger than this value, doxygen will truncate the graph, which is
-# visualized by representing a node as a red box. Note that if the
-# number of direct children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES, then the graph will not be shown at all. Also note
-# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-
-DOT_GRAPH_MAX_NODES    = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
-# graphs generated by dot. A depth value of 3 means that only nodes reachable
-# from the root by following a path via at most 3 edges will be shown. Nodes
-# that lay further from the root node will be omitted. Note that setting this
-# option to 1 or 2 may greatly reduce the computation time needed for large
-# code bases. Also note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-
-MAX_DOT_GRAPH_DEPTH    = 3
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is enabled by default, which results in a transparent
-# background. Warning: Depending on the platform used, enabling this option
-# may lead to badly anti-aliased labels on the edges of a graph (i.e. they
-# become hard to read).
-
-DOT_TRANSPARENT        = YES
-
-# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10)
-# support this, this feature is disabled by default.
-
-DOT_MULTI_TARGETS      = NO
-
-# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
-# generate a legend page explaining the meaning of the various boxes and
-# arrows in the dot generated graphs.
-
-GENERATE_LEGEND        = YES
-
-# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
-# remove the intermediate dot files that are used to generate
-# the various graphs.
-
-DOT_CLEANUP            = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to the search engine
-#---------------------------------------------------------------------------
-
-# The SEARCHENGINE tag specifies whether or not a search engine should be
-# used. If set to NO the values of all tags below this one will be ignored.
-
-SEARCHENGINE           = NO
diff --git a/storage/innobase/api/api0api.cc b/storage/innobase/api/api0api.cc
index 07eecdc..01aedaa 100644
--- a/storage/innobase/api/api0api.cc
+++ b/storage/innobase/api/api0api.cc
@@ -24,15 +24,7 @@ InnoDB Native API
 3/20/2011 Jimmy Yang extracted from Embedded InnoDB
 *******************************************************/
 
-#include "univ.i"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
+#include "ha_prototypes.h"
 
 #include "api0api.h"
 #include "api0misc.h"
@@ -50,11 +42,9 @@ InnoDB Native API
 #include "row0sel.h"
 #include "lock0lock.h"
 #include "rem0cmp.h"
-#include "ut0dbg.h"
 #include "dict0priv.h"
-#include "ut0ut.h"
-#include "ha_prototypes.h"
 #include "trx0roll.h"
+#include "row0trunc.h"
 
 /** configure variable for binlog option with InnoDB APIs */
 my_bool ib_binlog_enabled = FALSE;
@@ -212,15 +202,15 @@ operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
 #define INNOBASE_WAKE_INTERVAL	32
 
 /*****************************************************************//**
-Check whether the Innodb persistent cursor is positioned.
-@return	IB_TRUE if positioned */
+Check whether the InnoDB persistent cursor is positioned.
+@return IB_TRUE if positioned */
 UNIV_INLINE
 ib_bool_t
 ib_btr_cursor_is_positioned(
 /*========================*/
 	btr_pcur_t*	pcur)		/*!< in: InnoDB persistent cursor */
 {
-	return(pcur->old_stored == BTR_PCUR_OLD_STORED
+	return(pcur->old_stored
 	       && (pcur->pos_state == BTR_PCUR_IS_POSITIONED
 	           || pcur->pos_state == BTR_PCUR_WAS_POSITIONED));
 }
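
(The dropped comparison reflects a 5.7 type change: btr_pcur_t::old_stored,
formerly an enum tested against BTR_PCUR_OLD_STORED, is now a plain bool,
so the flag can be tested directly.)
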
@@ -228,7 +218,7 @@ ib_btr_cursor_is_positioned(
 
 /********************************************************************//**
 Open a table using the table id, if found then increment table ref count.
-@return	table instance if found */
+@return table instance if found */
 static
 dict_table_t*
 ib_open_table_by_id(
@@ -260,9 +250,9 @@ ib_open_table_by_id(
 
 /********************************************************************//**
 Open a table using the table name, if found then increment table ref count.
-@return	table instance if found */
-UNIV_INTERN
-void*
+@return table instance if found */
+static
+dict_table_t*
 ib_open_table_by_name(
 /*==================*/
 	const char*	name)		/*!< in: table name to lookup */
@@ -281,7 +271,7 @@ ib_open_table_by_name(
 
 /********************************************************************//**
 Find table using table name.
-@return	table instance if found */
+@return table instance if found */
 static
 dict_table_t*
 ib_lookup_table_by_name(
@@ -358,7 +348,7 @@ ib_read_tuple(
 			*len = offset_size;
 		}
 		ptr = *rec_buf;
-	}  else {
+	} else {
 		/* Make a copy of the rec. */
 		ptr = mem_heap_alloc(tuple->heap, offset_size);
 	}
@@ -392,12 +382,11 @@ ib_read_tuple(
 		/* Fetch and copy any externally stored column. */
 		if (rec_offs_nth_extern(offsets, i)) {
 
-			ulint	zip_size;
-
-			zip_size = dict_table_zip_size(index->table);
+			const page_size_t	page_size(
+				dict_table_page_size(index->table));
 
 			data = btr_rec_copy_externally_stored_field(
-				copy, offsets, zip_size, i, &len,
+				copy, offsets, page_size, i, &len,
 				tuple->heap);
 
 			ut_a(len != UNIV_SQL_NULL);
@@ -409,7 +398,7 @@ ib_read_tuple(
 
 /*****************************************************************//**
 Create an InnoDB key tuple.
-@return	tuple instance created, or NULL */
+@return tuple instance created, or NULL */
 static
 ib_tpl_t
 ib_key_tuple_new_low(
@@ -462,7 +451,7 @@ ib_key_tuple_new_low(
 
 /*****************************************************************//**
 Create an InnoDB key tuple.
-@return	tuple instance created, or NULL */
+@return tuple instance created, or NULL */
 static
 ib_tpl_t
 ib_key_tuple_new(
@@ -483,7 +472,7 @@ ib_key_tuple_new(
 
 /*****************************************************************//**
 Create an InnoDB row tuple.
-@return	tuple instance, or NULL */
+@return tuple instance, or NULL */
 static
 ib_tpl_t
 ib_row_tuple_new_low(
@@ -515,7 +504,7 @@ ib_row_tuple_new_low(
 
 /*****************************************************************//**
 Create an InnoDB row tuple.
-@return	tuple instance, or NULL */
+@return tuple instance, or NULL */
 static
 ib_tpl_t
 ib_row_tuple_new(
@@ -536,8 +525,7 @@ ib_row_tuple_new(
 
 /*****************************************************************//**
 Begin a transaction.
-@return	innobase txn handle */
-UNIV_INTERN
+@return innobase txn handle */
 ib_err_t
 ib_trx_start(
 /*=========*/
@@ -558,7 +546,7 @@ ib_trx_start(
 	trx->api_auto_commit = auto_commit;
 	trx->read_write = read_write;
 
-	trx_start_if_not_started(trx);
+	trx_start_if_not_started(trx, read_write);
 
 	trx->isolation_level = ib_trx_level;
 
@@ -572,8 +560,7 @@ ib_trx_start(
 /*****************************************************************//**
 Begin a transaction. This will allocate a new transaction handle and
 put the transaction in the active state.
-@return	innobase txn handle */
-UNIV_INTERN
+@return innobase txn handle */
 ib_trx_t
 ib_trx_begin(
 /*=========*/
@@ -595,11 +582,9 @@ ib_trx_begin(
 	return(static_cast<ib_trx_t>(trx));
 }
 
-
 /*****************************************************************//**
 Check if transaction is read_only
 @return transaction read_only status */
-UNIV_INTERN
 ib_u32_t
 ib_trx_read_only(
 /*=============*/
@@ -609,25 +594,9 @@ ib_trx_read_only(
 
 	return(trx->read_only);
 }
-
-/*****************************************************************//**
-Get the transaction's state.
-@return	transaction state */
-UNIV_INTERN
-ib_trx_state_t
-ib_trx_state(
-/*=========*/
-	ib_trx_t	ib_trx)		/*!< in: trx handle */
-{
-	trx_t*		trx = (trx_t*) ib_trx;
-
-	return((ib_trx_state_t) trx->state);
-}
-
 /*****************************************************************//**
 Get a trx start time.
-@return	trx start_time */
-UNIV_INTERN
+@return trx start_time */
 ib_u64_t
 ib_trx_get_start_time(
 /*==================*/
@@ -638,8 +607,7 @@ ib_trx_get_start_time(
 }
 /*****************************************************************//**
 Release the resources of the transaction.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_trx_release(
 /*===========*/
@@ -656,17 +624,17 @@ ib_trx_release(
 /*****************************************************************//**
 Commit a transaction. This function will also release the schema
 latches.
-@return	DB_SUCCESS or err code */
-
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_trx_commit(
 /*==========*/
 	ib_trx_t	ib_trx)		/*!< in: trx handle */
 {
 	ib_err_t	err = DB_SUCCESS;
-	trx_t*		trx = (trx_t*) ib_trx;
+	trx_t*		trx = reinterpret_cast<trx_t*>(ib_trx);
+
+	if (!trx_is_started(trx)) {
 
-	if (trx->state == TRX_STATE_NOT_STARTED) {
 		return(err);
 	}
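
(Several hunks below make the same substitution: open-coded checks of
trx->state give way to the trx_is_started() helper from trx0trx.h. A
minimal sketch of what that predicate amounts to, assuming the 5.7
definition:

	bool
	trx_is_started(const trx_t* trx)
	{
		return(trx->state != TRX_STATE_NOT_STARTED);
	}

Committing a never-started handle thus remains a DB_SUCCESS no-op.)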
 
@@ -678,8 +646,7 @@ ib_trx_commit(
 /*****************************************************************//**
 Rollback a transaction. This function will also release the schema
 latches.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_trx_rollback(
 /*============*/
@@ -696,7 +663,7 @@ ib_trx_rollback(
 	return(err);
 }
 
-#ifdef __WIN__
+#ifdef _WIN32
 /*****************************************************************//**
 Convert a string to lower case. */
 static
@@ -710,7 +677,7 @@ ib_to_lower_case(
 		++ptr;
 	}
 }
-#endif /* __WIN__ */
+#endif /* _WIN32 */
 
 /*****************************************************************//**
 Normalizes a table name string. A normalized name consists of the
@@ -762,7 +729,7 @@ ib_normalize_table_name(
 			ut_strlen(name) + 1 - (db_name - name));
 
 		norm_name[table_name - db_name - 1] = '/';
-#ifdef __WIN__
+#ifdef _WIN32
 		ib_to_lower_case(norm_name);
 #endif
 	} else {
@@ -771,56 +738,8 @@ ib_normalize_table_name(
 }
 
 /*****************************************************************//**
-Check whether the table name conforms to our requirements. Currently
-we only do a simple check for the presence of a '/'.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_table_name_check(
-/*================*/
-	const char*	name)		/*!< in: table name to check */
-{
-	const char*	slash = NULL;
-	ulint		len = ut_strlen(name);
-
-	if (len < 2
-	    || *name == '/'
-	    || name[len - 1] == '/'
-	    || (name[0] == '.' && name[1] == '/')
-	    || (name[0] == '.' && name[1] == '.' && name[2] == '/')) {
-
-		return(DB_DATA_MISMATCH);
-	}
-
-	for ( ; *name; ++name) {
-#ifdef __WIN__
-		/* Check for reserved characters in DOS filenames. */
-		switch (*name) {
-		case ':':
-		case '|':
-		case '"':
-		case '*':
-		case '<':
-		case '>':
-			return(DB_DATA_MISMATCH);
-		}
-#endif /* __WIN__ */
-		if (*name == '/') {
-			if (slash) {
-				return(DB_DATA_MISMATCH);
-			}
-			slash = name;
-		}
-	}
-
-	return(slash ? DB_SUCCESS : DB_DATA_MISMATCH);
-}
-
-
-
-/*****************************************************************//**
 Get a table id. The caller must have acquired the dictionary mutex.
-@return	DB_SUCCESS if found */
+@return DB_SUCCESS if found */
 static
 ib_err_t
 ib_table_get_id_low(
@@ -846,7 +765,7 @@ ib_table_get_id_low(
 
 /*****************************************************************//**
 Create an internal cursor instance.
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 static
 ib_err_t
 ib_create_cursor(
@@ -918,7 +837,7 @@ ib_create_cursor(
 /*****************************************************************//**
 Create an internal cursor instance, and set prebuilt->index to index
 with supplied index_id.
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 static
 ib_err_t
 ib_create_cursor_with_index_id(
@@ -943,8 +862,7 @@ ib_create_cursor_with_index_id(
 
 /*****************************************************************//**
 Open an InnoDB table and return a cursor handle to it.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_open_table_using_id(
 /*==========================*/
@@ -955,12 +873,10 @@ ib_cursor_open_table_using_id(
 {
 	ib_err_t	err;
 	dict_table_t*	table;
+	const ib_bool_t	locked
+		= ib_trx && ib_schema_lock_is_exclusive(ib_trx);
 
-	if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
-		table = ib_open_table_by_id(table_id, FALSE);
-	} else {
-		table = ib_open_table_by_id(table_id, TRUE);
-	}
+	table = ib_open_table_by_id(table_id, locked);
 
 	if (table == NULL) {
 
@@ -974,58 +890,8 @@ ib_cursor_open_table_using_id(
 }
 
 /*****************************************************************//**
-Open an InnoDB index and return a cursor handle to it.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_index_using_id(
-/*==========================*/
-	ib_id_u64_t	index_id,	/*!< in: index id of index to open */
-	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
-					can be NULL */
-	ib_crsr_t*	ib_crsr)	/*!< out: InnoDB cursor */
-{
-	ib_err_t	err;
-	dict_table_t*	table;
-	ulint		table_id = (ulint)( index_id >> 32);
-
-	if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
-		table = ib_open_table_by_id(table_id, FALSE);
-	} else {
-		table = ib_open_table_by_id(table_id, TRUE);
-	}
-
-	if (table == NULL) {
-
-		return(DB_TABLE_NOT_FOUND);
-	}
-
-	/* We only return the lower 32 bits of the dulint. */
-	err = ib_create_cursor_with_index_id(
-		ib_crsr, table, index_id, (trx_t*) ib_trx);
-
-	if (ib_crsr != NULL) {
-		const ib_cursor_t*	cursor;
-
-		cursor = *(ib_cursor_t**) ib_crsr;
-
-		if (cursor->prebuilt->index == NULL) {
-			ib_err_t	crsr_err;
-
-			crsr_err = ib_cursor_close(*ib_crsr);
-			ut_a(crsr_err == DB_SUCCESS);
-
-			*ib_crsr = NULL;
-		}
-	}
-
-	return(err);
-}
-
-/*****************************************************************//**
 Open an InnoDB secondary index cursor and return a cursor handle to it.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_open_index_using_name(
 /*============================*/
@@ -1092,8 +958,7 @@ ib_cursor_open_index_using_name(
 
 /*****************************************************************//**
 Open an InnoDB table and return a cursor handle to it.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_open_table(
 /*=================*/
@@ -1106,22 +971,22 @@ ib_cursor_open_table(
 	dict_table_t*	table;
 	char*		normalized_name;
 
-	normalized_name = static_cast<char*>(mem_alloc(ut_strlen(name) + 1));
+	normalized_name = static_cast<char*>(ut_malloc_nokey(ut_strlen(name)
+							     + 1));
 	ib_normalize_table_name(normalized_name, name);
 
 	if (ib_trx != NULL) {
-	       if (!ib_schema_lock_is_exclusive(ib_trx)) {
-			table = (dict_table_t*)ib_open_table_by_name(
-				normalized_name);
+		if (!ib_schema_lock_is_exclusive(ib_trx)) {
+			table = ib_open_table_by_name(normalized_name);
 		} else {
 			/* NOTE: We do not acquire MySQL metadata lock */
 			table = ib_lookup_table_by_name(normalized_name);
 		}
 	} else {
-		table = (dict_table_t*)ib_open_table_by_name(normalized_name);
+		table = ib_open_table_by_name(normalized_name);
 	}
 
-	mem_free(normalized_name);
+	ut_free(normalized_name);
 	normalized_name = NULL;
 
 	/* It can happen that another thread has created the table but
@@ -1157,22 +1022,8 @@ ib_qry_proc_free(
 }
 
 /*****************************************************************//**
-set a cursor trx to NULL */
-UNIV_INTERN
-void
-ib_cursor_clear_trx(
-/*================*/
-	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
-{
-	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
-
-	cursor->prebuilt->trx = NULL;
-}
-
-/*****************************************************************//**
 Reset the cursor.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_reset(
 /*============*/
@@ -1198,7 +1049,7 @@ ib_cursor_reset(
 
 /*****************************************************************//**
 Update the cursor with a new transaction and also reset the cursor.
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_new_trx(
 /*==============*/
@@ -1217,16 +1068,16 @@ ib_cursor_new_trx(
 
 	trx_assign_read_view(prebuilt->trx);
 
-        ib_qry_proc_free(&cursor->q_proc);
+	ib_qry_proc_free(&cursor->q_proc);
 
-        mem_heap_empty(cursor->query_heap);
+	mem_heap_empty(cursor->query_heap);
 
 	return(err);
 }
 
 /*****************************************************************//**
 Commit the transaction in a cursor
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_commit_trx(
 /*=================*/
@@ -1247,8 +1098,7 @@ ib_cursor_commit_trx(
 
 /*****************************************************************//**
 Close an InnoDB table and free the cursor.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_close(
 /*============*/
@@ -1285,8 +1135,7 @@ ib_cursor_close(
 
 /*****************************************************************//**
 Close the table and decrement its n_ref_count.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_close_table(
 /*==================*/
@@ -1303,7 +1152,7 @@ ib_cursor_close_table(
 }
 /**********************************************************************//**
 Run the insert query and do error handling.
-@return	DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
 UNIV_INLINE
 ib_err_t
 ib_insert_row_with_lock_retry(
@@ -1344,7 +1193,7 @@ ib_insert_row_with_lock_retry(
 
 /*****************************************************************//**
 Write a row.
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 static
 ib_err_t
 ib_execute_insert_query_graph(
@@ -1393,7 +1242,7 @@ ib_insert_query_graph_create(
 	ib_qry_node_t*	node = &q_proc->node;
 	trx_t*		trx = cursor->prebuilt->trx;
 
-	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+	ut_a(trx_is_started(trx));
 
 	if (node->ins == NULL) {
 		dtuple_t*	row;
@@ -1422,8 +1271,7 @@ ib_insert_query_graph_create(
 
 /*****************************************************************//**
 Insert a row into a table.
- at return	DB_SUCCESS or err code */
-UNIV_INTERN
+ at return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_insert_row(
 /*=================*/
@@ -1503,7 +1351,7 @@ ib_cursor_insert_row(
 
 /*********************************************************************//**
 Gets pointer to a prebuilt update vector used in updates.
-@return	update vector */
+@return update vector */
 UNIV_INLINE
 upd_t*
 ib_update_vector_create(
@@ -1517,7 +1365,7 @@ ib_update_vector_create(
 	ib_qry_grph_t*	grph = &q_proc->grph;
 	ib_qry_node_t*	node = &q_proc->node;
 
-	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+	ut_a(trx_is_started(trx));
 
 	if (node->upd == NULL) {
 		node->upd = static_cast<upd_node_t*>(
@@ -1568,7 +1416,7 @@ ib_update_col(
 /**********************************************************************//**
 Checks which fields have changed in a row and stores the new data
 to an update vector.
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 static
 ib_err_t
 ib_calc_diff(
@@ -1635,7 +1483,7 @@ ib_calc_diff(
 
 /**********************************************************************//**
 Run the update query and do error handling.
-@return	DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
 UNIV_INLINE
 ib_err_t
 ib_update_row_with_lock_retry(
@@ -1683,7 +1531,7 @@ ib_update_row_with_lock_retry(
 
 /*********************************************************************//**
 Does an update or delete of a row.
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 UNIV_INLINE
 ib_err_t
 ib_execute_update_query_graph(
@@ -1700,7 +1548,7 @@ ib_execute_update_query_graph(
 	ib_qry_proc_t*	q_proc = &cursor->q_proc;
 
 	/* The transaction must be running. */
-	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+	ut_a(trx_is_started(trx));
 
 	node = q_proc->node.upd;
 
@@ -1743,8 +1591,7 @@ ib_execute_update_query_graph(
 
 /*****************************************************************//**
 Update a row in a table.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_update_row(
 /*=================*/
@@ -1761,9 +1608,9 @@ ib_cursor_update_row(
 	const ib_tuple_t*new_tuple = (const ib_tuple_t*) ib_new_tpl;
 
 	if (dict_index_is_clust(prebuilt->index)) {
-		pcur = &cursor->prebuilt->pcur;
+		pcur = cursor->prebuilt->pcur;
 	} else if (prebuilt->need_to_access_clustered) {
-		pcur = &cursor->prebuilt->clust_pcur;
+		pcur = cursor->prebuilt->clust_pcur;
 	} else {
 		return(DB_ERROR);
 	}
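The `&prebuilt->pcur` to `prebuilt->pcur` changes here and below follow from row_prebuilt_t in 5.7 holding its persistent cursors as pointers rather than embedded structs. Roughly how they come into being (a sketch; the actual allocation site, presumably row_create_prebuilt(), is an assumption):

	/* 5.6 embedded btr_pcur_t in row_prebuilt_t; 5.7 allocates it. */
	prebuilt->pcur = static_cast<btr_pcur_t*>(
		mem_heap_zalloc(prebuilt->heap, sizeof(btr_pcur_t)));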
@@ -1789,7 +1636,7 @@ ib_cursor_update_row(
 
 /**********************************************************************//**
 Build the update query graph to delete a row from an index.
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 static
 ib_err_t
 ib_delete_row(
@@ -1856,8 +1703,7 @@ ib_delete_row(
 
 /*****************************************************************//**
 Delete a row in a table.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_delete_row(
 /*=================*/
@@ -1874,12 +1720,12 @@ ib_cursor_delete_row(
 	/* Check whether this is a secondary index cursor */
 	if (index != prebuilt->index) {
 		if (prebuilt->need_to_access_clustered) {
-			pcur = &prebuilt->clust_pcur;
+			pcur = prebuilt->clust_pcur;
 		} else {
 			return(DB_ERROR);
 		}
 	} else {
-		pcur = &prebuilt->pcur;
+		pcur = prebuilt->pcur;
 	}
 
 	if (ib_btr_cursor_is_positioned(pcur)) {
@@ -1931,8 +1777,7 @@ ib_cursor_delete_row(
 
 /*****************************************************************//**
 Read current row.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_read_row(
 /*===============*/
@@ -1945,7 +1790,7 @@ ib_cursor_read_row(
 	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
 	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
 
-	ut_a(cursor->prebuilt->trx->state != TRX_STATE_NOT_STARTED);
+	ut_a(trx_is_started(cursor->prebuilt->trx));
 
 	/* When searching with IB_EXACT_MATCH set, row_search_for_mysql()
 	will not position the persistent cursor but will copy the record
@@ -1959,9 +1804,9 @@ ib_cursor_read_row(
 
 		if (prebuilt->need_to_access_clustered
 		    && tuple->type == TPL_TYPE_ROW) {
-			pcur = &prebuilt->clust_pcur;
+			pcur = prebuilt->clust_pcur;
 		} else {
-			pcur = &prebuilt->pcur;
+			pcur = prebuilt->pcur;
 		}
 
 		if (pcur == NULL) {
@@ -2003,7 +1848,7 @@ ib_cursor_read_row(
 
 /*****************************************************************//**
 Move cursor to the first record in the table.
-@return	DB_SUCCESS or err code */
+@return DB_SUCCESS or err code */
 UNIV_INLINE
 ib_err_t
 ib_cursor_position(
@@ -2015,24 +1860,23 @@ ib_cursor_position(
 	row_prebuilt_t*	prebuilt = cursor->prebuilt;
 	unsigned char*	buf;
 
-	buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
+	buf = static_cast<unsigned char*>(ut_malloc_nokey(UNIV_PAGE_SIZE));
 
 	/* We want to position at one of the ends, row_search_for_mysql()
 	uses the search_tuple fields to work out what to do. */
 	dtuple_set_n_fields(prebuilt->search_tuple, 0);
 
 	err = static_cast<ib_err_t>(row_search_for_mysql(
-		buf, mode, prebuilt, 0, 0));
+		buf, static_cast<page_cur_mode_t>(mode), prebuilt, 0, 0));
 
-	mem_free(buf);
+	ut_free(buf);
 
 	return(err);
 }
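mem_alloc()/mem_free() are gone in 5.7; heap-less allocations now go through ut_malloc_nokey() and ut_free() from ut0new.h (the _nokey suffix meaning no performance-schema memory key is attached -- my reading, worth confirming against ut0new.h). The calling pattern itself is unchanged:

	unsigned char*	buf = static_cast<unsigned char*>(
		ut_malloc_nokey(UNIV_PAGE_SIZE));
	/* ... use buf as the MySQL-format record buffer ... */
	ut_free(buf);	/* pair every ut_malloc_nokey() with ut_free() */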
 
 /*****************************************************************//**
 Move cursor to the first record in the table.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_first(
 /*============*/
@@ -2044,23 +1888,8 @@ ib_cursor_first(
 }
 
 /*****************************************************************//**
-Move cursor to the last record in the table.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_last(
-/*===========*/
-	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
-{
-	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
-
-	return(ib_cursor_position(cursor, IB_CUR_L));
-}
-
-/*****************************************************************//**
 Move cursor to the next user record in the table.
 @return DB_SUCCESS or err code */
-UNIV_INTERN
 ib_err_t
 ib_cursor_next(
 /*===========*/
@@ -2082,8 +1911,7 @@ ib_cursor_next(
 
 /*****************************************************************//**
 Search for key.
-@return	DB_SUCCESS or err code */
-UNIV_INTERN
+@return DB_SUCCESS or err code */
 ib_err_t
 ib_cursor_moveto(
 /*=============*/
@@ -2121,19 +1949,19 @@ ib_cursor_moveto(
 
 	prebuilt->innodb_api_rec = NULL;
 
-	buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
+	buf = static_cast<unsigned char*>(ut_malloc_nokey(UNIV_PAGE_SIZE));
 
 	err = static_cast<ib_err_t>(row_search_for_mysql(
-		buf, ib_srch_mode, prebuilt, cursor->match_mode, 0));
+		buf, static_cast<page_cur_mode_t>(ib_srch_mode), prebuilt,
+		cursor->match_mode, 0));
 
-	mem_free(buf);
+	ut_free(buf);
 
 	return(err);
 }
 
 /*****************************************************************//**
 Set the cursor search mode. */
-UNIV_INTERN
 void
 ib_cursor_set_match_mode(
 /*=====================*/
@@ -2147,7 +1975,7 @@ ib_cursor_set_match_mode(
 
 /*****************************************************************//**
 Get the dfield instance for the column in the tuple.
-@return	dfield instance in tuple */
+@return dfield instance in tuple */
 UNIV_INLINE
 dfield_t*
 ib_col_get_dfield(
@@ -2164,7 +1992,7 @@ ib_col_get_dfield(
 
 /*****************************************************************//**
 Predicate to check whether a column type contains variable length data.
-@return	DB_SUCCESS or error code */
+@return DB_SUCCESS or error code */
 UNIV_INLINE
 ib_err_t
 ib_col_is_capped(
@@ -2177,14 +2005,14 @@ ib_col_is_capped(
 		|| dtype_get_mtype(dtype) == DATA_MYSQL
 		|| dtype_get_mtype(dtype) == DATA_VARMYSQL
 		|| dtype_get_mtype(dtype) == DATA_FIXBINARY
-		|| dtype_get_mtype(dtype) == DATA_BINARY)
+		|| dtype_get_mtype(dtype) == DATA_BINARY
+		|| dtype_get_mtype(dtype) == DATA_POINT)
 	       && dtype_get_len(dtype) > 0));
 }
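DATA_POINT joins the capped types because the new 5.7 geometry support stores a POINT as a fixed-length value (DATA_POINT_LEN bytes; 25 if memory serves, but the constant in data0type.h is authoritative), so like DATA_CHAR it has a nonzero fixed dtype length. A hypothetical caller would use the predicate to clamp a user-supplied length:

	/* Hypothetical: cap the input length for fixed-length types. */
	if (ib_col_is_capped(dtype)) {
		len = ut_min(len, static_cast<ulint>(dtype_get_len(dtype)));
	}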
 
 /*****************************************************************//**
 Set a column of the tuple. Make a copy using the tuple's heap.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_col_set_value(
 /*=============*/
@@ -2275,24 +2103,19 @@ ib_col_set_value(
 		ut_error;
 		break;
 
-	case DATA_CHAR: {
-		ulint	pad_char = ULINT_UNDEFINED;
-
-		pad_char = dtype_get_pad_char(
-			dtype_get_mtype(dtype),	dtype_get_prtype(dtype));
-
-		ut_a(pad_char != ULINT_UNDEFINED);
-
-		memset((byte*) dst + len,
-		       static_cast<int>(pad_char),
-			   static_cast<size_t>(col_len - len));
-
+	case DATA_CHAR:
 		memcpy(dst, src, len);
+		memset((byte*) dst + len, 0x20, col_len - len);
+		len = col_len;
+		break;
 
-		len = static_cast<ib_ulint_t>(col_len);
+	case DATA_POINT:
+		memcpy(dst, src, len);
 		break;
-	}
+
 	case DATA_BLOB:
+	case DATA_VAR_POINT:
+	case DATA_GEOMETRY:
 	case DATA_BINARY:
 	case DATA_DECIMAL:
 	case DATA_VARCHAR:
@@ -2391,8 +2214,7 @@ ib_col_set_value(
 
 /*****************************************************************//**
 Get the size of the data available in a column of the tuple.
-@return	bytes avail or IB_SQL_NULL */
-UNIV_INTERN
+@return bytes avail or IB_SQL_NULL */
 ib_ulint_t
 ib_col_get_len(
 /*===========*/
@@ -2413,7 +2235,7 @@ ib_col_get_len(
 
 /*****************************************************************//**
 Copy a column value from the tuple.
-@return	bytes copied or IB_SQL_NULL */
+@return bytes copied or IB_SQL_NULL */
 UNIV_INLINE
 ib_ulint_t
 ib_col_copy_value_low(
@@ -2440,7 +2262,7 @@ ib_col_copy_value_low(
 		switch (dtype_get_mtype(dfield_get_type(dfield))) {
 		case DATA_INT: {
 			ibool		usign;
-			ullint		ret;
+			uintmax_t	ret;
 
 			ut_a(data_len == len);
 
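The unpacked-integer local changes from InnoDB's own ullint typedef to the standard uintmax_t, which by definition is the widest unsigned integer type the implementation offers and therefore holds anything an unsigned long long (what ullint aliased) could:

	/* uintmax_t comes from <stdint.h>; the old typedef is retired. */
	ut_ad(sizeof(uintmax_t) >= sizeof(unsigned long long));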
@@ -2509,8 +2331,7 @@ ib_col_copy_value_low(
 
 /*****************************************************************//**
 Copy a column value from the tuple.
-@return	bytes copied or IB_SQL_NULL */
-UNIV_INTERN
+@return bytes copied or IB_SQL_NULL */
 ib_ulint_t
 ib_col_copy_value(
 /*==============*/
@@ -2524,7 +2345,7 @@ ib_col_copy_value(
 
 /*****************************************************************//**
 Get the InnoDB column attribute from the internal column precise type.
-@return	precise type in api format */
+@return precise type in api format */
 UNIV_INLINE
 ib_col_attr_t
 ib_col_get_attr(
@@ -2546,8 +2367,7 @@ ib_col_get_attr(
 
 /*****************************************************************//**
 Get a column name from the tuple.
-@return	name of the column */
-UNIV_INTERN
+@return name of the column */
 const char*
 ib_col_get_name(
 /*============*/
@@ -2567,8 +2387,7 @@ ib_col_get_name(
 
 /*****************************************************************//**
 Get an index field name from the cursor.
-@return	name of the field */
-UNIV_INTERN
+@return name of the field */
 const char*
 ib_get_idx_field_name(
 /*==================*/
@@ -2592,7 +2411,7 @@ ib_get_idx_field_name(
 
 /*****************************************************************//**
 Get a column type, length and attributes from the tuple.
-@return	len of column data */
+@return len of column data */
 UNIV_INLINE
 ib_ulint_t
 ib_col_get_meta_low(
@@ -2655,8 +2474,7 @@ ib_tuple_check_int(
 
 /*************************************************************//**
 Read a signed int 8 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_i8(
 /*=============*/
@@ -2677,8 +2495,7 @@ ib_tuple_read_i8(
 
 /*************************************************************//**
 Read an unsigned int 8 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_u8(
 /*=============*/
@@ -2699,8 +2516,7 @@ ib_tuple_read_u8(
 
 /*************************************************************//**
 Read a signed int 16 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_i16(
 /*==============*/
@@ -2721,8 +2537,7 @@ ib_tuple_read_i16(
 
 /*************************************************************//**
 Read an unsigned int 16 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_u16(
 /*==============*/
@@ -2743,8 +2558,7 @@ ib_tuple_read_u16(
 
 /*************************************************************//**
 Read a signed int 32 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_i32(
 /*==============*/
@@ -2765,8 +2579,7 @@ ib_tuple_read_i32(
 
 /*************************************************************//**
 Read an unsigned int 32 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_u32(
 /*==============*/
@@ -2787,8 +2600,7 @@ ib_tuple_read_u32(
 
 /*************************************************************//**
 Read a signed int 64 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_i64(
 /*==============*/
@@ -2809,8 +2621,7 @@ ib_tuple_read_i64(
 
 /*************************************************************//**
 Read an unsigned int 64 bit column from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_u64(
 /*==============*/
@@ -2831,8 +2642,7 @@ ib_tuple_read_u64(
 
 /*****************************************************************//**
 Get a column value pointer from the tuple.
-@return	NULL or pointer to buffer */
-UNIV_INTERN
+@return NULL or pointer to buffer */
 const void*
 ib_col_get_value(
 /*=============*/
@@ -2854,8 +2664,7 @@ ib_col_get_value(
 
 /*****************************************************************//**
 Get a column type, length and attributes from the tuple.
-@return	len of column data */
-UNIV_INTERN
+@return len of column data */
 ib_ulint_t
 ib_col_get_meta(
 /*============*/
@@ -2868,8 +2677,7 @@ ib_col_get_meta(
 
 /*****************************************************************//**
 "Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
-@return	new tuple, or NULL */
-UNIV_INTERN
+@return new tuple, or NULL */
 ib_tpl_t
 ib_tuple_clear(
 /*============*/
@@ -2897,8 +2705,7 @@ ib_tuple_clear(
 Create a new cluster key search tuple and copy the contents of  the
 secondary index key tuple columns that refer to the cluster index record
 to the cluster key. It does a deep copy of the column data.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_tuple_get_cluster_key(
 /*=====================*/
@@ -2970,64 +2777,8 @@ ib_tuple_get_cluster_key(
 }
 
 /*****************************************************************//**
-Copy the contents of  source tuple to destination tuple. The tuples
-must be of the same type and belong to the same table/index.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_tuple_copy(
-/*==========*/
-	ib_tpl_t	ib_dst_tpl,	/*!< in: destination tuple */
-	const ib_tpl_t	ib_src_tpl)	/*!< in: source tuple */
-{
-	ulint		i;
-	ulint		n_fields;
-	ib_err_t	err = DB_SUCCESS;
-	const ib_tuple_t*src_tuple = (const ib_tuple_t*) ib_src_tpl;
-	ib_tuple_t*	dst_tuple = (ib_tuple_t*) ib_dst_tpl;
-
-	/* Make sure src and dst are not the same. */
-	ut_a(src_tuple != dst_tuple);
-
-	/* Make sure they are the same type and refer to the same index. */
-	if (src_tuple->type != dst_tuple->type
-	   || src_tuple->index != dst_tuple->index) {
-
-		return(DB_DATA_MISMATCH);
-	}
-
-	n_fields = dtuple_get_n_fields(src_tuple->ptr);
-	ut_ad(n_fields == dtuple_get_n_fields(dst_tuple->ptr));
-
-	/* Do a deep copy of the data fields. */
-	for (i = 0; i < n_fields; ++i) {
-		dfield_t*	src_field;
-		dfield_t*	dst_field;
-
-		src_field = dtuple_get_nth_field(src_tuple->ptr, i);
-		dst_field = dtuple_get_nth_field(dst_tuple->ptr, i);
-
-		if (!dfield_is_null(src_field)) {
-			UNIV_MEM_ASSERT_RW(src_field->data, src_field->len);
-
-			dst_field->data = mem_heap_dup(
-				dst_tuple->heap,
-				src_field->data,
-				src_field->len);
-
-			dst_field->len = src_field->len;
-		} else {
-			dfield_set_null(dst_field);
-		}
-	}
-
-	return(err);
-}
-
-/*****************************************************************//**
 Create an InnoDB tuple used for index/table search.
-@return	own: Tuple for current index */
-UNIV_INTERN
+@return own: Tuple for current index */
 ib_tpl_t
 ib_sec_search_tuple_create(
 /*=======================*/
@@ -3043,8 +2794,7 @@ ib_sec_search_tuple_create(
 
 /*****************************************************************//**
 Create an InnoDB tuple used for index/table search.
-@return	own: Tuple for current index */
-UNIV_INTERN
+@return own: Tuple for current index */
 ib_tpl_t
 ib_sec_read_tuple_create(
 /*=====================*/
@@ -3060,8 +2810,7 @@ ib_sec_read_tuple_create(
 
 /*****************************************************************//**
 Create an InnoDB tuple used for table key operations.
-@return	own: Tuple for current table */
-UNIV_INTERN
+@return own: Tuple for current table */
 ib_tpl_t
 ib_clust_search_tuple_create(
 /*=========================*/
@@ -3079,8 +2828,7 @@ ib_clust_search_tuple_create(
 
 /*****************************************************************//**
 Create an InnoDB tuple for table row operations.
-@return	own: Tuple for current table */
-UNIV_INTERN
+@return own: Tuple for current table */
 ib_tpl_t
 ib_clust_read_tuple_create(
 /*=======================*/
@@ -3098,8 +2846,7 @@ ib_clust_read_tuple_create(
 
 /*****************************************************************//**
 Return the number of user columns in the tuple definition.
-@return	number of user columns */
-UNIV_INTERN
+@return number of user columns */
 ib_ulint_t
 ib_tuple_get_n_user_cols(
 /*=====================*/
@@ -3118,8 +2865,7 @@ ib_tuple_get_n_user_cols(
 
 /*****************************************************************//**
 Return the number of columns in the tuple definition.
-@return	number of columns */
-UNIV_INTERN
+@return number of columns */
 ib_ulint_t
 ib_tuple_get_n_cols(
 /*================*/
@@ -3132,7 +2878,6 @@ ib_tuple_get_n_cols(
 
 /*****************************************************************//**
 Destroy an InnoDB tuple. */
-UNIV_INTERN
 void
 ib_tuple_delete(
 /*============*/
@@ -3149,8 +2894,7 @@ ib_tuple_delete(
 
 /*****************************************************************//**
 Get a table id. This function will acquire the dictionary mutex.
-@return	DB_SUCCESS if found */
-UNIV_INTERN
+@return DB_SUCCESS if found */
 ib_err_t
 ib_table_get_id(
 /*============*/
@@ -3169,62 +2913,8 @@ ib_table_get_id(
 }
 
 /*****************************************************************//**
-Get an index id.
-@return	DB_SUCCESS if found */
-UNIV_INTERN
-ib_err_t
-ib_index_get_id(
-/*============*/
-	const char*	table_name,	/*!< in: find index for this table */
-	const char*	index_name,	/*!< in: index to find */
-	ib_id_u64_t*	index_id)	/*!< out: index id if found */
-{
-	dict_table_t*	table;
-	char*		normalized_name;
-	ib_err_t	err = DB_TABLE_NOT_FOUND;
-
-	*index_id = 0;
-
-	normalized_name = static_cast<char*>(
-		mem_alloc(ut_strlen(table_name) + 1));
-	ib_normalize_table_name(normalized_name, table_name);
-
-	table = ib_lookup_table_by_name(normalized_name);
-
-	mem_free(normalized_name);
-	normalized_name = NULL;
-
-	if (table != NULL) {
-		dict_index_t*	index;
-
-		index = dict_table_get_index_on_name(table, index_name);
-
-		if (index != NULL) {
-			/* We only support 32 bit table and index ids. Because
-			we need to pack the table id into the index id. */
-
-			*index_id = (table->id);
-			*index_id <<= 32;
-			*index_id |= (index->id);
-
-			err = DB_SUCCESS;
-		}
-	}
-
-	return(err);
-}
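ib_index_get_id() is removed outright. For anyone migrating off it, note the packing scheme its callers relied on, per the removed body above: table id in the high 32 bits, index id in the low 32. Unpacking is the reverse shift and mask (a sketch; the ib_u32_t typedef is assumed from api0api.h):

	ib_id_u64_t	packed = ((ib_id_u64_t) table_id << 32)
		| (index_id & 0xFFFFFFFF);
	ib_u32_t	tbl_id = (ib_u32_t) (packed >> 32);	   /* table id */
	ib_u32_t	idx_id = (ib_u32_t) (packed & 0xFFFFFFFF); /* index id */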
-
-#ifdef __WIN__
-#define SRV_PATH_SEPARATOR      '\\'
-#else
-#define SRV_PATH_SEPARATOR      '/'
-#endif
-
-
-/*****************************************************************//**
 Check if cursor is positioned.
-@return	IB_TRUE if positioned */
-UNIV_INTERN
+@return IB_TRUE if positioned */
 ib_bool_t
 ib_cursor_is_positioned(
 /*====================*/
@@ -3233,14 +2923,13 @@ ib_cursor_is_positioned(
 	const ib_cursor_t*	cursor = (const ib_cursor_t*) ib_crsr;
 	row_prebuilt_t*		prebuilt = cursor->prebuilt;
 
-	return(ib_btr_cursor_is_positioned(&prebuilt->pcur));
+	return(ib_btr_cursor_is_positioned(prebuilt->pcur));
 }
 
 
 /*****************************************************************//**
 Checks if the data dictionary is latched in exclusive mode.
-@return	TRUE if exclusive latch */
-UNIV_INTERN
+@return TRUE if exclusive latch */
 ib_bool_t
 ib_schema_lock_is_exclusive(
 /*========================*/
@@ -3253,8 +2942,7 @@ ib_schema_lock_is_exclusive(
 
 /*****************************************************************//**
 Checks if the data dictionary is latched in shared mode.
-@return	TRUE if shared latch */
-UNIV_INTERN
+@return TRUE if shared latch */
 ib_bool_t
 ib_schema_lock_is_shared(
 /*=====================*/
@@ -3267,8 +2955,7 @@ ib_schema_lock_is_shared(
 
 /*****************************************************************//**
 Set the Lock an InnoDB cursor/table.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_cursor_lock(
 /*===========*/
@@ -3286,8 +2973,7 @@ ib_cursor_lock(
 
 /*****************************************************************//**
 Set the Lock an InnoDB table using the table id.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_table_lock(
 /*==========*/
@@ -3302,7 +2988,7 @@ ib_table_lock(
 	ib_qry_proc_t	q_proc;
 	trx_t*		trx = (trx_t*) ib_trx;
 
-	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+	ut_ad(trx_is_started(trx));
 
 	table = ib_open_table_by_id(table_id, FALSE);
 
@@ -3336,8 +3022,7 @@ ib_table_lock(
 
 /*****************************************************************//**
 Unlock an InnoDB table.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_cursor_unlock(
 /*=============*/
@@ -3358,8 +3043,7 @@ ib_cursor_unlock(
 
 /*****************************************************************//**
 Set the Lock mode of the cursor.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 ib_err_t
 ib_cursor_set_lock_mode(
 /*====================*/
@@ -3379,8 +3063,8 @@ ib_cursor_set_lock_mode(
 	}
 
 	if (err == DB_SUCCESS) {
-		prebuilt->select_lock_type = (enum lock_mode) ib_lck_mode;
-		ut_a(prebuilt->trx->state != TRX_STATE_NOT_STARTED);
+		prebuilt->select_lock_type = (lock_mode) ib_lck_mode;
+		ut_a(trx_is_started(prebuilt->trx));
 	}
 
 	return(err);
@@ -3388,7 +3072,6 @@ ib_cursor_set_lock_mode(
 
 /*****************************************************************//**
 Set need to access clustered index record. */
-UNIV_INTERN
 void
 ib_cursor_set_cluster_access(
 /*=========================*/
@@ -3401,128 +3084,7 @@ ib_cursor_set_cluster_access(
 }
 
 /*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i8(
-/*==============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_i8_t		val)		/*!< in: value to write */
-{
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i16(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_i16_t	val)		/*!< in: value to write */
-{
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i32(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_i32_t	val)		/*!< in: value to write */
-{
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i64(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_i64_t	val)		/*!< in: value to write */
-{
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u8(
-/*==============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_u8_t		val)		/*!< in: value to write */
-{
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u16(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tupe to write to */
-	int		col_no,		/*!< in: column number */
-	ib_u16_t	val)		/*!< in: value to write */
-{
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u32(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_u32_t	val)		/*!< in: value to write */
-{
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u64(
-/*===============*/
-	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
-	int		col_no,		/*!< in: column number */
-	ib_u64_t	val)		/*!< in: value to write */
-{
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
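All eight ib_tuple_write_{i,u}{8,16,32,64}() wrappers go away; each, as the removed bodies show, was a one-line forward to ib_col_set_value(). Callers can make the same five-argument call directly, e.g. for a signed 32-bit value:

	ib_i32_t	val = 42;
	ib_err_t	err = ib_col_set_value(
		ib_tpl, col_no, &val, sizeof(val), true);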
-/*****************************************************************//**
 Inform the cursor that it's the start of an SQL statement. */
-UNIV_INTERN
 void
 ib_cursor_stmt_begin(
 /*=================*/
@@ -3535,8 +3097,7 @@ ib_cursor_stmt_begin(
 
 /*****************************************************************//**
 Write a double value to a column.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_write_double(
 /*==================*/
@@ -3559,8 +3120,7 @@ ib_tuple_write_double(
 
 /*************************************************************//**
 Read a double column value from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_double(
 /*=================*/
@@ -3586,8 +3146,7 @@ ib_tuple_read_double(
 
 /*****************************************************************//**
 Write a float value to a column.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_write_float(
 /*=================*/
@@ -3610,8 +3169,7 @@ ib_tuple_write_float(
 
 /*************************************************************//**
 Read a float value from an InnoDB tuple.
-@return	DB_SUCCESS or error */
-UNIV_INTERN
+@return DB_SUCCESS or error */
 ib_err_t
 ib_tuple_read_float(
 /*================*/
@@ -3639,7 +3197,6 @@ ib_tuple_read_float(
 Truncate a table. The cursor handle will be closed and set to NULL
 on success.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 ib_err_t
 ib_cursor_truncate(
 /*===============*/
@@ -3692,7 +3249,6 @@ ib_cursor_truncate(
 /*****************************************************************//**
 Truncate a table.
 @return DB_SUCCESS or error code */
-UNIV_INTERN
 ib_err_t
 ib_table_truncate(
 /*==============*/
@@ -3743,8 +3299,7 @@ ib_table_truncate(
 	}
 
 	if (trunc_err == DB_SUCCESS) {
-		ut_a(ib_trx_state(ib_trx) == static_cast<ib_trx_state_t>(
-			TRX_STATE_NOT_STARTED));
+		ut_a(!trx_is_started(static_cast<trx_t*>(ib_trx)));
 	} else {
 		err = ib_trx_rollback(ib_trx);
 		ut_a(err == DB_SUCCESS);
@@ -3766,35 +3321,18 @@ ib_table_truncate(
 }
 
 /*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return 0 or error number */
-UNIV_INTERN
-ib_err_t
-ib_close_thd(
-/*=========*/
-	void*		thd)	/*!< in: handle to the MySQL thread of the user
-				whose resources should be free'd */
-{
-	innobase_close_thd(static_cast<THD*>(thd));
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
 Return isolation configuration set by "innodb_api_trx_level"
 @return trx isolation level*/
-UNIV_INTERN
-ib_trx_state_t
+ib_trx_level_t
 ib_cfg_trx_level()
 /*==============*/
 {
-	return(static_cast<ib_trx_state_t>(ib_trx_level_setting));
+	return(static_cast<ib_trx_level_t>(ib_trx_level_setting));
 }
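The ib_trx_state_t to ib_trx_level_t change fixes a real type mixup, not a cosmetic one: innodb_api_trx_level is an isolation level, and the two enums have unrelated value sets (by my reading of api0api.h, IB_TRX_READ_UNCOMMITTED..IB_TRX_SERIALIZABLE versus IB_TRX_NOT_STARTED..IB_TRX_PREPARED), so comparing the old return value against level constants only worked by accident of the shared underlying integers. Correct usage after the fix:

	/* Enum constant assumed from api0api.h. */
	if (ib_cfg_trx_level() == IB_TRX_REPEATABLE_READ) {
		/* default InnoDB isolation level */
	}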
 
 /*****************************************************************//**
 Return configure value for background commit interval (in seconds)
 @return background commit interval (in seconds) */
-UNIV_INTERN
 ib_ulint_t
 ib_cfg_bk_commit_interval()
 /*=======================*/
@@ -3805,7 +3343,6 @@ ib_cfg_bk_commit_interval()
 /*****************************************************************//**
 Get generic configure status
 @return configure status*/
-UNIV_INTERN
 int
 ib_cfg_get_cfg()
 /*============*/
@@ -3826,10 +3363,21 @@ ib_cfg_get_cfg()
 }
 
 /*****************************************************************//**
+Wrapper of ut_strerr() which converts an InnoDB error number to a
+human readable text message.
+ at return string, describing the error */
+const char*
+ib_ut_strerr(
+/*=========*/
+	ib_err_t	num)	/*!< in: error number */
+{
+	return(ut_strerr(num));
+}
+
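The new ib_ut_strerr() wrapper lets API clients (notably the memcached plugin) render error codes without reaching into ut0ut directly. Typical use, as a sketch:

	ib_err_t	err = ib_cursor_insert_row(ib_crsr, ib_tpl);
	if (err != DB_SUCCESS) {
		fprintf(stderr, "InnoDB: %s\n", ib_ut_strerr(err));
	}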
+/*****************************************************************//**
 Increase/decrease the memcached sync count of table to sync memcached
 DML with SQL DDLs.
 @return DB_SUCCESS or error number */
-UNIV_INTERN
 ib_err_t
 ib_cursor_set_memcached_sync(
 /*=========================*/
@@ -3849,21 +3397,9 @@ ib_cursor_set_memcached_sync(
                 }
 
 		if (flag) {
-#ifdef HAVE_ATOMIC_BUILTINS
 			os_atomic_increment_lint(&table->memcached_sync_count, 1);
-#else
-		        dict_mutex_enter_for_mysql();
-                        ++table->memcached_sync_count;
-                        dict_mutex_exit_for_mysql();
-#endif
 		} else {
-#ifdef HAVE_ATOMIC_BUILTINS
 			os_atomic_decrement_lint(&table->memcached_sync_count, 1);
-#else
-		        dict_mutex_enter_for_mysql();
-                        --table->memcached_sync_count;
-                        dict_mutex_exit_for_mysql();
-#endif
 		        ut_a(table->memcached_sync_count >= 0);
 		}
 	} else {
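The HAVE_ATOMIC_BUILTINS fallbacks disappear because 5.7 requires atomic builtins unconditionally, so the mutex-protected branches were dead code. On GCC-compatible compilers the wrappers reduce to roughly the following (a sketch; the real definitions live in os0atomic.h and their exact spelling is an assumption):

	/* Roughly what os_atomic_{in,de}crement_lint() expand to: */
	__sync_add_and_fetch(&table->memcached_sync_count, 1);
	__sync_sub_and_fetch(&table->memcached_sync_count, 1);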
diff --git a/storage/innobase/api/api0misc.cc b/storage/innobase/api/api0misc.cc
index b237010..c83eaed 100644
--- a/storage/innobase/api/api0misc.cc
+++ b/storage/innobase/api/api0misc.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -24,11 +24,7 @@ InnoDB Native API
 3/20/2011 Jimmy Yang extracted from Embedded InnoDB
 *******************************************************/
 
-#include <errno.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif /* HAVE_UNISTD_H */
+#include "ha_prototypes.h"
 
 #include "api0misc.h"
 #include "trx0roll.h"
@@ -38,15 +34,10 @@ InnoDB Native API
 #include "pars0pars.h"
 #include "row0sel.h"
 #include "lock0lock.h"
-#include "ha_prototypes.h"
-#include <m_ctype.h>
-#include <mysys_err.h>
-#include <mysql/plugin.h>
 
 /*********************************************************************//**
 Sets a lock on a table.
-@return	error code or DB_SUCCESS */
-UNIV_INTERN
+@return error code or DB_SUCCESS */
 dberr_t
 ib_trx_lock_table_with_retry(
 /*=========================*/
@@ -122,85 +113,84 @@ run_again:
 Handles user errors and lock waits detected by the database engine.
 @return TRUE if it was a lock wait and we should continue running
 the query thread */
-UNIV_INTERN
 ibool
 ib_handle_errors(
 /*=============*/
-        dberr_t*	new_err,/*!< out: possible new error encountered in
-                                lock wait, or if no new error, the value
-                                of trx->error_state at the entry of this
-                                function */
-        trx_t*          trx,    /*!< in: transaction */
-        que_thr_t*      thr,    /*!< in: query thread */
-        trx_savept_t*   savept) /*!< in: savepoint or NULL */
+	dberr_t*	new_err,/*!< out: possible new error encountered in
+				lock wait, or if no new error, the value
+				of trx->error_state at the entry of this
+				function */
+	trx_t*		trx,    /*!< in: transaction */
+	que_thr_t*	thr,    /*!< in: query thread */
+	trx_savept_t*	savept) /*!< in: savepoint or NULL */
 {
-        dberr_t		err;
+	dberr_t		err;
 handle_new_error:
-        err = trx->error_state;
+	err = trx->error_state;
 
-        ut_a(err != DB_SUCCESS);
+	ut_a(err != DB_SUCCESS);
 
-        trx->error_state = DB_SUCCESS;
+	trx->error_state = DB_SUCCESS;
 
-        switch (err) {
-        case DB_LOCK_WAIT_TIMEOUT:
+	switch (err) {
+	case DB_LOCK_WAIT_TIMEOUT:
 		trx_rollback_for_mysql(trx);
 		break;
-                /* fall through */
-        case DB_DUPLICATE_KEY:
-        case DB_FOREIGN_DUPLICATE_KEY:
-        case DB_TOO_BIG_RECORD:
-        case DB_ROW_IS_REFERENCED:
-        case DB_NO_REFERENCED_ROW:
-        case DB_CANNOT_ADD_CONSTRAINT:
-        case DB_TOO_MANY_CONCURRENT_TRXS:
-        case DB_OUT_OF_FILE_SPACE:
-                if (savept) {
-                        /* Roll back the latest, possibly incomplete
-                        insertion or update */
+		/* fall through */
+	case DB_DUPLICATE_KEY:
+	case DB_FOREIGN_DUPLICATE_KEY:
+	case DB_TOO_BIG_RECORD:
+	case DB_ROW_IS_REFERENCED:
+	case DB_NO_REFERENCED_ROW:
+	case DB_CANNOT_ADD_CONSTRAINT:
+	case DB_TOO_MANY_CONCURRENT_TRXS:
+	case DB_OUT_OF_FILE_SPACE:
+		if (savept) {
+			/* Roll back the latest, possibly incomplete
+			insertion or update */
 
 			trx_rollback_to_savepoint(trx, savept);
-                }
-                break;
-        case DB_LOCK_WAIT:
+		}
+		break;
+	case DB_LOCK_WAIT:
 		lock_wait_suspend_thread(thr);
 
-                if (trx->error_state != DB_SUCCESS) {
-                        que_thr_stop_for_mysql(thr);
+		if (trx->error_state != DB_SUCCESS) {
+			que_thr_stop_for_mysql(thr);
 
-                        goto handle_new_error;
-                }
+			goto handle_new_error;
+		}
 
-                *new_err = err;
+		*new_err = err;
 
-                return(TRUE); /* Operation needs to be retried. */
+		return(TRUE); /* Operation needs to be retried. */
 
-        case DB_DEADLOCK:
-        case DB_LOCK_TABLE_FULL:
-                /* Roll back the whole transaction; this resolution was added
-                to version 3.23.43 */
+	case DB_DEADLOCK:
+	case DB_LOCK_TABLE_FULL:
+		/* Roll back the whole transaction; this resolution was added
+		to version 3.23.43 */
 
-                trx_rollback_for_mysql(trx);
-                break;
+		trx_rollback_for_mysql(trx);
+		break;
 
-        case DB_MUST_GET_MORE_FILE_SPACE:
+	case DB_MUST_GET_MORE_FILE_SPACE:
 
-                exit(1);
+		ut_error;
 
-        case DB_CORRUPTION:
+	case DB_CORRUPTION:
 	case DB_FOREIGN_EXCEED_MAX_CASCADE:
-                break;
-        default:
-                ut_error;
-        }
+		break;
+	default:
+		ut_error;
+	}
 
-        if (trx->error_state != DB_SUCCESS) {
-                *new_err = trx->error_state;
-        } else {
-                *new_err = err;
-        }
+	if (trx->error_state != DB_SUCCESS) {
+		*new_err = trx->error_state;
+	} else {
+		*new_err = err;
+	}
 
-        trx->error_state = DB_SUCCESS;
+	trx->error_state = DB_SUCCESS;
 
-        return(FALSE);
+	return(FALSE);
 }
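Besides the space-to-tab reindentation, the one behavioral change in ib_handle_errors() is DB_MUST_GET_MORE_FILE_SPACE: ut_error, a diagnostic assertion, replaces a bare exit(1). For context, the function drives retry loops of the following shape, modeled on the ib_*_with_lock_retry() callers earlier in this patch (details assumed, simplified):

	dberr_t	err;

	do {
		thr->run_node = node;
		thr->prev_node = node;

		row_ins_step(thr);	/* or row_upd_step() for updates */

		err = trx->error_state;

		if (err != DB_SUCCESS) {
			que_thr_stop_for_mysql(thr);
		}
	} while (err != DB_SUCCESS
		 && ib_handle_errors(&err, trx, thr, NULL));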
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 79b5334..1e25308 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -30,9 +30,10 @@ Created 6/2/1994 Heikki Tuuri
 #include "btr0btr.ic"
 #endif
 
-#include "fsp0fsp.h"
+#include "fsp0sysspace.h"
 #include "page0page.h"
 #include "page0zip.h"
+#include "gis0rtree.h"
 
 #ifndef UNIV_HOTBACKUP
 #include "btr0cur.h"
@@ -43,13 +44,15 @@ Created 6/2/1994 Heikki Tuuri
 #include "ibuf0ibuf.h"
 #include "trx0trx.h"
 #include "srv0mon.h"
+#include "gis0geo.h"
+#include "ut0new.h"
+#include "dict0boot.h"
 
 /**************************************************************//**
 Checks if the page in the cursor can be merged with given page.
 If necessary, re-organize the merge_page.
-@return	TRUE if possible to merge. */
-UNIV_INTERN
-ibool
+@return	true if possible to merge. */
+bool
 btr_can_merge_with_page(
 /*====================*/
 	btr_cur_t*	cursor,		/*!< in: cursor on the page to merge */
@@ -61,581 +64,19 @@ btr_can_merge_with_page(
 
 /**************************************************************//**
 Report that an index page is corrupted. */
-UNIV_INTERN
 void
 btr_corruption_report(
 /*==================*/
 	const buf_block_t*	block,	/*!< in: corrupted block */
 	const dict_index_t*	index)	/*!< in: index tree */
 {
-	fprintf(stderr, "InnoDB: flag mismatch in space %u page %u"
-		" index %s of table %s\n",
-		(unsigned) buf_block_get_space(block),
-		(unsigned) buf_block_get_page_no(block),
-		index->name, index->table_name);
-	if (block->page.zip.data) {
-		buf_page_print(block->page.zip.data,
-			       buf_block_get_zip_size(block),
-			       BUF_PAGE_PRINT_NO_CRASH);
-	}
-	buf_page_print(buf_block_get_frame(block), 0, 0);
+	ib::error()
+		<< "Flag mismatch in page " << block->page.id
+		<< " index " << index->name
+		<< " of table " << index->table->name;
 }
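btr_corruption_report() is the first of many conversions in this patch from fprintf(stderr, ...) to the new ib::error() logger, an ostream-style class (declared in ut0ut.h) whose destructor flushes one complete, prefixed log line; the buf_page_print() page dumps are dropped along with it. It composes like any C++ stream:

	/* page_no and space_id are hypothetical locals here. */
	ib::error() << "Unable to read page " << page_no
		<< " of tablespace " << space_id;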
 
 #ifndef UNIV_HOTBACKUP
-#ifdef UNIV_BLOB_DEBUG
-# include "srv0srv.h"
-# include "ut0rbt.h"
-
-/** TRUE when messages about index->blobs modification are enabled. */
-static ibool btr_blob_dbg_msg;
-
-/** Issue a message about an operation on index->blobs.
-@param op	operation
-@param b	the entry being subjected to the operation
-@param ctx	the context of the operation */
-#define btr_blob_dbg_msg_issue(op, b, ctx)			\
-	fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n",	\
-		(b)->ref_page_no, (b)->ref_heap_no,		\
-		(b)->ref_field_no, (b)->blob_page_no, ctx,	\
-		(b)->owner, (b)->always_owner, (b)->del)
-
-/** Insert to index->blobs a reference to an off-page column.
-@param index	the index tree
-@param b	the reference
-@param ctx	context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_insert(
-/*====================*/
-	dict_index_t*		index,	/*!< in/out: index tree */
-	const btr_blob_dbg_t*	b,	/*!< in: the reference */
-	const char*		ctx)	/*!< in: context (for logging) */
-{
-	if (btr_blob_dbg_msg) {
-		btr_blob_dbg_msg_issue("insert", b, ctx);
-	}
-	mutex_enter(&index->blobs_mutex);
-	rbt_insert(index->blobs, b, b);
-	mutex_exit(&index->blobs_mutex);
-}
-
-/** Remove from index->blobs a reference to an off-page column.
-@param index	the index tree
-@param b	the reference
-@param ctx	context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_delete(
-/*====================*/
-	dict_index_t*		index,	/*!< in/out: index tree */
-	const btr_blob_dbg_t*	b,	/*!< in: the reference */
-	const char*		ctx)	/*!< in: context (for logging) */
-{
-	if (btr_blob_dbg_msg) {
-		btr_blob_dbg_msg_issue("delete", b, ctx);
-	}
-	mutex_enter(&index->blobs_mutex);
-	ut_a(rbt_delete(index->blobs, b));
-	mutex_exit(&index->blobs_mutex);
-}
-
-/**************************************************************//**
-Comparator for items (btr_blob_dbg_t) in index->blobs.
-The key in index->blobs is (ref_page_no, ref_heap_no, ref_field_no).
-@return negative, 0 or positive if *a<*b, *a=*b, *a>*b */
-static
-int
-btr_blob_dbg_cmp(
-/*=============*/
-	const void*	a,	/*!< in: first btr_blob_dbg_t to compare */
-	const void*	b)	/*!< in: second btr_blob_dbg_t to compare */
-{
-	const btr_blob_dbg_t*	aa = static_cast<const btr_blob_dbg_t*>(a);
-	const btr_blob_dbg_t*	bb = static_cast<const btr_blob_dbg_t*>(b);
-
-	ut_ad(aa != NULL);
-	ut_ad(bb != NULL);
-
-	if (aa->ref_page_no != bb->ref_page_no) {
-		return(aa->ref_page_no < bb->ref_page_no ? -1 : 1);
-	}
-	if (aa->ref_heap_no != bb->ref_heap_no) {
-		return(aa->ref_heap_no < bb->ref_heap_no ? -1 : 1);
-	}
-	if (aa->ref_field_no != bb->ref_field_no) {
-		return(aa->ref_field_no < bb->ref_field_no ? -1 : 1);
-	}
-	return(0);
-}
-
-/**************************************************************//**
-Add a reference to an off-page column to the index->blobs map. */
-UNIV_INTERN
-void
-btr_blob_dbg_add_blob(
-/*==================*/
-	const rec_t*	rec,		/*!< in: clustered index record */
-	ulint		field_no,	/*!< in: off-page column number */
-	ulint		page_no,	/*!< in: start page of the column */
-	dict_index_t*	index,		/*!< in/out: index tree */
-	const char*	ctx)		/*!< in: context (for logging) */
-{
-	btr_blob_dbg_t	b;
-	const page_t*	page	= page_align(rec);
-
-	ut_a(index->blobs);
-
-	b.blob_page_no = page_no;
-	b.ref_page_no = page_get_page_no(page);
-	b.ref_heap_no = page_rec_get_heap_no(rec);
-	b.ref_field_no = field_no;
-	ut_a(b.ref_field_no >= index->n_uniq);
-	b.always_owner = b.owner = TRUE;
-	b.del = FALSE;
-	ut_a(!rec_get_deleted_flag(rec, page_is_comp(page)));
-	btr_blob_dbg_rbt_insert(index, &b, ctx);
-}
-
-/**************************************************************//**
-Add to index->blobs any references to off-page columns from a record.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add_rec(
-/*=================*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in/out: index */
-	const ulint*	offsets,/*!< in: offsets */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	ulint		count	= 0;
-	ulint		i;
-	btr_blob_dbg_t	b;
-	ibool		del;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	if (!rec_offs_any_extern(offsets)) {
-		return(0);
-	}
-
-	b.ref_page_no = page_get_page_no(page_align(rec));
-	b.ref_heap_no = page_rec_get_heap_no(rec);
-	del = (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) != 0);
-
-	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-			ulint		len;
-			const byte*	field_ref = rec_get_nth_field(
-				rec, offsets, i, &len);
-
-			ut_a(len != UNIV_SQL_NULL);
-			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
-			if (!memcmp(field_ref, field_ref_zero,
-				    BTR_EXTERN_FIELD_REF_SIZE)) {
-				/* the column has not been stored yet */
-				continue;
-			}
-
-			b.ref_field_no = i;
-			b.blob_page_no = mach_read_from_4(
-				field_ref + BTR_EXTERN_PAGE_NO);
-			ut_a(b.ref_field_no >= index->n_uniq);
-			b.always_owner = b.owner
-				= !(field_ref[BTR_EXTERN_LEN]
-				    & BTR_EXTERN_OWNER_FLAG);
-			b.del = del;
-
-			btr_blob_dbg_rbt_insert(index, &b, ctx);
-			count++;
-		}
-	}
-
-	return(count);
-}
-
-/**************************************************************//**
-Display the references to off-page columns.
-This function is to be called from a debugger,
-for example when a breakpoint on ut_dbg_assertion_failed is hit. */
-UNIV_INTERN
-void
-btr_blob_dbg_print(
-/*===============*/
-	const dict_index_t*	index)	/*!< in: index tree */
-{
-	const ib_rbt_node_t*	node;
-
-	if (!index->blobs) {
-		return;
-	}
-
-	/* We intentionally do not acquire index->blobs_mutex here.
-	This function is to be called from a debugger, and the caller
-	should make sure that the index->blobs_mutex is held. */
-
-	for (node = rbt_first(index->blobs);
-	     node != NULL; node = rbt_next(index->blobs, node)) {
-		const btr_blob_dbg_t*	b
-			= rbt_value(btr_blob_dbg_t, node);
-		fprintf(stderr, "%u:%u:%u->%u%s%s%s\n",
-			b->ref_page_no, b->ref_heap_no, b->ref_field_no,
-			b->blob_page_no,
-			b->owner ? "" : "(disowned)",
-			b->always_owner ? "" : "(has disowned)",
-			b->del ? "(deleted)" : "");
-	}
-}
-
-/**************************************************************//**
-Remove from index->blobs any references to off-page columns from a record.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove_rec(
-/*====================*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in/out: index */
-	const ulint*	offsets,/*!< in: offsets */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	ulint		i;
-	ulint		count	= 0;
-	btr_blob_dbg_t	b;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	if (!rec_offs_any_extern(offsets)) {
-		return(0);
-	}
-
-	b.ref_page_no = page_get_page_no(page_align(rec));
-	b.ref_heap_no = page_rec_get_heap_no(rec);
-
-	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-			ulint		len;
-			const byte*	field_ref = rec_get_nth_field(
-				rec, offsets, i, &len);
-
-			ut_a(len != UNIV_SQL_NULL);
-			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
-			b.ref_field_no = i;
-			b.blob_page_no = mach_read_from_4(
-				field_ref + BTR_EXTERN_PAGE_NO);
-
-			switch (b.blob_page_no) {
-			case 0:
-				/* The column has not been stored yet.
-				The BLOB pointer must be all zero.
-				There cannot be a BLOB starting at
-				page 0, because page 0 is reserved for
-				the tablespace header. */
-				ut_a(!memcmp(field_ref, field_ref_zero,
-					     BTR_EXTERN_FIELD_REF_SIZE));
-				/* fall through */
-			case FIL_NULL:
-				/* the column has been freed already */
-				continue;
-			}
-
-			btr_blob_dbg_rbt_delete(index, &b, ctx);
-			count++;
-		}
-	}
-
-	return(count);
-}
-
-/**************************************************************//**
-Check that there are no references to off-page columns from or to
-the given page. Invoked when freeing or clearing a page.
-@return TRUE when no orphan references exist */
-UNIV_INTERN
-ibool
-btr_blob_dbg_is_empty(
-/*==================*/
-	dict_index_t*	index,		/*!< in: index */
-	ulint		page_no)	/*!< in: page number */
-{
-	const ib_rbt_node_t*	node;
-	ibool			success	= TRUE;
-
-	if (!index->blobs) {
-		return(success);
-	}
-
-	mutex_enter(&index->blobs_mutex);
-
-	for (node = rbt_first(index->blobs);
-	     node != NULL; node = rbt_next(index->blobs, node)) {
-		const btr_blob_dbg_t*	b
-			= rbt_value(btr_blob_dbg_t, node);
-
-		if (b->ref_page_no != page_no && b->blob_page_no != page_no) {
-			continue;
-		}
-
-		fprintf(stderr,
-			"InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n",
-			b->owner ? "" : "(disowned)",
-			b->always_owner ? "" : "(has disowned)",
-			b->del ? "(deleted)" : "",
-			b->ref_page_no, b->ref_heap_no, b->ref_field_no,
-			b->blob_page_no);
-
-		if (b->blob_page_no != page_no || b->owner || !b->del) {
-			success = FALSE;
-		}
-	}
-
-	mutex_exit(&index->blobs_mutex);
-	return(success);
-}
-
-/**************************************************************//**
-Count and process all references to off-page columns on a page.
-@return number of references processed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_op(
-/*============*/
-	const page_t*		page,	/*!< in: B-tree leaf page */
-	const rec_t*		rec,	/*!< in: record to start from
-					(NULL to process the whole page) */
-	dict_index_t*		index,	/*!< in/out: index */
-	const char*		ctx,	/*!< in: context (for logging) */
-	const btr_blob_dbg_op_f	op)	/*!< in: operation on records */
-{
-	ulint		count	= 0;
-	mem_heap_t*	heap	= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets	= offsets_;
-	rec_offs_init(offsets_);
-
-	ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
-	ut_a(!rec || page_align(rec) == page);
-
-	if (!index->blobs || !page_is_leaf(page)
-	    || !dict_index_is_clust(index)) {
-		return(0);
-	}
-
-	if (rec == NULL) {
-		rec = page_get_infimum_rec(page);
-	}
-
-	do {
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		count += op(rec, index, offsets, ctx);
-		rec = page_rec_get_next_const(rec);
-	} while (!page_rec_is_supremum(rec));
-
-	if (heap) {
-		mem_heap_free(heap);
-	}
-
-	return(count);
-}
-
-/**************************************************************//**
-Count and add to index->blobs any references to off-page columns
-from records on a page.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add(
-/*=============*/
-	const page_t*	page,	/*!< in: rewritten page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	btr_blob_dbg_assert_empty(index, page_get_page_no(page));
-
-	return(btr_blob_dbg_op(page, NULL, index, ctx, btr_blob_dbg_add_rec));
-}
-
-/**************************************************************//**
-Count and remove from index->blobs any references to off-page columns
-from records on a page.
-Used when reorganizing a page, before copying the records.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove(
-/*================*/
-	const page_t*	page,	/*!< in: b-tree page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	ulint	count;
-
-	count = btr_blob_dbg_op(page, NULL, index, ctx,
-				btr_blob_dbg_remove_rec);
-
-	/* Check that no references exist. */
-	btr_blob_dbg_assert_empty(index, page_get_page_no(page));
-
-	return(count);
-}
-
-/**************************************************************//**
-Restore in index->blobs any references to off-page columns
-Used when page reorganize fails due to compressed page overflow. */
-UNIV_INTERN
-void
-btr_blob_dbg_restore(
-/*=================*/
-	const page_t*	npage,	/*!< in: page that failed to compress  */
-	const page_t*	page,	/*!< in: copy of original page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	ulint	removed;
-	ulint	added;
-
-	ut_a(page_get_page_no(npage) == page_get_page_no(page));
-	ut_a(page_get_space_id(npage) == page_get_space_id(page));
-
-	removed = btr_blob_dbg_remove(npage, index, ctx);
-	added = btr_blob_dbg_add(page, index, ctx);
-	ut_a(added == removed);
-}
-
-/**************************************************************//**
-Modify the 'deleted' flag of a record. */
-UNIV_INTERN
-void
-btr_blob_dbg_set_deleted_flag(
-/*==========================*/
-	const rec_t*		rec,	/*!< in: record */
-	dict_index_t*		index,	/*!< in/out: index */
-	const ulint*		offsets,/*!< in: rec_get_offs(rec, index) */
-	ibool			del)	/*!< in: TRUE=deleted, FALSE=exists */
-{
-	const ib_rbt_node_t*	node;
-	btr_blob_dbg_t		b;
-	btr_blob_dbg_t*		c;
-	ulint			i;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_a(dict_index_is_clust(index));
-	ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */
-
-	if (!rec_offs_any_extern(offsets) || !index->blobs) {
-
-		return;
-	}
-
-	b.ref_page_no = page_get_page_no(page_align(rec));
-	b.ref_heap_no = page_rec_get_heap_no(rec);
-
-	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-			ulint		len;
-			const byte*	field_ref = rec_get_nth_field(
-				rec, offsets, i, &len);
-
-			ut_a(len != UNIV_SQL_NULL);
-			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
-			b.ref_field_no = i;
-			b.blob_page_no = mach_read_from_4(
-				field_ref + BTR_EXTERN_PAGE_NO);
-
-			switch (b.blob_page_no) {
-			case 0:
-				ut_a(memcmp(field_ref, field_ref_zero,
-					    BTR_EXTERN_FIELD_REF_SIZE));
-				/* page number 0 is for the
-				page allocation bitmap */
-			case FIL_NULL:
-				/* the column has been freed already */
-				ut_error;
-			}
-
-			mutex_enter(&index->blobs_mutex);
-			node = rbt_lookup(index->blobs, &b);
-			ut_a(node);
-
-			c = rbt_value(btr_blob_dbg_t, node);
-			/* The flag should be modified. */
-			c->del = del;
-			if (btr_blob_dbg_msg) {
-				b = *c;
-				mutex_exit(&index->blobs_mutex);
-				btr_blob_dbg_msg_issue("del_mk", &b, "");
-			} else {
-				mutex_exit(&index->blobs_mutex);
-			}
-		}
-	}
-}
-
-/**************************************************************//**
-Change the ownership of an off-page column. */
-UNIV_INTERN
-void
-btr_blob_dbg_owner(
-/*===============*/
-	const rec_t*		rec,	/*!< in: record */
-	dict_index_t*		index,	/*!< in/out: index */
-	const ulint*		offsets,/*!< in: rec_get_offs(rec, index) */
-	ulint			i,	/*!< in: ith field in rec */
-	ibool			own)	/*!< in: TRUE=owned, FALSE=disowned */
-{
-	const ib_rbt_node_t*	node;
-	btr_blob_dbg_t		b;
-	const byte*		field_ref;
-	ulint			len;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_a(rec_offs_nth_extern(offsets, i));
-
-	field_ref = rec_get_nth_field(rec, offsets, i, &len);
-	ut_a(len != UNIV_SQL_NULL);
-	ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-	field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
-	b.ref_page_no = page_get_page_no(page_align(rec));
-	b.ref_heap_no = page_rec_get_heap_no(rec);
-	b.ref_field_no = i;
-	b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG);
-	b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
-
-	ut_a(b.owner == own);
-
-	mutex_enter(&index->blobs_mutex);
-	node = rbt_lookup(index->blobs, &b);
-	/* row_ins_clust_index_entry_by_modify() invokes
-	btr_cur_unmark_extern_fields() also for the newly inserted
-	references, which are all zero bytes until the columns are stored.
-	The node lookup must fail if and only if that is the case. */
-	ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)
-	     == !node);
-
-	if (node) {
-		btr_blob_dbg_t*	c = rbt_value(btr_blob_dbg_t, node);
-		/* Some code sets ownership from TRUE to TRUE.
-		We do not allow changing ownership from FALSE to FALSE. */
-		ut_a(own || c->owner);
-
-		c->owner = own;
-		if (!own) {
-			c->always_owner = FALSE;
-		}
-	}
-
-	mutex_exit(&index->blobs_mutex);
-}
-#endif /* UNIV_BLOB_DEBUG */
-
 /*
 Latching strategy of the InnoDB B-tree
 --------------------------------------
@@ -694,7 +135,7 @@ we allocate pages for the non-leaf levels of the tree.
 #ifdef UNIV_BTR_DEBUG
 /**************************************************************//**
 Checks a file segment header within a B-tree root page.
-@return	TRUE if valid */
+@return TRUE if valid */
 static
 ibool
 btr_root_fseg_validate(
@@ -713,8 +154,7 @@ btr_root_fseg_validate(
 
 /**************************************************************//**
 Gets the root node of a tree and x- or s-latches it.
-@return	root page, x- or s-latched */
-static
+@return root page, x- or s-latched */
 buf_block_t*
 btr_root_block_get(
 /*===============*/
@@ -723,16 +163,13 @@ btr_root_block_get(
 					or RW_X_LATCH */
 	mtr_t*			mtr)	/*!< in: mtr */
 {
-	ulint		space;
-	ulint		zip_size;
-	ulint		root_page_no;
-	buf_block_t*	block;
+	const ulint		space = dict_index_get_space(index);
+	const page_id_t		page_id(space, dict_index_get_page(index));
+	const page_size_t	page_size(dict_table_page_size(index->table));
 
-	space = dict_index_get_space(index);
-	zip_size = dict_table_zip_size(index->table);
-	root_page_no = dict_index_get_page(index);
+	buf_block_t*	block = btr_block_get(page_id, page_size, mode,
+					      index, mtr);
 
-	block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
 	btr_assert_not_corrupted(block, index);
 #ifdef UNIV_BTR_DEBUG
 	if (!dict_index_is_ibuf(index)) {
@@ -749,16 +186,18 @@ btr_root_block_get(
 }
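The btr_root_block_get() rewrite above shows the signature pattern applied across the whole merge: the old (space, zip_size, page_no) argument triples collapse into a page_id_t plus a page_size_t, and btr_block_get() now takes those two objects. Constructed from an index, per the code above:

	const page_id_t		page_id(dict_index_get_space(index),
					dict_index_get_page(index));
	const page_size_t	page_size(
		dict_table_page_size(index->table));

	buf_block_t*	block = btr_block_get(
		page_id, page_size, RW_X_LATCH, index, mtr);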
 
 /**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return	root page, x-latched */
-UNIV_INTERN
+Gets the root node of a tree and sx-latches it for segment access.
+@return root page, sx-latched */
 page_t*
 btr_root_get(
 /*=========*/
 	const dict_index_t*	index,	/*!< in: index tree */
 	mtr_t*			mtr)	/*!< in: mtr */
 {
-	return(buf_block_get_frame(btr_root_block_get(index, RW_X_LATCH,
+	/* Intended for segment list access. An SX latch does not
+	block other threads from reading user data, but it does
+	block their access to the segment lists. */
+	return(buf_block_get_frame(btr_root_block_get(index, RW_SX_LATCH,
 						      mtr)));
 }
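
For context, a minimal caller sketch (hypothetical; only the signatures shown in this hunk are assumed): the root frame is fetched under the SX latch inside a mini-transaction, so the segment headers stored in the root can be used without blocking concurrent readers of user records.

	/* Hedged sketch, not part of this commit. Assumes a valid
	dict_index_t* index. */
	mtr_t	mtr;
	mtr_start(&mtr);
	page_t*	root = btr_root_get(index, &mtr); /* takes RW_SX_LATCH */
	fseg_header_t*	seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
	/* ... walk the leaf segment list; readers are not blocked ... */
	mtr_commit(&mtr);	/* releases the SX latch on the root */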
 
@@ -766,8 +205,7 @@ btr_root_get(
 Gets the height of the B-tree (the level of the root, when the leaf
 level is assumed to be 0). The caller must hold an S or X latch on
 the index.
-@return	tree height (level of the root) */
-UNIV_INTERN
+@return tree height (level of the root) */
 ulint
 btr_height_get(
 /*===========*/
@@ -777,21 +215,22 @@ btr_height_get(
 	ulint		height;
 	buf_block_t*	root_block;
 
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_S_LOCK)
-	      || mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
+	ut_ad(srv_read_only_mode
+	      || mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+					   MTR_MEMO_S_LOCK
+					   | MTR_MEMO_X_LOCK
+					   | MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
 
-        /* S latches the page */
-        root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+	/* S latches the page */
+	root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
 
-        height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
+	height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
 
-        /* Release the S latch on the root page. */
-        mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
-#ifdef UNIV_SYNC_DEBUG
-        sync_thread_reset_level(&root_block->lock);
-#endif /* UNIV_SYNC_DEBUG */
+	/* Release the S latch on the root page. */
+	mtr->memo_release(root_block, MTR_MEMO_PAGE_S_FIX);
+
+	ut_d(sync_check_unlock(&root_block->lock));
 
 	return(height);
 }
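
A hedged usage sketch for the assertion above: the caller is expected to hold the index lock in the same mini-transaction before asking for the height. mtr_s_lock() is outside this hunk and is an assumption here.

	mtr_t	mtr;
	mtr_start(&mtr);
	mtr_s_lock(dict_index_get_lock(index), &mtr); /* satisfies the ut_ad */
	ulint	height = btr_height_get(index, &mtr);
	mtr_commit(&mtr);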
@@ -799,7 +238,7 @@ btr_height_get(
 /**************************************************************//**
 Checks a file segment header within a B-tree root page and updates
 the segment header space id.
-@return	TRUE if valid */
+@return TRUE if valid */
 static
 bool
 btr_root_fseg_adjust_on_import(
@@ -832,41 +271,34 @@ btr_root_fseg_adjust_on_import(
 /**************************************************************//**
 Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
 @return error code, or DB_SUCCESS */
-UNIV_INTERN
 dberr_t
 btr_root_adjust_on_import(
 /*======================*/
 	const dict_index_t*	index)	/*!< in: index tree */
 {
-	dberr_t		err;
-	mtr_t		mtr;
-	page_t*		page;
-	buf_block_t*	block;
-	page_zip_des_t*	page_zip;
-	dict_table_t*	table		= index->table;
-	ulint		space_id	= dict_index_get_space(index);
-	ulint		zip_size	= dict_table_zip_size(table);
-	ulint		root_page_no	= dict_index_get_page(index);
+	dberr_t			err;
+	mtr_t			mtr;
+	page_t*			page;
+	buf_block_t*		block;
+	page_zip_des_t*		page_zip;
+	dict_table_t*		table = index->table;
+	const ulint		space_id = dict_index_get_space(index);
+	const page_id_t		page_id(space_id, dict_index_get_page(index));
+	const page_size_t	page_size(dict_table_page_size(table));
+
+	DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
+			return(DB_CORRUPTION););
 
 	mtr_start(&mtr);
 
 	mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
 
-	DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
-			return(DB_CORRUPTION););
-
-	block = btr_block_get(
-		space_id, zip_size, root_page_no, RW_X_LATCH, index, &mtr);
+	block = btr_block_get(page_id, page_size, RW_X_LATCH, index, &mtr);
 
 	page = buf_block_get_frame(block);
 	page_zip = buf_block_get_page_zip(block);
 
-	/* Check that this is a B-tree page and both the PREV and NEXT
-	pointers are FIL_NULL, because the root page does not have any
-	siblings. */
-	if (fil_page_get_type(page) != FIL_PAGE_INDEX
-	    || fil_page_get_prev(page) != FIL_NULL
-	    || fil_page_get_next(page) != FIL_NULL) {
+	if (!page_is_root(page)) {
 
 		err = DB_CORRUPTION;
 
@@ -879,18 +311,13 @@ btr_root_adjust_on_import(
 		if (page_is_compact_format != dict_table_is_comp(table)) {
 			err = DB_CORRUPTION;
 		} else {
-
 			/* Check that the table flags and the tablespace
 			flags match. */
-			ulint	flags = fil_space_get_flags(table->space);
-
-			if (flags
-			    && flags != dict_tf_to_fsp_flags(table->flags)) {
-
-				err = DB_CORRUPTION;
-			} else {
-				err = DB_SUCCESS;
-			}
+			ulint	flags = dict_tf_to_fsp_flags(table->flags,
+							     false);
+			ulint	fsp_flags = fil_space_get_flags(table->space);
+			err = fsp_flags_are_equal(flags, fsp_flags)
+			      ? DB_SUCCESS : DB_CORRUPTION;
 		}
 	} else {
 		err = DB_SUCCESS;
@@ -913,124 +340,9 @@ btr_root_adjust_on_import(
 	return(err);
 }
 
-/*************************************************************//**
-Gets pointer to the previous user record in the tree. It is assumed that
-the caller has appropriate latches on the page and its neighbor.
-@return	previous user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
-	rec_t*	rec,	/*!< in: record on leaf level */
-	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
-			needed, also to the previous page */
-{
-	page_t*	page;
-	page_t*	prev_page;
-	ulint	prev_page_no;
-
-	if (!page_rec_is_infimum(rec)) {
-
-		rec_t*	prev_rec = page_rec_get_prev(rec);
-
-		if (!page_rec_is_infimum(prev_rec)) {
-
-			return(prev_rec);
-		}
-	}
-
-	page = page_align(rec);
-	prev_page_no = btr_page_get_prev(page, mtr);
-
-	if (prev_page_no != FIL_NULL) {
-
-		ulint		space;
-		ulint		zip_size;
-		buf_block_t*	prev_block;
-
-		space = page_get_space_id(page);
-		zip_size = fil_space_get_zip_size(space);
-
-		prev_block = buf_page_get_with_no_latch(space, zip_size,
-							prev_page_no, mtr);
-		prev_page = buf_block_get_frame(prev_block);
-		/* The caller must already have a latch to the brother */
-		ut_ad(mtr_memo_contains(mtr, prev_block,
-					MTR_MEMO_PAGE_S_FIX)
-		      || mtr_memo_contains(mtr, prev_block,
-					   MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_BTR_DEBUG
-		ut_a(page_is_comp(prev_page) == page_is_comp(page));
-		ut_a(btr_page_get_next(prev_page, mtr)
-		     == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
-	}
-
-	return(NULL);
-}
-
-/*************************************************************//**
-Gets pointer to the next user record in the tree. It is assumed that the
-caller has appropriate latches on the page and its neighbor.
-@return	next user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
-	rec_t*	rec,	/*!< in: record on leaf level */
-	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
-			needed, also to the next page */
-{
-	page_t*	page;
-	page_t*	next_page;
-	ulint	next_page_no;
-
-	if (!page_rec_is_supremum(rec)) {
-
-		rec_t*	next_rec = page_rec_get_next(rec);
-
-		if (!page_rec_is_supremum(next_rec)) {
-
-			return(next_rec);
-		}
-	}
-
-	page = page_align(rec);
-	next_page_no = btr_page_get_next(page, mtr);
-
-	if (next_page_no != FIL_NULL) {
-		ulint		space;
-		ulint		zip_size;
-		buf_block_t*	next_block;
-
-		space = page_get_space_id(page);
-		zip_size = fil_space_get_zip_size(space);
-
-		next_block = buf_page_get_with_no_latch(space, zip_size,
-							next_page_no, mtr);
-		next_page = buf_block_get_frame(next_block);
-		/* The caller must already have a latch to the brother */
-		ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
-		      || mtr_memo_contains(mtr, next_block,
-					   MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_BTR_DEBUG
-		ut_a(page_is_comp(next_page) == page_is_comp(page));
-		ut_a(btr_page_get_prev(next_page, mtr)
-		     == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		return(page_rec_get_next(page_get_infimum_rec(next_page)));
-	}
-
-	return(NULL);
-}
-
 /**************************************************************//**
 Creates a new index page (not the root, and also not
 used in page reorganization).  @see btr_page_empty(). */
-static
 void
 btr_page_create(
 /*============*/
@@ -1042,18 +354,21 @@ btr_page_create(
 {
 	page_t*		page = buf_block_get_frame(block);
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-	btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 
 	if (page_zip) {
-		page_create_zip(block, index, level, 0, mtr);
+		page_create_zip(block, index, level, 0, NULL, mtr);
 	} else {
-		page_create(block, mtr, dict_table_is_comp(index->table));
+		page_create(block, mtr, dict_table_is_comp(index->table),
+			    dict_index_is_spatial(index));
 		/* Set the level of the new index page */
 		btr_page_set_level(page, NULL, level, mtr);
 	}
 
-	block->check_index_page_at_flush = TRUE;
+	/* For spatial indexes, initialize the split sequence number. */
+	if (dict_index_is_spatial(index)) {
+		page_set_ssn_id(block, page_zip, 0, mtr);
+	}
 
 	btr_page_set_index_id(page, page_zip, index->id, mtr);
 }
@@ -1061,7 +376,7 @@ btr_page_create(
 /**************************************************************//**
 Allocates a new file page to be used in an ibuf tree. Takes the page from
 the free list of the tree, which must contain pages!
-@return	new allocated block, x-latched */
+@return new allocated block, x-latched */
 static
 buf_block_t*
 btr_page_alloc_for_ibuf(
@@ -1080,9 +395,11 @@ btr_page_alloc_for_ibuf(
 				   + PAGE_BTR_IBUF_FREE_LIST, mtr);
 	ut_a(node_addr.page != FIL_NULL);
 
-	new_block = buf_page_get(dict_index_get_space(index),
-				 dict_table_zip_size(index->table),
-				 node_addr.page, RW_X_LATCH, mtr);
+	new_block = buf_page_get(
+		page_id_t(dict_index_get_space(index), node_addr.page),
+		dict_table_page_size(index->table),
+		RW_X_LATCH, mtr);
+
 	new_page = buf_block_get_frame(new_block);
 	buf_block_dbg_add_level(new_block, SYNC_IBUF_TREE_NODE_NEW);
 
@@ -1148,7 +465,6 @@ that the caller has made the reservation for free extents!
 @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
 (init_mtr == mtr, or the page was not previously freed in mtr)
 @retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
 buf_block_t*
 btr_page_alloc(
 /*===========*/
@@ -1183,8 +499,7 @@ btr_page_alloc(
 
 /**************************************************************//**
 Gets the number of pages in a B-tree.
-@return	number of pages, or ULINT_UNDEFINED if the index is unavailable */
-UNIV_INTERN
+@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
 ulint
 btr_get_size(
 /*=========*/
@@ -1198,11 +513,14 @@ btr_get_size(
 	ulint		n;
 	ulint		dummy;
 
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_S_LOCK));
+	ut_ad(srv_read_only_mode
+	      || mtr_memo_contains(mtr, dict_index_get_lock(index),
+				   MTR_MEMO_S_LOCK)
+	      || dict_table_is_intrinsic(index->table));
 
-	if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
-	    || *index->name == TEMP_INDEX_PREFIX) {
+	if (index->page == FIL_NULL
+	    || dict_index_is_online_ddl(index)
+	    || !index->is_committed()) {
 		return(ULINT_UNDEFINED);
 	}
 
@@ -1241,7 +559,7 @@ btr_page_free_for_ibuf(
 {
 	page_t*		root;
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 	root = btr_root_get(index, mtr);
 
 	flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
@@ -1254,26 +572,23 @@ btr_page_free_for_ibuf(
 
 /**************************************************************//**
 Frees a file page used in an index tree. Can be used also to (BLOB)
-external storage pages, because the page level 0 can be given as an
-argument. */
-UNIV_INTERN
+external storage pages. */
 void
 btr_page_free_low(
 /*==============*/
 	dict_index_t*	index,	/*!< in: index tree */
 	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
-	ulint		level,	/*!< in: page level */
+	ulint		level,	/*!< in: page level (ULINT_UNDEFINED=BLOB) */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	fseg_header_t*	seg_header;
 	page_t*		root;
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 	/* The page gets invalid for optimistic searches: increment the frame
 	modify clock */
 
 	buf_block_modify_clock_inc(block);
-	btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
 
 	if (dict_index_is_ibuf(index)) {
 
@@ -1284,20 +599,28 @@ btr_page_free_low(
 
 	root = btr_root_get(index, mtr);
 
-	if (level == 0) {
+	if (level == 0 || level == ULINT_UNDEFINED) {
 		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
 	} else {
 		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
 	}
 
+#ifdef UNIV_GIS_DEBUG
+	if (dict_index_is_spatial(index)) {
+		fprintf(stderr, "GIS_DIAG: Freed  %ld\n",
+			(long) block->page.id.page_no());
+	}
+#endif
+
 	fseg_free_page(seg_header,
-		       buf_block_get_space(block),
-		       buf_block_get_page_no(block), mtr);
+		       block->page.id.space(),
+		       block->page.id.page_no(),
+		       level != ULINT_UNDEFINED, mtr);
 
 	/* The page was marked free in the allocation bitmap, but it
 	should remain buffer-fixed until mtr_commit(mtr) or until it
 	is explicitly freed from the mini-transaction. */
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 	/* TODO: Discard any operations on the page from the redo log
 	and remove the block from the flush list and the buffer pool.
 	This would free up buffer pool earlier and reduce writes to
@@ -1307,7 +630,6 @@ btr_page_free_low(
 /**************************************************************//**
 Frees a file page used in an index tree. NOTE: cannot free field external
 storage pages because the page must contain info on its level. */
-UNIV_INTERN
 void
 btr_page_free(
 /*==========*/
@@ -1318,7 +640,8 @@ btr_page_free(
 	const page_t*	page	= buf_block_get_frame(block);
 	ulint		level	= btr_page_get_level(page, mtr);
 
-	ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
+	ut_ad(fil_page_index_page_check(block->frame));
+	ut_ad(level != ULINT_UNDEFINED);
 	btr_page_free_low(index, block, level, mtr);
 }
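
The ULINT_UNDEFINED convention above is what separates BLOB pages from B-tree nodes: an externally stored page carries no level field, so callers free it through btr_page_free_low() directly. A hedged one-line sketch, where ext_block is an assumed x-latched block of an externally stored column:

	/* Frees a BLOB page: selects the leaf segment, and the
	(level != ULINT_UNDEFINED) flag passed to fseg_free_page()
	becomes false, since such a page cannot be a B-tree node. */
	btr_page_free_low(index, ext_block, ULINT_UNDEFINED, &mtr);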
 
@@ -1358,8 +681,8 @@ btr_node_ptr_set_child_page_no(
 }
 
 /************************************************************//**
-Returns the child page of a node pointer and x-latches it.
-@return	child page, x-latched */
+Returns the child page of a node pointer and sx-latches it.
+@return child page, sx-latched */
 static
 buf_block_t*
 btr_node_ptr_get_child(
@@ -1369,21 +692,20 @@ btr_node_ptr_get_child(
 	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	ulint	page_no;
-	ulint	space;
-
 	ut_ad(rec_offs_validate(node_ptr, index, offsets));
-	space = page_get_space_id(page_align(node_ptr));
-	page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
 
-	return(btr_block_get(space, dict_table_zip_size(index->table),
-			     page_no, RW_X_LATCH, index, mtr));
+	const page_id_t	page_id(
+		page_get_space_id(page_align(node_ptr)),
+		btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+	return(btr_block_get(page_id, dict_table_page_size(index->table),
+			     RW_SX_LATCH, index, mtr));
 }
 
 /************************************************************//**
 Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree.
-@return	rec_get_offsets() of the node pointer record */
+an sx-latch on the tree.
+@return rec_get_offsets() of the node pointer record */
 static
 ulint*
 btr_page_get_father_node_ptr_func(
@@ -1393,6 +715,8 @@ btr_page_get_father_node_ptr_func(
 	btr_cur_t*	cursor,	/*!< in: cursor pointing to user record,
 				out: cursor on node pointer record,
 				its page x-latched */
+	ulint		latch_mode,/*!< in: BTR_CONT_MODIFY_TREE
+				or BTR_CONT_SEARCH_TREE */
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mtr */
@@ -1404,11 +728,17 @@ btr_page_get_father_node_ptr_func(
 	ulint		page_no;
 	dict_index_t*	index;
 
-	page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
+	ut_ad(latch_mode == BTR_CONT_MODIFY_TREE
+	      || latch_mode == BTR_CONT_SEARCH_TREE);
+
+	page_no = btr_cur_get_block(cursor)->page.id.page_no();
 	index = btr_cur_get_index(cursor);
 
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
+	ut_ad(srv_read_only_mode
+	      || mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+					   MTR_MEMO_X_LOCK
+					   | MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
 
 	ut_ad(dict_index_get_page(index) != page_no);
 
@@ -1418,9 +748,27 @@ btr_page_get_father_node_ptr_func(
 	ut_a(page_rec_is_user_rec(user_rec));
 	tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
 
-	btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
-				    BTR_CONT_MODIFY_TREE, cursor, 0,
-				    file, line, mtr);
+	if (!dict_index_is_spatial(index)) {
+		if (dict_table_is_intrinsic(index->table)) {
+			btr_cur_search_to_nth_level_with_no_latch(
+				index, level + 1, tuple, PAGE_CUR_LE, cursor,
+				file, line, mtr);
+		} else {
+			btr_cur_search_to_nth_level(
+				index, level + 1, tuple,
+				PAGE_CUR_LE, latch_mode, cursor, 0,
+				file, line, mtr);
+		}
+	} else {
+		/* For R-tree, only latch mode from caller would be
+		BTR_CONT_MODIFY_TREE */
+		ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
+
+		/* Try to avoid traversing from the root; get the
+		father node from the parent_path vector instead. */
+		rtr_get_father_node(index, level + 1, tuple,
+				    NULL, cursor, page_no, mtr);
+	}
 
 	node_ptr = btr_cur_get_rec(cursor);
 	ut_ad(!page_rec_is_comp(node_ptr)
@@ -1430,22 +778,15 @@ btr_page_get_father_node_ptr_func(
 
 	if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) {
 		rec_t*	print_rec;
-		fputs("InnoDB: Dump of the child page:\n", stderr);
-		buf_page_print(page_align(user_rec), 0,
-			       BUF_PAGE_PRINT_NO_CRASH);
-		fputs("InnoDB: Dump of the parent page:\n", stderr);
-		buf_page_print(page_align(node_ptr), 0,
-			       BUF_PAGE_PRINT_NO_CRASH);
-
-		fputs("InnoDB: Corruption of an index tree: table ", stderr);
-		ut_print_name(stderr, NULL, TRUE, index->table_name);
-		fputs(", index ", stderr);
-		ut_print_name(stderr, NULL, FALSE, index->name);
-		fprintf(stderr, ",\n"
-			"InnoDB: father ptr page no %lu, child page no %lu\n",
-			(ulong)
-			btr_node_ptr_get_child_page_no(node_ptr, offsets),
-			(ulong) page_no);
+
+		ib::error()
+			<< "Corruption of an index tree: table "
+			<< index->table->name
+			<< " index " << index->name
+			<< ", father ptr page no "
+			<< btr_node_ptr_get_child_page_no(node_ptr, offsets)
+			<< ", child page no " << page_no;
+
 		print_rec = page_rec_get_next(
 			page_get_infimum_rec(page_align(user_rec)));
 		offsets = rec_get_offsets(print_rec, index,
@@ -1455,27 +796,28 @@ btr_page_get_father_node_ptr_func(
 					  ULINT_UNDEFINED, &heap);
 		page_rec_print(node_ptr, offsets);
 
-		fputs("InnoDB: You should dump + drop + reimport the table"
-		      " to fix the\n"
-		      "InnoDB: corruption. If the crash happens at "
-		      "the database startup, see\n"
-		      "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n"
-		      "InnoDB: forcing recovery. "
-		      "Then dump + drop + reimport.\n", stderr);
-
-		ut_error;
+		ib::fatal()
+			<< "You should dump + drop + reimport the table to"
+			<< " fix the corruption. If the crash happens at"
+			<< " database startup. " << FORCE_RECOVERY_MSG
+			<< " Then dump + drop + reimport.";
 	}
 
 	return(offsets);
 }
 
 #define btr_page_get_father_node_ptr(of,heap,cur,mtr)			\
-	btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
+	btr_page_get_father_node_ptr_func(				\
+		of,heap,cur,BTR_CONT_MODIFY_TREE,__FILE__,__LINE__,mtr)
+
+#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr)	\
+	btr_page_get_father_node_ptr_func(				\
+		of,heap,cur,BTR_CONT_SEARCH_TREE,__FILE__,__LINE__,mtr)
 
 /************************************************************//**
 Returns the upper level node pointer to a page. It is assumed that mtr holds
 an x-latch on the tree.
-@return	rec_get_offsets() of the node pointer record */
+@return rec_get_offsets() of the node pointer record */
 static
 ulint*
 btr_page_get_father_block(
@@ -1519,26 +861,120 @@ btr_page_get_father(
 	mem_heap_free(heap);
 }
 
-/************************************************************//**
-Creates the root node for a new index tree.
-@return	page number of the created root, FIL_NULL if did not succeed */
-UNIV_INTERN
+/** Free a B-tree root page. btr_free_but_not_root() must already
+have been called.
+In a persistent tablespace, the caller must invoke fsp_init_file_page()
+before mtr.commit().
+@param[in,out]	block	index root page
+@param[in,out]	mtr	mini-transaction */
+static
+void
+btr_free_root(
+	buf_block_t*	block,
+	mtr_t*		mtr)
+{
+	fseg_header_t*	header;
+
+	ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr->is_named_space(block->page.id.space()));
+
+	btr_search_drop_page_hash_index(block);
+
+	header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+#ifdef UNIV_BTR_DEBUG
+	ut_a(btr_root_fseg_validate(header, block->page.id.space()));
+#endif /* UNIV_BTR_DEBUG */
+
+	while (!fseg_free_step(header, true, mtr)) {
+		/* Free the entire segment in small steps. */
+	}
+}
+
+/** PAGE_INDEX_ID value for freed index B-trees */
+static const index_id_t	BTR_FREED_INDEX_ID = 0;
+
+/** Invalidate an index root page so that btr_free_root_check()
+will not find it.
+@param[in,out]	block	index root page
+@param[in,out]	mtr	mini-transaction */
+static
+void
+btr_free_root_invalidate(
+	buf_block_t*	block,
+	mtr_t*		mtr)
+{
+	ut_ad(page_is_root(block->frame));
+
+	btr_page_set_index_id(
+		buf_block_get_frame(block),
+		buf_block_get_page_zip(block),
+		BTR_FREED_INDEX_ID, mtr);
+}
+
+/** Prepare to free a B-tree.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in]	index_id	PAGE_INDEX_ID contents
+@param[in,out]	mtr		mini-transaction
+@return root block, to invoke btr_free_but_not_root() and btr_free_root()
+@retval NULL if the page is no longer a matching B-tree page */
+static __attribute__((warn_unused_result))
+buf_block_t*
+btr_free_root_check(
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	index_id_t		index_id,
+	mtr_t*			mtr)
+{
+	ut_ad(page_id.space() != srv_tmp_space.space_id());
+	ut_ad(index_id != BTR_FREED_INDEX_ID);
+
+	buf_block_t*	block = buf_page_get(
+		page_id, page_size, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+
+	if (fil_page_index_page_check(block->frame)
+	    && index_id == btr_page_get_index_id(block->frame)) {
+		/* This should be a root page.
+		It should not be possible to reassign the same
+		index_id for some other index in the tablespace. */
+		ut_ad(page_is_root(block->frame));
+	} else {
+		block = NULL;
+	}
+
+	return(block);
+}
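+
Taken together, the helpers above make dropping a tree idempotent: btr_free_root_check() returns the root only while PAGE_INDEX_ID still matches, and btr_free_root_invalidate() stamps BTR_FREED_INDEX_ID so a replayed drop finds nothing. A hedged sketch of the protocol, which btr_free_if_exists() further below implements for real:

	mtr_t	mtr;
	mtr.start();
	buf_block_t*	root = btr_free_root_check(
		page_id, page_size, index_id, &mtr);
	if (root != NULL) {
		btr_free_but_not_root(root, mtr.get_log_mode());
		mtr.set_named_space(page_id.space());
		btr_free_root(root, &mtr);
		btr_free_root_invalidate(root, &mtr);
	}
	/* else: the index id no longer matches -- already freed. */
	mtr.commit();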
+
+/** Create the root node for a new index tree.
+@param[in]	type			type of the index
+@param[in]	space			space where created
+@param[in]	page_size		page size
+@param[in]	index_id		index id
+@param[in]	index			index, or NULL when applying TRUNCATE
+log record during recovery
+@param[in]	btr_redo_create_info	used for applying TRUNCATE log
+record during recovery
+@param[in]	mtr			mini-transaction handle
+@return page number of the created root, FIL_NULL if did not succeed */
 ulint
 btr_create(
-/*=======*/
-	ulint		type,	/*!< in: type of the index */
-	ulint		space,	/*!< in: space where created */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	index_id_t	index_id,/*!< in: index id */
-	dict_index_t*	index,	/*!< in: index */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+	ulint			type,
+	ulint			space,
+	const page_size_t&	page_size,
+	index_id_t		index_id,
+	dict_index_t*		index,
+	const btr_create_t*	btr_redo_create_info,
+	mtr_t*			mtr)
 {
-	ulint		page_no;
-	buf_block_t*	block;
-	buf_frame_t*	frame;
-	page_t*		page;
-	page_zip_des_t*	page_zip;
+	ulint			page_no;
+	buf_block_t*		block;
+	buf_frame_t*		frame;
+	page_t*			page;
+	page_zip_des_t*		page_zip;
+
+	ut_ad(mtr->is_named_space(space));
+	ut_ad(index_id != BTR_FREED_INDEX_ID);
 
 	/* Create the two new segments (one, in the case of an ibuf tree) for
 	the index tree; the segment headers are put on the allocated root page
@@ -1551,10 +987,14 @@ btr_create(
 			space, 0,
 			IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
 
+		if (ibuf_hdr_block == NULL) {
+			return(FIL_NULL);
+		}
+
 		buf_block_dbg_add_level(
 			ibuf_hdr_block, SYNC_IBUF_TREE_NODE_NEW);
 
-		ut_ad(buf_block_get_page_no(ibuf_hdr_block)
+		ut_ad(ibuf_hdr_block->page.id.page_no()
 		      == IBUF_HEADER_PAGE_NO);
 		/* Allocate then the next page to the segment: it will be the
 		tree root page */
@@ -1564,16 +1004,8 @@ btr_create(
 			+ IBUF_HEADER + IBUF_TREE_SEG_HEADER,
 			IBUF_TREE_ROOT_PAGE_NO,
 			FSP_UP, mtr);
-		ut_ad(buf_block_get_page_no(block) == IBUF_TREE_ROOT_PAGE_NO);
+		ut_ad(block->page.id.page_no() == IBUF_TREE_ROOT_PAGE_NO);
 	} else {
-#ifdef UNIV_BLOB_DEBUG
-		if ((type & DICT_CLUSTERED) && !index->blobs) {
-			mutex_create(PFS_NOT_INSTRUMENTED,
-				     &index->blobs_mutex, SYNC_ANY_LATCH);
-			index->blobs = rbt_create(sizeof(btr_blob_dbg_t),
-						  btr_blob_dbg_cmp);
-		}
-#endif /* UNIV_BLOB_DEBUG */
 		block = fseg_create(space, 0,
 				    PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
 	}
@@ -1583,7 +1015,7 @@ btr_create(
 		return(FIL_NULL);
 	}
 
-	page_no = buf_block_get_page_no(block);
+	page_no = block->page.id.page_no();
 	frame = buf_block_get_frame(block);
 
 	if (type & DICT_IBUF) {
@@ -1602,7 +1034,10 @@ btr_create(
 				 PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
 			/* Not enough space for new segment, free root
 			segment before return. */
-			btr_free_root(space, zip_size, page_no, mtr);
+			btr_free_root(block, mtr);
+			if (!dict_table_is_temporary(index->table)) {
+				btr_free_root_invalidate(block, mtr);
+			}
 
 			return(FIL_NULL);
 		}
@@ -1616,16 +1051,48 @@ btr_create(
 	page_zip = buf_block_get_page_zip(block);
 
 	if (page_zip) {
-		page = page_create_zip(block, index, 0, 0, mtr);
+		if (index != NULL) {
+			page = page_create_zip(block, index, 0, 0, NULL, mtr);
+		} else {
+			/* Create a compressed index page when applying
+			TRUNCATE log record during recovery */
+			ut_ad(btr_redo_create_info != NULL);
+
+			redo_page_compress_t	page_comp_info;
+
+			page_comp_info.type = type;
+
+			page_comp_info.index_id = index_id;
+
+			page_comp_info.n_fields =
+				btr_redo_create_info->n_fields;
+
+			page_comp_info.field_len =
+				btr_redo_create_info->field_len;
+
+			page_comp_info.fields = btr_redo_create_info->fields;
+
+			page_comp_info.trx_id_pos =
+				btr_redo_create_info->trx_id_pos;
+
+			page = page_create_zip(block, NULL, 0, 0,
+					       &page_comp_info, mtr);
+		}
 	} else {
-		page = page_create(block, mtr,
-				   dict_table_is_comp(index->table));
+		if (index != NULL) {
+			page = page_create(block, mtr,
+					   dict_table_is_comp(index->table),
+					   dict_index_is_spatial(index));
+		} else {
+			ut_ad(btr_redo_create_info != NULL);
+			page = page_create(
+				block, mtr, btr_redo_create_info->format_flags,
+				type == DICT_SPATIAL);
+		}
 		/* Set the level of the new index page */
 		btr_page_set_level(page, NULL, 0, mtr);
 	}
 
-	block->check_index_page_at_flush = TRUE;
-
 	/* Set the index id of the page */
 	btr_page_set_index_id(page, page_zip, index_id, mtr);
 
@@ -1635,9 +1102,16 @@ btr_create(
 
 	/* We reset the free bits for the page to allow creation of several
 	trees in the same mtr, otherwise the latch on a bitmap page would
-	prevent it because of the latching order */
+	prevent it because of the latching order.
+
+	index will be NULL if we are recreating the table during recovery
+	on behalf of TRUNCATE.
+
+	Note: insert buffering is disabled for temporary tables, given
+	that most temporary tables are small and short-lived. */
+	if (!(type & DICT_CLUSTERED)
+	    && (index == NULL || !dict_table_is_temporary(index->table))) {
 
-	if (!(type & DICT_CLUSTERED)) {
 		ibuf_reset_free_bits(block);
 	}
 
@@ -1650,39 +1124,39 @@ btr_create(
 	return(page_no);
 }
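
A hedged sketch of the widened btr_create() signature above, as used for a normal index rather than TRUNCATE redo; univ_page_size and space_id stand in for caller-supplied values and are assumptions here:

	ulint	root_page_no = btr_create(
		index->type, space_id, univ_page_size, index->id,
		index, NULL /* no TRUNCATE redo info */, &mtr);
	if (root_page_no == FIL_NULL) {
		/* Out of space; the caller must handle the failure. */
	}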
 
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
+/** Free a B-tree except the root page. The root page MUST be freed after
+this by calling btr_free_root.
+@param[in,out]	block		root page
+@param[in]	log_mode	mtr logging mode */
+static
 void
 btr_free_but_not_root(
-/*==================*/
-	ulint	space,		/*!< in: space where created */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	root_page_no)	/*!< in: root page number */
+	buf_block_t*	block,
+	mtr_log_t	log_mode)
 {
 	ibool	finished;
-	page_t*	root;
 	mtr_t	mtr;
 
+	ut_ad(page_is_root(block->frame));
 leaf_loop:
 	mtr_start(&mtr);
+	mtr_set_log_mode(&mtr, log_mode);
+	mtr.set_named_space(block->page.id.space());
+
+	page_t*	root = block->frame;
 
-	root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH,
-			    NULL, &mtr);
 #ifdef UNIV_BTR_DEBUG
 	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
-				    + root, space));
+				    + root, block->page.id.space()));
 	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
-				    + root, space));
+				    + root, block->page.id.space()));
 #endif /* UNIV_BTR_DEBUG */
 
 	/* NOTE: page hash indexes are dropped when a page is freed inside
 	fsp0fsp. */
 
 	finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
-				  &mtr);
+				  true, &mtr);
 	mtr_commit(&mtr);
 
 	if (!finished) {
@@ -1691,16 +1165,18 @@ leaf_loop:
 	}
 top_loop:
 	mtr_start(&mtr);
+	mtr_set_log_mode(&mtr, log_mode);
+	mtr.set_named_space(block->page.id.space());
+
+	root = block->frame;
 
-	root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH,
-			    NULL, &mtr);
 #ifdef UNIV_BTR_DEBUG
 	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
-				    + root, space));
+				    + root, block->page.id.space()));
 #endif /* UNIV_BTR_DEBUG */
 
 	finished = fseg_free_step_not_header(
-		root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
+		root + PAGE_HEADER + PAGE_BTR_SEG_TOP, true, &mtr);
 	mtr_commit(&mtr);
 
 	if (!finished) {
@@ -1709,34 +1185,51 @@ top_loop:
 	}
 }
 
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
+/** Free a persistent index tree if it exists.
+@param[in]	page_id		root page id
+@param[in]	page_size	page size
+@param[in]	index_id	PAGE_INDEX_ID contents
+@param[in,out]	mtr		mini-transaction */
 void
-btr_free_root(
-/*==========*/
-	ulint	space,		/*!< in: space where created */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	root_page_no,	/*!< in: root page number */
-	mtr_t*	mtr)		/*!< in/out: mini-transaction */
+btr_free_if_exists(
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	index_id_t		index_id,
+	mtr_t*			mtr)
 {
-	buf_block_t*	block;
-	fseg_header_t*	header;
+	buf_block_t* root = btr_free_root_check(
+		page_id, page_size, index_id, mtr);
 
-	block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
-			      NULL, mtr);
+	if (root == NULL) {
+		return;
+	}
 
-	btr_search_drop_page_hash_index(block);
+	btr_free_but_not_root(root, mtr->get_log_mode());
+	mtr->set_named_space(page_id.space());
+	btr_free_root(root, mtr);
+	btr_free_root_invalidate(root, mtr);
+}
 
-	header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-#ifdef UNIV_BTR_DEBUG
-	ut_a(btr_root_fseg_validate(header, space));
-#endif /* UNIV_BTR_DEBUG */
+/** Free an index tree in a temporary tablespace or during TRUNCATE TABLE.
+@param[in]	page_id		root page id
+@param[in]	page_size	page size */
+void
+btr_free(
+	const page_id_t&	page_id,
+	const page_size_t&	page_size)
+{
+	mtr_t		mtr;
+	mtr.start();
+	mtr.set_log_mode(MTR_LOG_NO_REDO);
 
-	while (!fseg_free_step(header, mtr)) {
-		/* Free the entire segment in small steps. */
-	}
+	buf_block_t*	block = buf_page_get(
+		page_id, page_size, RW_X_LATCH, &mtr);
+
+	ut_ad(page_is_root(block->frame));
+
+	btr_free_but_not_root(block, MTR_LOG_NO_REDO);
+	btr_free_root(block, &mtr);
+	mtr.commit();
 }
 #endif /* !UNIV_HOTBACKUP */
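
A hedged sketch of the btr_free() path above for a temporary-tablespace tree, where redo logging is off and no index id check applies; root_page_no is an assumed caller-known value:

	btr_free(page_id_t(srv_tmp_space.space_id(), root_page_no),
		 univ_page_size);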
 
@@ -1751,7 +1244,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
 
 @retval true if the operation was successful
 @retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
 bool
 btr_page_reorganize_low(
 /*====================*/
@@ -1774,7 +1266,6 @@ btr_page_reorganize_low(
 	page_zip_des_t*	page_zip	= buf_block_get_page_zip(block);
 	buf_block_t*	temp_block;
 	page_t*		temp_page;
-	ulint		log_mode;
 	ulint		data_size1;
 	ulint		data_size2;
 	ulint		max_ins_size1;
@@ -1782,8 +1273,9 @@ btr_page_reorganize_low(
 	bool		success		= false;
 	ulint		pos;
 	bool		log_compressed;
+	bool		is_spatial;
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 	btr_assert_not_corrupted(block, index);
 #ifdef UNIV_ZIP_DEBUG
 	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
@@ -1792,7 +1284,7 @@ btr_page_reorganize_low(
 	max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
 
 	/* Turn logging off */
-	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
+	mtr_log_t	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
 
 #ifndef UNIV_HOTBACKUP
 	temp_block = buf_block_alloc(buf_pool);
@@ -1804,6 +1296,11 @@ btr_page_reorganize_low(
 
 	MONITOR_INC(MONITOR_INDEX_REORG_ATTEMPTS);
 
+	/* This function can be called from log redo with a "dummy"
+	index, so we trust the original page's type instead. */
+	is_spatial = (fil_page_get_type(page) == FIL_PAGE_RTREE
+		      || dict_index_is_spatial(index));
+
 	/* Copy the old page to temporary space */
 	buf_frame_copy(temp_page, page);
 
@@ -1811,10 +1308,7 @@ btr_page_reorganize_low(
 	if (!recovery) {
 		btr_search_drop_page_hash_index(block);
 	}
-
-	block->check_index_page_at_flush = TRUE;
 #endif /* !UNIV_HOTBACKUP */
-	btr_blob_dbg_remove(page, index, "btr_page_reorganize");
 
 	/* Save the cursor position. */
 	pos = page_rec_get_n_recs_before(page_cur_get_rec(cursor));
@@ -1822,7 +1316,7 @@ btr_page_reorganize_low(
 	/* Recreate the page: note that global data on page (possible
 	segment headers, next page-field, etc.) is preserved intact */
 
-	page_create(block, mtr, dict_table_is_comp(index->table));
+	page_create(block, mtr, dict_table_is_comp(index->table), is_spatial);
 
 	/* Copy the records from the temporary space to the recreated page;
 	do not copy the lock bits yet */
@@ -1831,7 +1325,13 @@ btr_page_reorganize_low(
 					page_get_infimum_rec(temp_page),
 					index, mtr);
 
-	if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+	/* Multiple transactions cannot operate on the same temp-table
+	in parallel. max_trx_id is ignored for temp tables because it
+	is not required for MVCC. */
+	if (dict_index_is_sec_or_ibuf(index)
+	    && page_is_leaf(page)
+	    && !dict_table_is_temporary(index->table)) {
 		/* Copy max trx id to recreated page */
 		trx_id_t	max_trx_id = page_get_max_trx_id(temp_page);
 		page_set_max_trx_id(block, NULL, max_trx_id, mtr);
@@ -1850,12 +1350,9 @@ btr_page_reorganize_low(
 	}
 
 	if (page_zip
-	    && !page_zip_compress(page_zip, page, index, z_level, mtr)) {
+	    && !page_zip_compress(page_zip, page, index, z_level, NULL, mtr)) {
 
 		/* Restore the old page and exit. */
-		btr_blob_dbg_restore(page, temp_page, index,
-				     "btr_page_reorganize_compress_fail");
-
 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
 		/* Check that the bytes that we skip are identical. */
 		ut_a(!memcmp(page, temp_page, PAGE_HEADER));
@@ -1880,7 +1377,8 @@ btr_page_reorganize_low(
 	}
 
 #ifndef UNIV_HOTBACKUP
-	if (!recovery) {
+	/* No locks are acquired for intrinsic tables. */
+	if (!recovery && !dict_table_is_locking_disabled(index->table)) {
 		/* Update the record lock bitmaps */
 		lock_move_reorganize_page(block, temp_block);
 	}
@@ -1890,19 +1388,13 @@ btr_page_reorganize_low(
 	max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
 
 	if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
-		buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
-		buf_page_print(temp_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
-		fprintf(stderr,
-			"InnoDB: Error: page old data size %lu"
-			" new data size %lu\n"
-			"InnoDB: Error: page old max ins size %lu"
-			" new max ins size %lu\n"
-			"InnoDB: Submit a detailed bug report"
-			" to http://bugs.mysql.com\n",
-			(unsigned long) data_size1, (unsigned long) data_size2,
-			(unsigned long) max_ins_size1,
-			(unsigned long) max_ins_size2);
+		ib::error()
+			<< "Page old data size " << data_size1
+			<< " new data size " << data_size2
+			<< ", page old max ins size " << max_ins_size1
+			<< " new max ins size " << max_ins_size2;
+
+		ib::error() << BUG_REPORT_MSG;
 		ut_ad(0);
 	} else {
 		success = true;
@@ -1928,8 +1420,8 @@ func_exit:
 
 #ifndef UNIV_HOTBACKUP
 	if (success) {
-		byte	type;
-		byte*	log_ptr;
+		mlog_id_t	type;
+		byte*		log_ptr;
 
 		/* Write the log record */
 		if (page_zip) {
@@ -2004,7 +1496,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
 
 @retval true if the operation was successful
 @retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
 bool
 btr_page_reorganize(
 /*================*/
@@ -2019,8 +1510,7 @@ btr_page_reorganize(
 
 /***********************************************************//**
 Parses a redo log record of reorganizing a page.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_parse_page_reorganize(
 /*======================*/
@@ -2033,7 +1523,9 @@ btr_parse_page_reorganize(
 {
 	ulint	level;
 
-	ut_ad(ptr && end_ptr);
+	ut_ad(ptr != NULL);
+	ut_ad(end_ptr != NULL);
+	ut_ad(index != NULL);
 
 	/* If dealing with a compressed page the record has the
 	compression level used during original compression written in
@@ -2073,26 +1565,24 @@ btr_page_empty(
 {
 	page_t*	page = buf_block_get_frame(block);
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 	ut_ad(page_zip == buf_block_get_page_zip(block));
 #ifdef UNIV_ZIP_DEBUG
 	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
 	btr_search_drop_page_hash_index(block);
-	btr_blob_dbg_remove(page, index, "btr_page_empty");
 
 	/* Recreate the page: note that global data on page (possible
 	segment headers, next page-field, etc.) is preserved intact */
 
 	if (page_zip) {
-		page_create_zip(block, index, level, 0, mtr);
+		page_create_zip(block, index, level, 0, NULL, mtr);
 	} else {
-		page_create(block, mtr, dict_table_is_comp(index->table));
+		page_create(block, mtr, dict_table_is_comp(index->table),
+			    dict_index_is_spatial(index));
 		btr_page_set_level(page, NULL, level, mtr);
 	}
-
-	block->check_index_page_at_flush = TRUE;
 }
 
 /*************************************************************//**
@@ -2101,8 +1591,7 @@ the tuple. It is assumed that mtr contains an x-latch on the tree.
 NOTE that the operation of this function must always succeed,
 we cannot reverse it: therefore enough free disk space must be
 guaranteed to be available before this function is called.
-@return	inserted record */
-UNIV_INTERN
+@return inserted record */
 rec_t*
 btr_root_raise_and_insert(
 /*======================*/
@@ -2151,9 +1640,12 @@ btr_root_raise_and_insert(
 
 	ut_a(dict_index_get_page(index) == page_get_page_no(root));
 #endif /* UNIV_BTR_DEBUG */
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+					MTR_MEMO_X_LOCK
+					| MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
+	ut_ad(mtr_is_block_fix(
+		mtr, root_block, MTR_MEMO_PAGE_X_FIX, index->table));
 
 	/* Allocate a new page to the tree. Root splitting is done by first
 	moving the root records to the new page, emptying the root, putting
@@ -2192,8 +1684,15 @@ btr_root_raise_and_insert(
 
 		/* Update the lock table and possible hash index. */
 
-		lock_move_rec_list_end(new_block, root_block,
-				       page_get_infimum_rec(root));
+		if (!dict_table_is_locking_disabled(index->table)) {
+			lock_move_rec_list_end(new_block, root_block,
+					       page_get_infimum_rec(root));
+		}
+
+		/* Move any existing predicate locks */
+		if (dict_index_is_spatial(index)) {
+			lock_prdt_rec_move(new_block, root_block);
+		}
 
 		btr_search_move_or_delete_hash_entries(new_block, root_block,
 						       index);
@@ -2204,7 +1703,9 @@ btr_root_raise_and_insert(
 	information of the record to be inserted on the infimum of the
 	root page: we cannot discard the lock structs on the root page */
 
-	lock_update_root_raise(new_block, root_block);
+	if (!dict_table_is_locking_disabled(index->table)) {
+		lock_update_root_raise(new_block, root_block);
+	}
 
 	/* Create a memory heap where the node pointer is stored */
 	if (!*heap) {
@@ -2212,13 +1713,20 @@ btr_root_raise_and_insert(
 	}
 
 	rec = page_rec_get_next(page_get_infimum_rec(new_page));
-	new_page_no = buf_block_get_page_no(new_block);
+	new_page_no = new_block->page.id.page_no();
 
 	/* Build the node pointer (= node key and page address) for the
 	child */
+	if (dict_index_is_spatial(index)) {
+		rtr_mbr_t		new_mbr;
 
-	node_ptr = dict_index_build_node_ptr(
-		index, rec, new_page_no, *heap, level);
+		rtr_page_cal_mbr(index, new_block, &new_mbr, *heap);
+		node_ptr = rtr_index_build_node_ptr(
+			index, &new_mbr, rec, new_page_no, *heap, level);
+	} else {
+		node_ptr = dict_index_build_node_ptr(
+			index, rec, new_page_no, *heap, level);
+	}
 	/* The node pointer must be marked as the predefined minimum record,
 	as there is no lower alphabetical limit to records in the leftmost
 	node of a level: */
@@ -2252,28 +1760,29 @@ btr_root_raise_and_insert(
 
 	/* We play safe and reset the free bits for the new page */
 
-#if 0
-	fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
-#endif
-
-	if (!dict_index_is_clust(index)) {
+	if (!dict_index_is_clust(index)
+	    && !dict_table_is_temporary(index->table)) {
 		ibuf_reset_free_bits(new_block);
 	}
 
 	/* Reposition the cursor to the child node */
-	page_cur_search(new_block, index, tuple,
-			PAGE_CUR_LE, page_cursor);
+	page_cur_search(new_block, index, tuple, page_cursor);
 
 	/* Split the child and insert tuple */
-	return(btr_page_split_and_insert(flags, cursor, offsets, heap,
-					 tuple, n_ext, mtr));
+	if (dict_index_is_spatial(index)) {
+		/* Split rtree page and insert tuple */
+		return(rtr_page_split_and_insert(flags, cursor, offsets, heap,
+						 tuple, n_ext, mtr));
+	} else {
+		return(btr_page_split_and_insert(flags, cursor, offsets, heap,
+						 tuple, n_ext, mtr));
+	}
 }
 
 /*************************************************************//**
 Decides if the page should be split at the convergence point of inserts
 converging to the left.
-@return	TRUE if split recommended */
-UNIV_INTERN
+@return TRUE if split recommended */
 ibool
 btr_page_get_split_rec_to_left(
 /*===========================*/
@@ -2317,8 +1826,7 @@ btr_page_get_split_rec_to_left(
 /*************************************************************//**
 Decides if the page should be split at the convergence point of inserts
 converging to the right.
-@return	TRUE if split recommended */
-UNIV_INTERN
+@return TRUE if split recommended */
 ibool
 btr_page_get_split_rec_to_right(
 /*============================*/
@@ -2494,7 +2002,7 @@ func_exit:
 /*************************************************************//**
 Returns TRUE if the insert fits on the appropriate half-page with the
 chosen split_rec.
-@return	true if fits */
+@return true if fits */
 static __attribute__((nonnull(1,3,4,6), warn_unused_result))
 bool
 btr_page_insert_fits(
@@ -2587,7 +2095,6 @@ btr_page_insert_fits(
 /*******************************************************//**
 Inserts a data tuple to a tree on a non-leaf level. It is assumed
 that mtr holds an x-latch on the tree. */
-UNIV_INTERN
 void
 btr_insert_on_non_leaf_level_func(
 /*==============================*/
@@ -2603,14 +2110,37 @@ btr_insert_on_non_leaf_level_func(
 	btr_cur_t	cursor;
 	dberr_t		err;
 	rec_t*		rec;
-	ulint*		offsets	= NULL;
 	mem_heap_t*	heap = NULL;
+	ulint           offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*          offsets         = offsets_;
+	rec_offs_init(offsets_);
+	rtr_info_t	rtr_info;
 
 	ut_ad(level > 0);
 
-	btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
-				    BTR_CONT_MODIFY_TREE,
-				    &cursor, 0, file, line, mtr);
+	if (!dict_index_is_spatial(index)) {
+		if (dict_table_is_intrinsic(index->table)) {
+			btr_cur_search_to_nth_level_with_no_latch(
+				index, level, tuple, PAGE_CUR_LE, &cursor,
+				__FILE__, __LINE__, mtr);
+		} else {
+			btr_cur_search_to_nth_level(
+				index, level, tuple, PAGE_CUR_LE,
+				BTR_CONT_MODIFY_TREE,
+				&cursor, 0, file, line, mtr);
+		}
+	} else {
+		/* For spatial index, initialize structures to track
+		its parents etc. */
+		rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
+
+		rtr_info_update_btr(&cursor, &rtr_info);
+
+		btr_cur_search_to_nth_level(index, level, tuple,
+					    PAGE_CUR_RTREE_INSERT,
+					    BTR_CONT_MODIFY_TREE,
+					    &cursor, 0, file, line, mtr);
+	}
 
 	ut_ad(cursor.flag == BTR_CUR_BINARY);
 
@@ -2632,7 +2162,16 @@ btr_insert_on_non_leaf_level_func(
 						 &dummy_big_rec, 0, NULL, mtr);
 		ut_a(err == DB_SUCCESS);
 	}
-	mem_heap_free(heap);
+
+	if (heap != NULL) {
+		mem_heap_free(heap);
+	}
+
+	if (dict_index_is_spatial(index)) {
+		ut_ad(cursor.rtr_info);
+
+		rtr_clean_rtr_info(&rtr_info, true);
+	}
 }
 
 /**************************************************************//**
@@ -2652,8 +2191,6 @@ btr_attach_half_pages(
 	ulint		direction,	/*!< in: FSP_UP or FSP_DOWN */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
-	ulint		space;
-	ulint		zip_size;
 	ulint		prev_page_no;
 	ulint		next_page_no;
 	ulint		level;
@@ -2666,9 +2203,12 @@ btr_attach_half_pages(
 	page_zip_des_t*	upper_page_zip;
 	dtuple_t*	node_ptr_upper;
 	mem_heap_t*	heap;
+	buf_block_t*	prev_block = NULL;
+	buf_block_t*	next_block = NULL;
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+	ut_ad(mtr_is_block_fix(
+		mtr, new_block, MTR_MEMO_PAGE_X_FIX, index->table));
 
 	/* Create a memory heap where the data tuple is stored */
 	heap = mem_heap_create(1024);
@@ -2680,10 +2220,10 @@ btr_attach_half_pages(
 		ulint*		offsets;
 
 		lower_page = buf_block_get_frame(new_block);
-		lower_page_no = buf_block_get_page_no(new_block);
+		lower_page_no = new_block->page.id.page_no();
 		lower_page_zip = buf_block_get_page_zip(new_block);
 		upper_page = buf_block_get_frame(block);
-		upper_page_no = buf_block_get_page_no(block);
+		upper_page_no = block->page.id.page_no();
 		upper_page_zip = buf_block_get_page_zip(block);
 
 		/* Look up the index for the node pointer to page */
@@ -2700,13 +2240,31 @@ btr_attach_half_pages(
 		mem_heap_empty(heap);
 	} else {
 		lower_page = buf_block_get_frame(block);
-		lower_page_no = buf_block_get_page_no(block);
+		lower_page_no = block->page.id.page_no();
 		lower_page_zip = buf_block_get_page_zip(block);
 		upper_page = buf_block_get_frame(new_block);
-		upper_page_no = buf_block_get_page_no(new_block);
+		upper_page_no = new_block->page.id.page_no();
 		upper_page_zip = buf_block_get_page_zip(new_block);
 	}
 
+	/* Get the previous and next pages of page */
+	prev_page_no = btr_page_get_prev(page, mtr);
+	next_page_no = btr_page_get_next(page, mtr);
+
+	const ulint	space = block->page.id.space();
+
+	/* For consistency, both blocks should be locked before the change. */
+	if (prev_page_no != FIL_NULL && direction == FSP_DOWN) {
+		prev_block = btr_block_get(
+			page_id_t(space, prev_page_no), block->page.size,
+			RW_X_LATCH, index, mtr);
+	}
+	if (next_page_no != FIL_NULL && direction != FSP_DOWN) {
+		next_block = btr_block_get(
+			page_id_t(space, next_page_no), block->page.size,
+			RW_X_LATCH, index, mtr);
+	}
+
 	/* Get the level of the split pages */
 	level = btr_page_get_level(buf_block_get_frame(block), mtr);
 	ut_ad(level
@@ -2724,25 +2282,16 @@ btr_attach_half_pages(
 	btr_insert_on_non_leaf_level(flags, index, level + 1,
 				     node_ptr_upper, mtr);
 
-	/* Free the memory heap */
-	mem_heap_free(heap);
-
-	/* Get the previous and next pages of page */
-
-	prev_page_no = btr_page_get_prev(page, mtr);
-	next_page_no = btr_page_get_next(page, mtr);
-	space = buf_block_get_space(block);
-	zip_size = buf_block_get_zip_size(block);
+	/* Free the memory heap */
+	mem_heap_free(heap);
 
 	/* Update page links of the level */
 
-	if (prev_page_no != FIL_NULL) {
-		buf_block_t*	prev_block = btr_block_get(
-			space, zip_size, prev_page_no, RW_X_LATCH, index, mtr);
+	if (prev_block) {
 #ifdef UNIV_BTR_DEBUG
 		ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
 		ut_a(btr_page_get_next(prev_block->frame, mtr)
-		     == buf_block_get_page_no(block));
+		     == block->page.id.page_no());
 #endif /* UNIV_BTR_DEBUG */
 
 		btr_page_set_next(buf_block_get_frame(prev_block),
@@ -2750,9 +2299,7 @@ btr_attach_half_pages(
 				  lower_page_no, mtr);
 	}
 
-	if (next_page_no != FIL_NULL) {
-		buf_block_t*	next_block = btr_block_get(
-			space, zip_size, next_page_no, RW_X_LATCH, index, mtr);
+	if (next_block) {
 #ifdef UNIV_BTR_DEBUG
 		ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
 		ut_a(btr_page_get_prev(next_block->frame, mtr)
@@ -2764,11 +2311,24 @@ btr_attach_half_pages(
 				  upper_page_no, mtr);
 	}
 
-	btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
-	btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
+	if (direction == FSP_DOWN) {
+		/* lower_page is new */
+		btr_page_set_prev(lower_page, lower_page_zip,
+				  prev_page_no, mtr);
+	} else {
+		ut_ad(btr_page_get_prev(lower_page, mtr) == prev_page_no);
+	}
 
+	btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
 	btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
-	btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
+
+	if (direction != FSP_DOWN) {
+		/* upper_page is new */
+		btr_page_set_next(upper_page, upper_page_zip,
+				  next_page_no, mtr);
+	} else {
+		ut_ad(btr_page_get_next(upper_page, mtr) == next_page_no);
+	}
 }
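
A worked picture of the relinking above for a split to the right (direction != FSP_DOWN), where upper_page is the new page: only the new page's outward link is written, while the surviving page's unchanged link is merely asserted.

	before:   prev <-> old <-> next
	after:    prev <-> old (lower) <-> new (upper) <-> next

	written:  old.next = new, new.prev = old, new.next = next,
	          and next.prev = new (via next_block above)
	asserted: old.prev == prev (unchanged)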
 
 /*************************************************************//**
@@ -2830,9 +2390,12 @@ btr_insert_into_right_sibling(
 	page_t*		page = buf_block_get_frame(block);
 	ulint		next_page_no = btr_page_get_next(page, mtr);
 
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(dict_table_is_intrinsic(cursor->index->table)
+	      || mtr_memo_contains_flagged(
+			mtr, dict_index_get_lock(cursor->index),
+			MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
+	ut_ad(mtr_is_block_fix(
+		mtr, block, MTR_MEMO_PAGE_X_FIX, cursor->index->table));
 	ut_ad(heap);
 
 	if (next_page_no == FIL_NULL || !page_rec_is_supremum(
@@ -2846,12 +2409,13 @@ btr_insert_into_right_sibling(
 	page_t*		next_page;
 	btr_cur_t	next_father_cursor;
 	rec_t*		rec = NULL;
-	ulint		zip_size = buf_block_get_zip_size(block);
 	ulint		max_size;
 
+	const ulint	space = block->page.id.space();
+
 	next_block = btr_block_get(
-		buf_block_get_space(block), zip_size,
-		next_page_no, RW_X_LATCH, cursor->index, mtr);
+		page_id_t(space, next_page_no), block->page.size,
+		RW_X_LATCH, cursor->index, mtr);
 	next_page = buf_block_get_frame(next_block);
 
 	bool	is_leaf = page_is_leaf(next_page);
@@ -2866,15 +2430,19 @@ btr_insert_into_right_sibling(
 	max_size = page_get_max_insert_size_after_reorganize(next_page, 1);
 
 	/* Extends gap lock for the next page */
-	lock_update_split_left(next_block, block);
+	if (!dict_table_is_locking_disabled(cursor->index->table)) {
+		lock_update_split_left(next_block, block);
+	}
 
 	rec = page_cur_tuple_insert(
 		&next_page_cursor, tuple, cursor->index, offsets, &heap,
 		n_ext, mtr);
 
 	if (rec == NULL) {
-		if (zip_size && is_leaf
-		    && !dict_index_is_clust(cursor->index)) {
+		if (is_leaf
+		    && next_block->page.size.is_compressed()
+		    && !dict_index_is_clust(cursor->index)
+		    && !dict_table_is_temporary(cursor->index->table)) {
 			/* Reset the IBUF_BITMAP_FREE bits, because
 			page_cur_tuple_insert() will have attempted page
 			reorganize before failing. */
@@ -2897,7 +2465,7 @@ btr_insert_into_right_sibling(
 
 	compressed = btr_cur_pessimistic_delete(
 		&err, TRUE, &next_father_cursor,
-		BTR_CREATE_FLAG, RB_NONE, mtr);
+		BTR_CREATE_FLAG, false, mtr);
 
 	ut_a(err == DB_SUCCESS);
 
@@ -2906,7 +2474,7 @@ btr_insert_into_right_sibling(
 	}
 
 	dtuple_t*	node_ptr = dict_index_build_node_ptr(
-		cursor->index, rec, buf_block_get_page_no(next_block),
+		cursor->index, rec, next_block->page.id.page_no(),
 		heap, level);
 
 	btr_insert_on_non_leaf_level(
@@ -2914,11 +2482,13 @@ btr_insert_into_right_sibling(
 
 	ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
 
-	if (is_leaf && !dict_index_is_clust(cursor->index)) {
+	if (is_leaf
+	    && !dict_index_is_clust(cursor->index)
+	    && !dict_table_is_temporary(cursor->index->table)) {
 		/* Update the free bits of the B-tree page in the
 		insert buffer bitmap. */
 
-		if (zip_size) {
+		if (next_block->page.size.is_compressed()) {
 			ibuf_update_free_bits_zip(next_block, mtr);
 		} else {
 			ibuf_update_free_bits_if_full(
@@ -2939,7 +2509,6 @@ free disk space (2 pages) must be guaranteed to be available before
 this function is called.
 
 @return inserted record */
-UNIV_INTERN
 rec_t*
 btr_page_split_and_insert(
 /*======================*/
@@ -2975,6 +2544,15 @@ btr_page_split_and_insert(
 	ulint		n_iterations = 0;
 	rec_t*		rec;
 	ulint		n_uniq;
+	dict_index_t*	index;
+
+	index = btr_cur_get_index(cursor);
+
+	if (dict_index_is_spatial(index)) {
+		/* Split rtree page and update parent */
+		return(rtr_page_split_and_insert(flags, cursor, offsets, heap,
+						 tuple, n_ext, mtr));
+	}
 
 	if (!*heap) {
 		*heap = mem_heap_create(1024);
@@ -2984,20 +2562,23 @@ func_start:
 	mem_heap_empty(*heap);
 	*offsets = NULL;
 
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
-				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains_flagged(mtr,
+					dict_index_get_lock(cursor->index),
+					MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(cursor->index->table));
 	ut_ad(!dict_index_is_online_ddl(cursor->index)
 	      || (flags & BTR_CREATE_FLAG)
 	      || dict_index_is_clust(cursor->index));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own_flagged(dict_index_get_lock(cursor->index),
+				  RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX)
+	      || dict_table_is_intrinsic(cursor->index->table));
 
 	block = btr_cur_get_block(cursor);
 	page = buf_block_get_frame(block);
 	page_zip = buf_block_get_page_zip(block);
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(
+		mtr, block, MTR_MEMO_PAGE_X_FIX, cursor->index->table));
 	ut_ad(!page_is_empty(page));
 
 	/* try to insert to the next page if possible before split */
@@ -3008,7 +2589,7 @@ func_start:
 		return(rec);
 	}
 
-	page_no = buf_block_get_page_no(block);
+	page_no = block->page.id.page_no();
 
 	/* 1. Decide the split record; split_rec == NULL means that the
 	tuple to be inserted should be the first record on the upper
@@ -3087,8 +2668,9 @@ func_start:
 insert_empty:
 		ut_ad(!split_rec);
 		ut_ad(!insert_left);
-		buf = (byte*) mem_alloc(rec_get_converted_size(cursor->index,
-							       tuple, n_ext));
+		buf = UT_NEW_ARRAY_NOKEY(
+			byte,
+			rec_get_converted_size(cursor->index, tuple, n_ext));
 
 		first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
 						      tuple, n_ext);
@@ -3111,7 +2693,7 @@ insert_empty:
 						offsets, tuple, n_ext, heap);
 	} else {
 		if (!insert_left) {
-			mem_free(buf);
+			UT_DELETE_ARRAY(buf);
 			buf = NULL;
 		}
 
@@ -3120,11 +2702,18 @@ insert_empty:
 						offsets, tuple, n_ext, heap);
 	}
 
-	if (insert_will_fit && page_is_leaf(page)
+	if (!srv_read_only_mode
+	    && !dict_table_is_intrinsic(cursor->index->table)
+	    && insert_will_fit
+	    && page_is_leaf(page)
 	    && !dict_index_is_online_ddl(cursor->index)) {
 
-		mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
-				 MTR_MEMO_X_LOCK);
+		mtr->memo_release(
+			dict_index_get_lock(cursor->index),
+			MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK);
+
+		/* NOTE: We cannot release the root block latch here, because
+		it has a segment header and is already modified in most cases. */
 	}
 
 	/* 5. Move then the records to the new page */
@@ -3153,9 +2742,12 @@ insert_empty:
 
 			/* Update the lock table and possible hash index. */
 
-			lock_move_rec_list_start(
-				new_block, block, move_limit,
-				new_page + PAGE_NEW_INFIMUM);
+			if (!dict_table_is_locking_disabled(
+				cursor->index->table)) {
+				lock_move_rec_list_start(
+					new_block, block, move_limit,
+					new_page + PAGE_NEW_INFIMUM);
+			}
 
 			btr_search_move_or_delete_hash_entries(
 				new_block, block, cursor->index);
@@ -3169,7 +2761,9 @@ insert_empty:
 		left_block = new_block;
 		right_block = block;
 
-		lock_update_split_left(right_block, left_block);
+		if (!dict_table_is_locking_disabled(cursor->index->table)) {
+			lock_update_split_left(right_block, left_block);
+		}
 	} else {
 		/*		fputs("Split right\n", stderr); */
 
@@ -3193,8 +2787,13 @@ insert_empty:
 						   cursor->index, mtr);
 
 			/* Update the lock table and possible hash index. */
+			if (!dict_table_is_locking_disabled(
+				cursor->index->table)) {
+				lock_move_rec_list_end(
+					new_block, block, move_limit);
+			}
 
-			lock_move_rec_list_end(new_block, block, move_limit);
+			ut_ad(!dict_index_is_spatial(index));
 
 			btr_search_move_or_delete_hash_entries(
 				new_block, block, cursor->index);
@@ -3210,7 +2809,9 @@ insert_empty:
 		left_block = block;
 		right_block = new_block;
 
-		lock_update_split_right(right_block, left_block);
+		if (!dict_table_is_locking_disabled(cursor->index->table)) {
+			lock_update_split_right(right_block, left_block);
+		}
 	}
 
 #ifdef UNIV_ZIP_DEBUG
@@ -3235,8 +2836,7 @@ insert_empty:
 	/* 7. Reposition the cursor for insert and try insertion */
 	page_cursor = btr_cur_get_page_cur(cursor);
 
-	page_cur_search(insert_block, cursor->index, tuple,
-			PAGE_CUR_LE, page_cursor);
+	page_cur_search(insert_block, cursor->index, tuple, page_cursor);
 
 	rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
 				    offsets, heap, n_ext, mtr);
@@ -3277,14 +2877,13 @@ insert_empty:
 		/* The insert did not fit on the page: loop back to the
 		start of the function for a new split */
 insert_failed:
-		/* We play safe and reset the free bits */
-		if (!dict_index_is_clust(cursor->index)) {
+		/* We play safe and reset the free bits for new_page */
+		if (!dict_index_is_clust(cursor->index)
+		    && !dict_table_is_temporary(cursor->index->table)) {
 			ibuf_reset_free_bits(new_block);
 			ibuf_reset_free_bits(block);
 		}
 
-		/* fprintf(stderr, "Split second round %lu\n",
-		page_get_page_no(page)); */
 		n_iterations++;
 		ut_ad(n_iterations < 2
 		      || buf_block_get_page_zip(insert_block));
@@ -3297,17 +2896,14 @@ func_exit:
 	/* Insert fit on the page: update the free bits for the
 	left and right pages in the same mtr */
 
-	if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
+	if (!dict_index_is_clust(cursor->index)
+	    && !dict_table_is_temporary(cursor->index->table)
+	    && page_is_leaf(page)) {
+
 		ibuf_update_free_bits_for_two_pages_low(
-			buf_block_get_zip_size(left_block),
 			left_block, right_block, mtr);
 	}
 
-#if 0
-	fprintf(stderr, "Split and insert done %lu %lu\n",
-		buf_block_get_page_no(left_block),
-		buf_block_get_page_no(right_block));
-#endif
 	MONITOR_INC(MONITOR_INDEX_SPLIT);
 
 	ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
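
A note on the free-bits bookkeeping above: the insert buffer keeps a coarse
per-page estimate of free space (the IBUF_BITMAP_FREE bits) for secondary
index leaf pages, which is why the split path refreshes it on success and
resets it on failure. A minimal standalone sketch of a 2-bit free-space
encoding in that spirit; the thresholds below are illustrative assumptions,
not the exact InnoDB constants:

    #include <cstdint>
    #include <cstdio>

    /* Map the free bytes on a page to a 2-bit category, in the spirit
    of IBUF_BITMAP_FREE. Thresholds are assumed for illustration. */
    static unsigned free_bits_from_space(uint32_t free_bytes)
    {
            if (free_bytes >= 2048) return 3;       /* lots of room */
            if (free_bytes >= 1024) return 2;
            if (free_bytes >= 512)  return 1;
            return 0;                               /* nearly full */
    }

    int main()
    {
            /* After a split, both resulting pages get fresh estimates. */
            printf("left=%u right=%u\n",
                   free_bits_from_space(3000), free_bits_from_space(700));
            return 0;
    }

The bitmap stores a category rather than a byte count, which is why it is
always safe to reset it downward but never to leave it too high.
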
@@ -3317,60 +2913,46 @@ func_exit:
 	return(rec);
 }
 
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************//**
-Removes a page from the level list of pages.
-@param space	in: space where removed
-@param zip_size	in: compressed page size in bytes, or 0 for uncompressed
-@param page	in/out: page to remove
-@param index	in: index tree
-@param mtr	in/out: mini-transaction */
-# define btr_level_list_remove(space,zip_size,page,index,mtr)		\
-	btr_level_list_remove_func(space,zip_size,page,index,mtr)
-#else /* UNIV_SYNC_DEBUG */
-/*************************************************************//**
-Removes a page from the level list of pages.
-@param space	in: space where removed
-@param zip_size	in: compressed page size in bytes, or 0 for uncompressed
-@param page	in/out: page to remove
-@param index	in: index tree
-@param mtr	in/out: mini-transaction */
-# define btr_level_list_remove(space,zip_size,page,index,mtr)		\
-	btr_level_list_remove_func(space,zip_size,page,mtr)
-#endif /* UNIV_SYNC_DEBUG */
-
-/*************************************************************//**
-Removes a page from the level list of pages. */
-static __attribute__((nonnull))
+/** Removes a page from the level list of pages.
+@param[in]	space		space where removed
+@param[in]	page_size	page size
+@param[in,out]	page		page to remove
+@param[in]	index		index tree
+@param[in,out]	mtr		mini-transaction */
+# define btr_level_list_remove(space,page_size,page,index,mtr)		\
+	btr_level_list_remove_func(space,page_size,page,index,mtr)
+
+/** Removes a page from the level list of pages.
+@param[in]	space		space where removed
+@param[in]	page_size	page size
+@param[in,out]	page		page to remove
+@param[in]	index		index tree
+@param[in,out]	mtr		mini-transaction */
+static
 void
 btr_level_list_remove_func(
-/*=======================*/
-	ulint			space,	/*!< in: space where removed */
-	ulint			zip_size,/*!< in: compressed page size in bytes
-					or 0 for uncompressed pages */
-	page_t*			page,	/*!< in/out: page to remove */
-#ifdef UNIV_SYNC_DEBUG
-	const dict_index_t*	index,	/*!< in: index tree */
-#endif /* UNIV_SYNC_DEBUG */
-	mtr_t*			mtr)	/*!< in/out: mini-transaction */
+	ulint			space,
+	const page_size_t&	page_size,
+	page_t*			page,
+	const dict_index_t*	index,
+	mtr_t*			mtr)
 {
-	ulint	prev_page_no;
-	ulint	next_page_no;
-
-	ut_ad(page && mtr);
-	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(page != NULL);
+	ut_ad(mtr != NULL);
+	ut_ad(mtr_is_page_fix(mtr, page, MTR_MEMO_PAGE_X_FIX, index->table));
 	ut_ad(space == page_get_space_id(page));
 	/* Get the previous and next page numbers of page */
 
-	prev_page_no = btr_page_get_prev(page, mtr);
-	next_page_no = btr_page_get_next(page, mtr);
+	const ulint	prev_page_no = btr_page_get_prev(page, mtr);
+	const ulint	next_page_no = btr_page_get_next(page, mtr);
 
 	/* Update page links of the level */
 
 	if (prev_page_no != FIL_NULL) {
 		buf_block_t*	prev_block
-			= btr_block_get(space, zip_size, prev_page_no,
-					RW_X_LATCH, index, mtr);
+			= btr_block_get(page_id_t(space, prev_page_no),
+					page_size, RW_X_LATCH, index, mtr);
+
 		page_t*		prev_page
 			= buf_block_get_frame(prev_block);
 #ifdef UNIV_BTR_DEBUG
@@ -3386,8 +2968,10 @@ btr_level_list_remove_func(
 
 	if (next_page_no != FIL_NULL) {
 		buf_block_t*	next_block
-			= btr_block_get(space, zip_size, next_page_no,
-					RW_X_LATCH, index, mtr);
+			= btr_block_get(
+				page_id_t(space, next_page_no), page_size,
+				RW_X_LATCH, index, mtr);
+
 		page_t*		next_page
 			= buf_block_get_frame(next_block);
 #ifdef UNIV_BTR_DEBUG
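
btr_level_list_remove_func() above is an unlink from a doubly-linked list
whose links are page numbers rather than pointers (FIL_NULL meaning no
neighbour), stored in the page headers. The same relinking in a
self-contained toy, with a vector standing in for the buffer pool:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static const uint32_t NO_PAGE = UINT32_MAX; /* stands in for FIL_NULL */

    struct toy_page {
            uint32_t prev = NO_PAGE;
            uint32_t next = NO_PAGE;
    };

    /* Unlink pages[n] from its level list, as btr_level_list_remove()
    does with the FIL_PAGE_PREV/FIL_PAGE_NEXT fields. */
    static void level_list_remove(std::vector<toy_page>& pages, uint32_t n)
    {
            const uint32_t prev = pages[n].prev;
            const uint32_t next = pages[n].next;

            if (prev != NO_PAGE) {
                    pages[prev].next = next;
            }
            if (next != NO_PAGE) {
                    pages[next].prev = prev;
            }
    }

    int main()
    {
            std::vector<toy_page> pages(3);
            pages[0].next = 1; pages[1].prev = 0;
            pages[1].next = 2; pages[2].prev = 1;

            level_list_remove(pages, 1);
            printf("0.next=%u 2.prev=%u\n",
                   (unsigned) pages[0].next, (unsigned) pages[2].prev);
            return 0;
    }
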
@@ -3409,9 +2993,10 @@ UNIV_INLINE
 void
 btr_set_min_rec_mark_log(
 /*=====================*/
-	rec_t*	rec,	/*!< in: record */
-	byte	type,	/*!< in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
-	mtr_t*	mtr)	/*!< in: mtr */
+	rec_t*		rec,	/*!< in: record */
+	mlog_id_t	type,	/*!< in: MLOG_COMP_REC_MIN_MARK or
+				MLOG_REC_MIN_MARK */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	mlog_write_initial_log_record(rec, type, mtr);
 
@@ -3425,8 +3010,7 @@ btr_set_min_rec_mark_log(
 /****************************************************************//**
 Parses the redo log record for setting an index record as the predefined
 minimum record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_parse_set_min_rec_mark(
 /*=======================*/
@@ -3456,7 +3040,6 @@ btr_parse_set_min_rec_mark(
 
 /****************************************************************//**
 Sets a record as the predefined minimum record. */
-UNIV_INTERN
 void
 btr_set_min_rec_mark(
 /*=================*/
@@ -3483,7 +3066,6 @@ btr_set_min_rec_mark(
 #ifndef UNIV_HOTBACKUP
 /*************************************************************//**
 Deletes on the upper level the node pointer to a page. */
-UNIV_INTERN
 void
 btr_node_ptr_delete(
 /*================*/
@@ -3495,13 +3077,13 @@ btr_node_ptr_delete(
 	ibool		compressed;
 	dberr_t		err;
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 
 	/* Delete node pointer on father page */
 	btr_page_get_father(index, block, mtr, &cursor);
 
 	compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
-						BTR_CREATE_FLAG, RB_NONE, mtr);
+						BTR_CREATE_FLAG, false, mtr);
 	ut_a(err == DB_SUCCESS);
 
 	if (!compressed) {
@@ -3538,7 +3120,7 @@ btr_lift_page_up(
 
 	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
 	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 
 	page_level = btr_page_get_level(page, mtr);
 	root_page_no = dict_index_get_page(index);
@@ -3565,7 +3147,7 @@ btr_lift_page_up(
 		the first level, the tree is in an inconsistent state
 		and can not be searched. */
 		for (b = father_block;
-		     buf_block_get_page_no(b) != root_page_no; ) {
+		     b->page.id.page_no() != root_page_no; ) {
 			ut_a(n_blocks < BTR_MAX_LEVELS);
 
 			offsets = btr_page_get_father_block(offsets, heap,
@@ -3591,7 +3173,8 @@ btr_lift_page_up(
 
 			ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
 			ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
-			ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+			ut_ad(mtr_is_block_fix(
+				mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 
 			father_block = blocks[0];
 			father_page_zip = buf_block_get_page_zip(father_block);
@@ -3626,15 +3209,23 @@ btr_lift_page_up(
 
 		/* Update the lock table and possible hash index. */
 
-		lock_move_rec_list_end(father_block, block,
-				       page_get_infimum_rec(page));
+		if (!dict_table_is_locking_disabled(index->table)) {
+			lock_move_rec_list_end(father_block, block,
+					       page_get_infimum_rec(page));
+		}
+
+		/* Also update the predicate locks */
+		if (dict_index_is_spatial(index)) {
+			lock_prdt_rec_move(father_block, block);
+		}
 
 		btr_search_move_or_delete_hash_entries(father_block, block,
 						       index);
 	}
 
-	btr_blob_dbg_remove(page, index, "btr_lift_page_up");
-	lock_update_copy_and_discard(father_block, block);
+	if (!dict_table_is_locking_disabled(index->table)) {
+		lock_update_copy_and_discard(father_block, block);
+	}
 
 	/* Go upward to root page, decrementing levels by one. */
 	for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
@@ -3649,11 +3240,16 @@ btr_lift_page_up(
 #endif /* UNIV_ZIP_DEBUG */
 	}
 
+	if (dict_index_is_spatial(index)) {
+		rtr_check_discard_page(index, NULL, block);
+	}
+
 	/* Free the file page */
 	btr_page_free(index, block, mtr);
 
 	/* We play it safe and reset the free bits for the father */
-	if (!dict_index_is_clust(index)) {
+	if (!dict_index_is_clust(index)
+	    && !dict_table_is_temporary(index->table)) {
 		ibuf_reset_free_bits(father_block);
 	}
 	ut_ad(page_validate(father_page, index));
@@ -3671,8 +3267,7 @@ level lifts the records of the page to the father page, thus reducing the
 tree height. It is assumed that mtr holds an x-latch on the tree and on the
 page. If cursor is on the leaf level, mtr must also hold x-latches to the
 brothers, if they exist.
-@return	TRUE on success */
-UNIV_INTERN
+@return TRUE on success */
 ibool
 btr_compress(
 /*=========*/
@@ -3686,7 +3281,6 @@ btr_compress(
 {
 	dict_index_t*	index;
 	ulint		space;
-	ulint		zip_size;
 	ulint		left_page_no;
 	ulint		right_page_no;
 	buf_block_t*	merge_block;
@@ -3699,6 +3293,10 @@ btr_compress(
 	mem_heap_t*	heap;
 	ulint*		offsets;
 	ulint		nth_rec = 0; /* remove bogus warning */
+	bool		mbr_changed = false;
+#ifdef UNIV_DEBUG
+	bool		leftmost_child;
+#endif
 	DBUG_ENTER("btr_compress");
 
 	block = btr_cur_get_block(cursor);
@@ -3707,11 +3305,22 @@ btr_compress(
 
 	btr_assert_not_corrupted(block, index);
 
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_DEBUG
+	if (dict_index_is_spatial(index)) {
+		ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+						MTR_MEMO_X_LOCK));
+	} else {
+		ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+						MTR_MEMO_X_LOCK
+						| MTR_MEMO_SX_LOCK)
+		      || dict_table_is_intrinsic(index->table));
+	}
+#endif /* UNIV_DEBUG */
+
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 	space = dict_index_get_space(index);
-	zip_size = dict_table_zip_size(index->table);
+
+	const page_size_t	page_size(dict_table_page_size(index->table));
 
 	MONITOR_INC(MONITOR_INDEX_MERGE_ATTEMPTS);
 
@@ -3727,8 +3336,27 @@ btr_compress(
 #endif /* UNIV_DEBUG */
 
 	heap = mem_heap_create(100);
-	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
-					    &father_cursor);
+
+	if (dict_index_is_spatial(index)) {
+		offsets = rtr_page_get_father_block(
+			NULL, heap, index, block, mtr, cursor, &father_cursor);
+		ut_ad(cursor->page_cur.block->page.id.page_no()
+		      == block->page.id.page_no());
+		rec_t*  my_rec = father_cursor.page_cur.rec;
+
+		ulint page_no = btr_node_ptr_get_child_page_no(my_rec, offsets);
+
+		if (page_no != block->page.id.page_no()) {
+			ib::info() << "father positioned on page "
+				<< page_no << " instead of "
+				<< block->page.id.page_no();
+			offsets = btr_page_get_father_block(
+				NULL, heap, index, block, mtr, &father_cursor);
+		}
+	} else {
+		offsets = btr_page_get_father_block(
+			NULL, heap, index, block, mtr, &father_cursor);
+	}
 
 	if (adjust) {
 		nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
@@ -3743,6 +3371,13 @@ btr_compress(
 		goto func_exit;
 	}
 
+	ut_d(leftmost_child =
+		left_page_no != FIL_NULL
+		&& (page_rec_get_next(
+			page_get_infimum_rec(
+				btr_cur_get_page(&father_cursor)))
+		    == btr_cur_get_rec(&father_cursor)));
+
 	/* Decide the page to which we try to merge and which will inherit
 	the locks */
 
@@ -3750,10 +3385,13 @@ btr_compress(
 					  &merge_block, mtr);
 
 	DBUG_EXECUTE_IF("ib_always_merge_right", is_left = FALSE;);
-
-	if(!is_left
+retry:
+	if (!is_left
 	   && !btr_can_merge_with_page(cursor, right_page_no, &merge_block,
 				       mtr)) {
+		if (!merge_block) {
+			merge_page = NULL;
+		}
 		goto err_exit;
 	}
 
@@ -3761,14 +3399,26 @@ btr_compress(
 
 #ifdef UNIV_BTR_DEBUG
 	if (is_left) {
-                ut_a(btr_page_get_next(merge_page, mtr)
-                     == buf_block_get_page_no(block));
+		ut_a(btr_page_get_next(merge_page, mtr)
+		     == block->page.id.page_no());
 	} else {
-               ut_a(btr_page_get_prev(merge_page, mtr)
-                     == buf_block_get_page_no(block));
+		ut_a(btr_page_get_prev(merge_page, mtr)
+		     == block->page.id.page_no());
 	}
 #endif /* UNIV_BTR_DEBUG */
 
+#ifdef UNIV_GIS_DEBUG
+	if (dict_index_is_spatial(index)) {
+		if (is_left) {
+			fprintf(stderr, "GIS_DIAG: merge left  %ld to %ld \n",
+				(long) block->page.id.page_no(), left_page_no);
+		} else {
+			fprintf(stderr, "GIS_DIAG: merge right %ld to %ld\n",
+				(long) block->page.id.page_no(), right_page_no);
+		}
+	}
+#endif /* UNIV_GIS_DEBUG */
+
 	ut_ad(page_validate(merge_page, index));
 
 	merge_page_zip = buf_block_get_page_zip(merge_block);
@@ -3784,6 +3434,33 @@ btr_compress(
 
 	/* Move records to the merge page */
 	if (is_left) {
+		btr_cur_t	cursor2;
+		rtr_mbr_t	new_mbr;
+		ulint*		offsets2 = NULL;
+
+		/* For rtree, we need to update father's mbr. */
+		if (dict_index_is_spatial(index)) {
+			/* We only support merging pages that have the
+			same parent page */
+			if (!rtr_check_same_block(
+				index, &cursor2,
+				btr_cur_get_block(&father_cursor),
+				merge_block, heap)) {
+				is_left = false;
+				goto retry;
+			}
+
+			offsets2 = rec_get_offsets(
+				btr_cur_get_rec(&cursor2), index,
+				NULL, ULINT_UNDEFINED, &heap);
+
+			/* Check if parent entry needs to be updated */
+			mbr_changed = rtr_merge_mbr_changed(
+				&cursor2, &father_cursor,
+				offsets2, offsets, &new_mbr,
+				merge_block, block, index);
+		}
+
 		rec_t*	orig_pred = page_copy_rec_list_start(
 			merge_block, block, page_get_supremum_rec(page),
 			index, mtr);
@@ -3795,10 +3472,51 @@ btr_compress(
 		btr_search_drop_page_hash_index(block);
 
 		/* Remove the page from the level list */
-		btr_level_list_remove(space, zip_size, page, index, mtr);
+		btr_level_list_remove(space, page_size, page, index, mtr);
+
+		if (dict_index_is_spatial(index)) {
+			rec_t*  my_rec = father_cursor.page_cur.rec;
+
+			ulint page_no = btr_node_ptr_get_child_page_no(
+						my_rec, offsets);
+
+			if (page_no != block->page.id.page_no()) {
+
+				ib::fatal() << "father positioned on "
+					<< page_no << " instead of "
+					<< block->page.id.page_no();
+
+				ut_ad(0);
+			}
+
+			if (mbr_changed) {
+#ifdef UNIV_DEBUG
+				bool	success = rtr_update_mbr_field(
+					&cursor2, offsets2, &father_cursor,
+					merge_page, &new_mbr, NULL, mtr);
+
+				ut_ad(success);
+#else
+				rtr_update_mbr_field(
+					&cursor2, offsets2, &father_cursor,
+					merge_page, &new_mbr, NULL, mtr);
+#endif
+			} else {
+				rtr_node_ptr_delete(
+					index, &father_cursor, block, mtr);
+			}
 
-		btr_node_ptr_delete(index, block, mtr);
-		lock_update_merge_left(merge_block, orig_pred, block);
+			/* There is no GAP lock to worry about */
+			lock_mutex_enter();
+			lock_rec_free_all_from_discard_page(block);
+			lock_mutex_exit();
+		} else {
+			btr_node_ptr_delete(index, block, mtr);
+			if (!dict_table_is_locking_disabled(index->table)) {
+				lock_update_merge_left(
+					merge_block, orig_pred, block);
+			}
+		}
 
 		if (adjust) {
 			nth_rec += page_rec_get_n_recs_before(orig_pred);
@@ -3814,7 +3532,22 @@ btr_compress(
 		byte		fil_page_prev[4];
 #endif /* UNIV_BTR_DEBUG */
 
-		btr_page_get_father(index, merge_block, mtr, &cursor2);
+		if (dict_index_is_spatial(index)) {
+			cursor2.rtr_info = NULL;
+
+			/* For a spatial index, we disallow merging blocks
+			with different parents, since the merge would need
+			to update the entry (for the MBR and primary key)
+			in the parent of the block being merged */
+			if (!rtr_check_same_block(
+				index, &cursor2,
+				btr_cur_get_block(&father_cursor),
+				merge_block, heap)) {
+				goto err_exit;
+			}
+		} else {
+			btr_page_get_father(index, merge_block, mtr, &cursor2);
+		}
 
 		if (merge_page_zip && left_page_no == FIL_NULL) {
 
@@ -3864,7 +3597,11 @@ btr_compress(
 #endif /* UNIV_BTR_DEBUG */
 
 		/* Remove the page from the level list */
-		btr_level_list_remove(space, zip_size, page, index, mtr);
+		btr_level_list_remove(space, page_size, page, index, mtr);
+
+		ut_ad(btr_node_ptr_get_child_page_no(
+			btr_cur_get_rec(&father_cursor), offsets)
+			== block->page.id.page_no());
 
 		/* Replace the address of the old child node (= page) with the
 		address of the merge page to the right */
@@ -3873,21 +3610,58 @@ btr_compress(
 			btr_cur_get_page_zip(&father_cursor),
 			offsets, right_page_no, mtr);
 
-		compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor2,
-							BTR_CREATE_FLAG,
-							RB_NONE, mtr);
-		ut_a(err == DB_SUCCESS);
-
-		if (!compressed) {
-			btr_cur_compress_if_useful(&cursor2, FALSE, mtr);
+#ifdef UNIV_DEBUG
+		if (!page_is_leaf(page) && left_page_no == FIL_NULL) {
+			ut_ad(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+				page_rec_get_next(page_get_infimum_rec(
+					buf_block_get_frame(merge_block))),
+				page_is_comp(page)));
 		}
+#endif /* UNIV_DEBUG */
 
-		lock_update_merge_right(merge_block, orig_succ, block);
-	}
+		/* For rtree, we need to update father's mbr. */
+		if (dict_index_is_spatial(index)) {
+			ulint*	offsets2;
+			offsets2 = rec_get_offsets(
+				btr_cur_get_rec(&cursor2),
+				index, NULL, ULINT_UNDEFINED, &heap);
+
+			ut_ad(btr_node_ptr_get_child_page_no(
+				btr_cur_get_rec(&cursor2), offsets2)
+				== right_page_no);
+
+			rtr_merge_and_update_mbr(&father_cursor,
+						 &cursor2,
+						 offsets, offsets2,
+						 merge_page, merge_block,
+						 block, index, mtr);
+			lock_mutex_enter();
+			lock_rec_free_all_from_discard_page(block);
+			lock_mutex_exit();
+		} else {
+
+			compressed = btr_cur_pessimistic_delete(&err, TRUE,
+								&cursor2,
+								BTR_CREATE_FLAG,
+								false, mtr);
+			ut_a(err == DB_SUCCESS);
+
+			if (!compressed) {
+				btr_cur_compress_if_useful(&cursor2,
+							   FALSE,
+							   mtr);
+			}
 
-	btr_blob_dbg_remove(page, index, "btr_compress");
+			if (!dict_table_is_locking_disabled(index->table)) {
+				lock_update_merge_right(
+					merge_block, orig_succ, block);
+			}
+		}
+	}
 
-	if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
+	if (!dict_index_is_clust(index)
+	    && !dict_table_is_temporary(index->table)
+	    && page_is_leaf(merge_page)) {
 		/* Update the free bits of the B-tree page in the
 		insert buffer bitmap.  This has to be done in a
 		separate mini-transaction that is committed before the
@@ -3910,7 +3684,7 @@ btr_compress(
 		committed mini-transaction, because in crash recovery,
 		the free bits could momentarily be set too high. */
 
-		if (zip_size) {
+		if (page_size.is_compressed()) {
 			/* Because the free bits may be incremented
 			and we cannot update the insert buffer bitmap
 			in the same mini-transaction, the only safe
@@ -3934,10 +3708,25 @@ btr_compress(
 						  index));
 #endif /* UNIV_ZIP_DEBUG */
 
+	if (dict_index_is_spatial(index)) {
+#ifdef UNIV_GIS_DEBUG
+		fprintf(stderr, "GIS_DIAG: compressed away  %ld\n",
+			(long) block->page.id.page_no());
+		fprintf(stderr, "GIS_DIAG: merged to %ld\n",
+			(long) merge_block->page.id.page_no());
+#endif
+
+		rtr_check_discard_page(index, NULL, block);
+	}
+
 	/* Free the file page */
 	btr_page_free(index, block, mtr);
 
-	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+	/* btr_check_node_ptr() needs the parent block latched.
+	If the merge_block's parent block is not the same,
+	we cannot use btr_check_node_ptr() */
+	ut_ad(leftmost_child
+	      || btr_check_node_ptr(index, merge_block, mtr));
 func_exit:
 	mem_heap_free(heap);
 
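
One conceptual point behind the spatial branches of btr_compress() above:
after a merge, the surviving page's minimum bounding rectangle must cover
both source pages, which is what rtr_merge_and_update_mbr() installs in the
parent. A toy sketch of the MBR union itself (the struct is illustrative,
not the real rtr_mbr_t):

    #include <algorithm>
    #include <cstdio>

    /* Toy axis-aligned bounding rectangle. */
    struct mbr {
            double xmin, ymin, xmax, ymax;
    };

    /* The merged page must be covered by the union of both MBRs. */
    static mbr mbr_union(const mbr& a, const mbr& b)
    {
            return mbr{std::min(a.xmin, b.xmin), std::min(a.ymin, b.ymin),
                       std::max(a.xmax, b.xmax), std::max(a.ymax, b.ymax)};
    }

    int main()
    {
            mbr left{0, 0, 4, 4};
            mbr right{3, 1, 8, 6};
            mbr m = mbr_union(left, right);
            printf("union: (%g,%g)-(%g,%g)\n",
                   m.xmin, m.ymin, m.xmax, m.ymax);
            return 0;
    }
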
@@ -3955,10 +3744,11 @@ func_exit:
 
 err_exit:
 	/* We play it safe and reset the free bits. */
-	if (zip_size
+	if (page_size.is_compressed()
 	    && merge_page
 	    && page_is_leaf(merge_page)
 	    && !dict_index_is_clust(index)) {
+
 		ibuf_reset_free_bits(merge_block);
 	}
 
@@ -3985,7 +3775,7 @@ btr_discard_only_page_on_level(
 	/* Save the PAGE_MAX_TRX_ID from the leaf page. */
 	max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
 
-	while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
+	while (block->page.id.page_no() != dict_index_get_page(index)) {
 		btr_cur_t	cursor;
 		buf_block_t*	father;
 		const page_t*	page	= buf_block_get_frame(block);
@@ -3995,13 +3785,22 @@ btr_discard_only_page_on_level(
 		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
 		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
 
-		ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+		ut_ad(mtr_is_block_fix(
+			mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 		btr_search_drop_page_hash_index(block);
 
+		if (dict_index_is_spatial(index)) {
+			/* Check whether any concurrent search uses this page */
+			rtr_check_discard_page(index, NULL, block);
+		}
+
 		btr_page_get_father(index, block, mtr, &cursor);
 		father = btr_cur_get_block(&cursor);
 
-		lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
+		if (!dict_table_is_locking_disabled(index->table)) {
+			lock_update_discard(
+				father, PAGE_HEAP_NO_SUPREMUM, block);
+		}
 
 		/* Free the file page */
 		btr_page_free(index, block, mtr);
@@ -4027,7 +3826,8 @@ btr_discard_only_page_on_level(
 	btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
 	ut_ad(page_is_leaf(buf_block_get_frame(block)));
 
-	if (!dict_index_is_clust(index)) {
+	if (!dict_index_is_clust(index)
+	    && !dict_table_is_temporary(index->table)) {
 		/* We play it safe and reset the free bits for the root */
 		ibuf_reset_free_bits(block);
 
@@ -4042,7 +3842,6 @@ btr_discard_only_page_on_level(
 Discards a page from a B-tree. This is used to remove the last record from
 a B-tree page: the whole page must be removed at the same time. This cannot
 be used for the root page, which is allowed to be empty. */
-UNIV_INTERN
 void
 btr_discard_page(
 /*=============*/
@@ -4051,8 +3850,6 @@ btr_discard_page(
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	dict_index_t*	index;
-	ulint		space;
-	ulint		zip_size;
 	ulint		left_page_no;
 	ulint		right_page_no;
 	buf_block_t*	merge_block;
@@ -4060,40 +3857,65 @@ btr_discard_page(
 	buf_block_t*	block;
 	page_t*		page;
 	rec_t*		node_ptr;
+#ifdef UNIV_DEBUG
+	btr_cur_t	parent_cursor;
+	bool		parent_is_different = false;
+#endif
 
 	block = btr_cur_get_block(cursor);
 	index = btr_cur_get_index(cursor);
 
-	ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-	space = dict_index_get_space(index);
-	zip_size = dict_table_zip_size(index->table);
+	ut_ad(dict_index_get_page(index) != block->page.id.page_no());
+
+	ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+					MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
+
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+
+	const ulint	space = dict_index_get_space(index);
 
 	MONITOR_INC(MONITOR_INDEX_DISCARD);
 
+#ifdef UNIV_DEBUG
+	btr_page_get_father(index, block, mtr, &parent_cursor);
+#endif
+
 	/* Decide the page which will inherit the locks */
 
 	left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
 	right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
 
+	const page_size_t	page_size(dict_table_page_size(index->table));
+
 	if (left_page_no != FIL_NULL) {
-		merge_block = btr_block_get(space, zip_size, left_page_no,
-					    RW_X_LATCH, index, mtr);
+		merge_block = btr_block_get(
+			page_id_t(space, left_page_no), page_size,
+			RW_X_LATCH, index, mtr);
+
 		merge_page = buf_block_get_frame(merge_block);
 #ifdef UNIV_BTR_DEBUG
 		ut_a(btr_page_get_next(merge_page, mtr)
-		     == buf_block_get_page_no(block));
+		     == block->page.id.page_no());
 #endif /* UNIV_BTR_DEBUG */
+		ut_d(parent_is_different =
+			(page_rec_get_next(
+				page_get_infimum_rec(
+					btr_cur_get_page(
+						&parent_cursor)))
+			 == btr_cur_get_rec(&parent_cursor)));
 	} else if (right_page_no != FIL_NULL) {
-		merge_block = btr_block_get(space, zip_size, right_page_no,
-					    RW_X_LATCH, index, mtr);
+		merge_block = btr_block_get(
+			page_id_t(space, right_page_no), page_size,
+			RW_X_LATCH, index, mtr);
+
 		merge_page = buf_block_get_frame(merge_block);
 #ifdef UNIV_BTR_DEBUG
 		ut_a(btr_page_get_prev(merge_page, mtr)
-		     == buf_block_get_page_no(block));
+		     == block->page.id.page_no());
 #endif /* UNIV_BTR_DEBUG */
+		ut_d(parent_is_different = page_rec_is_supremum(
+			page_rec_get_next(btr_cur_get_rec(&parent_cursor))));
 	} else {
 		btr_discard_only_page_on_level(index, block, mtr);
 
@@ -4123,7 +3945,7 @@ btr_discard_page(
 	btr_node_ptr_delete(index, block, mtr);
 
 	/* Remove the page from the level list */
-	btr_level_list_remove(space, zip_size, page, index, mtr);
+	btr_level_list_remove(space, page_size, page, index, mtr);
 #ifdef UNIV_ZIP_DEBUG
 	{
 		page_zip_des_t*	merge_page_zip
@@ -4133,27 +3955,34 @@ btr_discard_page(
 	}
 #endif /* UNIV_ZIP_DEBUG */
 
-	if (left_page_no != FIL_NULL) {
-		lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
-				    block);
-	} else {
-		lock_update_discard(merge_block,
-				    lock_get_min_heap_no(merge_block),
-				    block);
+	if (!dict_table_is_locking_disabled(index->table)) {
+		if (left_page_no != FIL_NULL) {
+			lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
+					    block);
+		} else {
+			lock_update_discard(merge_block,
+					    lock_get_min_heap_no(merge_block),
+					    block);
+		}
 	}
 
-	btr_blob_dbg_remove(page, index, "btr_discard_page");
+	if (dict_index_is_spatial(index)) {
+		rtr_check_discard_page(index, cursor, block);
+	}
 
 	/* Free the file page */
 	btr_page_free(index, block, mtr);
 
-	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+	/* btr_check_node_ptr() needs the parent block latched.
+	If the merge_block's parent block is not the same,
+	we cannot use btr_check_node_ptr() */
+	ut_ad(parent_is_different
+	      || btr_check_node_ptr(index, merge_block, mtr));
 }
 
 #ifdef UNIV_BTR_PRINT
 /*************************************************************//**
 Prints size info of a B-tree. */
-UNIV_INTERN
 void
 btr_print_size(
 /*===========*/
@@ -4179,7 +4008,7 @@ btr_print_size(
 	fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
 	fseg_print(seg, &mtr);
 
-	if (!dict_index_is_univ(index)) {
+	if (!dict_index_is_ibuf(index)) {
 
 		seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
 
@@ -4210,10 +4039,10 @@ btr_print_recursive(
 	ulint		i	= 0;
 	mtr_t		mtr2;
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-	fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
-		(ulong) btr_page_get_level(page, mtr),
-		(ulong) buf_block_get_page_no(block));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_SX_FIX, index->table));
+
+	ib::info() << "NODE ON LEVEL " << btr_page_get_level(page, mtr)
+		<< " page " << block->page.id;
 
 	page_print(block, index, width, width);
 
@@ -4254,7 +4083,6 @@ btr_print_recursive(
 
 /**************************************************************//**
 Prints directories and other info of all nodes in the tree. */
-UNIV_INTERN
 void
 btr_print_index(
 /*============*/
@@ -4274,7 +4102,7 @@ btr_print_index(
 
 	mtr_start(&mtr);
 
-	root = btr_root_block_get(index, RW_X_LATCH, &mtr);
+	root = btr_root_block_get(index, RW_SX_LATCH, &mtr);
 
 	btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
 	if (heap) {
@@ -4283,15 +4111,14 @@ btr_print_index(
 
 	mtr_commit(&mtr);
 
-	btr_validate_index(index, 0);
+	ut_ad(btr_validate_index(index, 0, false));
 }
 #endif /* UNIV_BTR_PRINT */
 
 #ifdef UNIV_DEBUG
 /************************************************************//**
 Checks that the node pointer to a page is appropriate.
- at return	TRUE */
-UNIV_INTERN
+ at return TRUE */
 ibool
 btr_check_node_ptr(
 /*===============*/
@@ -4305,8 +4132,9 @@ btr_check_node_ptr(
 	btr_cur_t	cursor;
 	page_t*		page = buf_block_get_frame(block);
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-	if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+
+	if (dict_index_get_page(index) == block->page.id.page_no()) {
 
 		return(TRUE);
 	}
@@ -4324,7 +4152,16 @@ btr_check_node_ptr(
 		index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
 		btr_page_get_level(page, mtr));
 
-	ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
+	/* For a spatial index, the MBR in the parent rec could differ
+	from that of the first rec of the child; their relationship
+	should be a "WITHIN" relationship */
+	if (dict_index_is_spatial(index)) {
+		ut_a(!cmp_dtuple_rec_with_gis(
+			tuple, btr_cur_get_rec(&cursor),
+			offsets, PAGE_CUR_WITHIN));
+	} else {
+		ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
+	}
 func_exit:
 	mem_heap_free(heap);
 
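
The spatial branch just above relaxes the node pointer check from byte
equality to containment: the child's key need only be WITHIN the parent's
MBR. A hedged sketch of such a containment predicate (toy rectangle type,
not the actual cmp_dtuple_rec_with_gis() interface):

    #include <cstdio>

    struct rect {
            double xmin, ymin, xmax, ymax;
    };

    /* true if inner lies completely inside outer: the "WITHIN"
    relationship the parent MBR must satisfy for its child. */
    static bool within(const rect& inner, const rect& outer)
    {
            return inner.xmin >= outer.xmin && inner.ymin >= outer.ymin
                && inner.xmax <= outer.xmax && inner.ymax <= outer.ymax;
    }

    int main()
    {
            rect child{1, 1, 3, 3};
            rect parent{0, 0, 4, 4};
            printf("within=%d\n", (int) within(child, parent));
            return 0;
    }
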
@@ -4342,17 +4179,17 @@ btr_index_rec_validate_report(
 	const rec_t*		rec,	/*!< in: index record */
 	const dict_index_t*	index)	/*!< in: index */
 {
-	fputs("InnoDB: Record in ", stderr);
-	dict_index_name_print(stderr, NULL, index);
-	fprintf(stderr, ", page %lu, at offset %lu\n",
-		page_get_page_no(page), (ulint) page_offset(rec));
+	ib::info() << "Record in index " << index->name
+		<< " of table " << index->table->name
+		<< ", page " << page_id_t(page_get_space_id(page),
+					  page_get_page_no(page))
+		<< ", at offset " << page_offset(rec);
 }
 
 /************************************************************//**
 Checks the size and number of fields in a record based on the definition of
 the index.
-@return	TRUE if ok */
-UNIV_INTERN
+@return TRUE if ok */
 ibool
 btr_index_rec_validate(
 /*===================*/
@@ -4373,7 +4210,7 @@ btr_index_rec_validate(
 
 	page = page_align(rec);
 
-	if (dict_index_is_univ(index)) {
+	if (dict_index_is_ibuf(index)) {
 		/* The insert buffer index tree can contain records from any
 		other index: we cannot check the number of fields or
 		their length */
@@ -4381,25 +4218,34 @@ btr_index_rec_validate(
 		return(TRUE);
 	}
 
+#ifdef VIRTUAL_INDEX_DEBUG
+	if (dict_index_has_virtual(index)) {
+		fprintf(stderr, "index name is %s\n", index->name());
+	}
+#endif
 	if ((ibool)!!page_is_comp(page) != dict_table_is_comp(index->table)) {
 		btr_index_rec_validate_report(page, rec, index);
-		fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
-			(ulong) !!page_is_comp(page),
-			(ulong) dict_table_is_comp(index->table));
+
+		ib::error() << "Compact flag=" << !!page_is_comp(page)
+			<< ", should be " << dict_table_is_comp(index->table);
 
 		return(FALSE);
 	}
 
 	n = dict_index_get_n_fields(index);
 
-	if (!page_is_comp(page) && rec_get_n_fields_old(rec) != n) {
+	if (!page_is_comp(page)
+	    && (rec_get_n_fields_old(rec) != n
+		/* a record for the older SYS_INDEXES table
+		(missing the merge_threshold column) is acceptable. */
+		&& !(index->id == DICT_INDEXES_ID
+		     && rec_get_n_fields_old(rec) == n - 1))) {
 		btr_index_rec_validate_report(page, rec, index);
-		fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
-			(ulong) rec_get_n_fields_old(rec), (ulong) n);
 
-		if (dump_on_error) {
-			buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
+		ib::error() << "Has " << rec_get_n_fields_old(rec)
+			<< " fields, should have " << n;
 
+		if (dump_on_error) {
 			fputs("InnoDB: corrupt record ", stderr);
 			rec_print_old(stderr, rec);
 			putc('\n', stderr);
@@ -4410,38 +4256,58 @@ btr_index_rec_validate(
 	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
 
 	for (i = 0; i < n; i++) {
-		ulint	fixed_size = dict_col_get_fixed_size(
-			dict_index_get_nth_col(index, i), page_is_comp(page));
+		dict_field_t*	field = dict_index_get_nth_field(index, i);
+		ulint		fixed_size = dict_col_get_fixed_size(
+						dict_field_get_col(field),
+						page_is_comp(page));
 
 		rec_get_nth_field_offs(offsets, i, &len);
 
 		/* Note that if fixed_size != 0, it equals the
-		length of a fixed-size column in the clustered index.
+		length of a fixed-size column in the clustered index,
+		except DATA_POINT, whose length would be MBR_LEN
+		when it is indexed in an R-TREE. We should adjust it here.
 		A prefix index of the column is of fixed, but different
 		length.  When fixed_size == 0, prefix_len is the maximum
 		length of the prefix index column. */
 
-		if ((dict_index_get_nth_field(index, i)->prefix_len == 0
+		if (dict_field_get_col(field)->mtype == DATA_POINT) {
+			ut_ad(fixed_size == DATA_POINT_LEN);
+			if (dict_index_is_spatial(index)) {
+				/* For DATA_POINT data, when it has an R-tree
+				index, the fixed_len is the MBR of the point.
+				But if it is a primary key and on the R-TREE
+				as the PK pointer, the length shall be
+				DATA_POINT_LEN as well. */
+				ut_ad((field->fixed_len == DATA_MBR_LEN
+				       && i == 0)
+				      || (field->fixed_len == DATA_POINT_LEN
+					  && i != 0));
+				fixed_size = field->fixed_len;
+			}
+		}
+
+		if ((field->prefix_len == 0
 		     && len != UNIV_SQL_NULL && fixed_size
 		     && len != fixed_size)
-		    || (dict_index_get_nth_field(index, i)->prefix_len > 0
+		    || (field->prefix_len > 0
 			&& len != UNIV_SQL_NULL
 			&& len
-			> dict_index_get_nth_field(index, i)->prefix_len)) {
+			> field->prefix_len)) {
 
 			btr_index_rec_validate_report(page, rec, index);
-			fprintf(stderr,
-				"InnoDB: field %lu len is %lu,"
-				" should be %lu\n",
-				(ulong) i, (ulong) len, (ulong) fixed_size);
 
-			if (dump_on_error) {
-				buf_page_print(page, 0,
-					       BUF_PAGE_PRINT_NO_CRASH);
+			ib::error	error;
 
-				fputs("InnoDB: corrupt record ", stderr);
-				rec_print_new(stderr, rec, offsets);
-				putc('\n', stderr);
+			error << "Field " << i << " len is " << len
+				<< ", should be " << fixed_size;
+
+			if (dump_on_error) {
+				error << "; ";
+				rec_print(error.m_oss, rec,
+					  rec_get_info_bits(
+						  rec, rec_offs_comp(offsets)),
+					  offsets);
 			}
 			if (heap) {
 				mem_heap_free(heap);
@@ -4450,6 +4316,12 @@ btr_index_rec_validate(
 		}
 	}
 
+#ifdef VIRTUAL_INDEX_DEBUG
+	if (dict_index_has_virtual(index)) {
+		rec_print_new(stderr, rec, offsets);
+	}
+#endif
+
 	if (heap) {
 		mem_heap_free(heap);
 	}
@@ -4459,7 +4331,7 @@ btr_index_rec_validate(
 /************************************************************//**
 Checks the size and number of fields in records based on the definition of
 the index.
-@return	TRUE if ok */
+@return TRUE if ok */
 static
 ibool
 btr_index_page_validate(
@@ -4524,13 +4396,14 @@ btr_validate_report1(
 	ulint			level,	/*!< in: B-tree level */
 	const buf_block_t*	block)	/*!< in: index page */
 {
-	fprintf(stderr, "InnoDB: Error in page %lu of ",
-		buf_block_get_page_no(block));
-	dict_index_name_print(stderr, NULL, index);
-	if (level) {
-		fprintf(stderr, ", index tree level %lu", level);
+	ib::error	error;
+	error << "In page " << block->page.id.page_no()
+		<< " of index " << index->name
+		<< " of table " << index->table->name;
+
+	if (level > 0) {
+		error << ", index tree level " << level;
 	}
-	putc('\n', stderr);
 }
 
 /************************************************************//**
@@ -4544,30 +4417,28 @@ btr_validate_report2(
 	const buf_block_t*	block1,	/*!< in: first index page */
 	const buf_block_t*	block2)	/*!< in: second index page */
 {
-	fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
-		buf_block_get_page_no(block1),
-		buf_block_get_page_no(block2));
-	dict_index_name_print(stderr, NULL, index);
-	if (level) {
-		fprintf(stderr, ", index tree level %lu", level);
-	}
-	putc('\n', stderr);
+	ib::error	error;
+	error << "In pages " << block1->page.id
+		<< " and " << block2->page.id << " of index " << index->name
+		<< " of table " << index->table->name;
+
+	if (level > 0) {
+		error << ", index tree level " << level;
+	}
 }
 
 /************************************************************//**
 Validates index tree level.
-@return	TRUE if ok */
+@return TRUE if ok */
 static
 bool
 btr_validate_level(
 /*===============*/
 	dict_index_t*	index,	/*!< in: index tree */
 	const trx_t*	trx,	/*!< in: transaction or NULL */
-	ulint		level)	/*!< in: level number */
+	ulint		level,	/*!< in: level number */
+	bool		lockout)/*!< in: true if X-latch index is intended */
 {
-	ulint		space;
-	ulint		space_flags;
-	ulint		zip_size;
 	buf_block_t*	block;
 	page_t*		page;
 	buf_block_t*	right_block = 0; /* remove warning */
@@ -4589,25 +4460,42 @@ btr_validate_level(
 #ifdef UNIV_ZIP_DEBUG
 	page_zip_des_t*	page_zip;
 #endif /* UNIV_ZIP_DEBUG */
+	ulint		savepoint = 0;
+	ulint		savepoint2 = 0;
+	ulint		parent_page_no = FIL_NULL;
+	ulint		parent_right_page_no = FIL_NULL;
+	bool		rightmost_child = false;
 
 	mtr_start(&mtr);
 
-	mtr_x_lock(dict_index_get_lock(index), &mtr);
+	if (!srv_read_only_mode) {
+		if (lockout) {
+			mtr_x_lock(dict_index_get_lock(index), &mtr);
+		} else {
+			mtr_sx_lock(dict_index_get_lock(index), &mtr);
+		}
+	}
 
-	block = btr_root_block_get(index, RW_X_LATCH, &mtr);
+	block = btr_root_block_get(index, RW_SX_LATCH, &mtr);
 	page = buf_block_get_frame(block);
 	seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
 
-	space = dict_index_get_space(index);
-	zip_size = dict_table_zip_size(index->table);
+#ifdef UNIV_DEBUG
+	if (dict_index_is_spatial(index)) {
+		fprintf(stderr, "Root page no: %lu\n",
+			(ulong) page_get_page_no(page));
+	}
+#endif
 
-	fil_space_get_latch(space, &space_flags);
+	const fil_space_t*	space	= fil_space_get(index->space);
+	const page_size_t	table_page_size(
+		dict_table_page_size(index->table));
+	const page_size_t	space_page_size(space->flags);
 
-	if (zip_size != dict_tf_get_zip_size(space_flags)) {
+	if (!table_page_size.equals_to(space_page_size)) {
 
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"Flags mismatch: table=%lu, tablespace=%lu",
-			(ulint) index->table->flags, (ulint) space_flags);
+		ib::warn() << "Flags mismatch: table=" << index->table->flags
+			<< ", tablespace=" << space->flags;
 
 		mtr_commit(&mtr);
 
@@ -4618,17 +4506,18 @@ btr_validate_level(
 		const rec_t*	node_ptr;
 
 		if (fseg_page_is_free(seg,
-				      block->page.space, block->page.offset)) {
+				      block->page.id.space(),
+				      block->page.id.page_no())) {
 
 			btr_validate_report1(index, level, block);
 
-			ib_logf(IB_LOG_LEVEL_WARN, "page is free");
+			ib::warn() << "Page is free";
 
 			ret = false;
 		}
 
-		ut_a(space == buf_block_get_space(block));
-		ut_a(space == page_get_space_id(page));
+		ut_a(index->space == block->page.id.space());
+		ut_a(index->space == page_get_space_id(page));
 #ifdef UNIV_ZIP_DEBUG
 		page_zip = buf_block_get_page_zip(block);
 		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
@@ -4641,8 +4530,38 @@ btr_validate_level(
 		node_ptr = page_cur_get_rec(&cursor);
 		offsets = rec_get_offsets(node_ptr, index, offsets,
 					  ULINT_UNDEFINED, &heap);
+
+		savepoint2 = mtr_set_savepoint(&mtr);
 		block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
 		page = buf_block_get_frame(block);
+
+		/* For an R-Tree, since the record order might not match
+		the linked index pages in the lower level, we need to
+		traverse backwards to get the first page rec in this
+		level. This is only used for index validation. A spatial
+		index does not use such a scan for any of its DML or
+		query operations. */
+		if (dict_index_is_spatial(index)) {
+			left_page_no = btr_page_get_prev(page, &mtr);
+
+			while (left_page_no != FIL_NULL) {
+				page_id_t	left_page_id(
+					index->space, left_page_no);
+				/* To obey the latch order of tree blocks,
+				we should release the block once to
+				obtain the lock on the uncle block. */
+				mtr_release_block_at_savepoint(
+					&mtr, savepoint2, block);
+
+				savepoint2 = mtr_set_savepoint(&mtr);
+				block = btr_block_get(
+					left_page_id,
+					table_page_size,
+					RW_SX_LATCH, index, &mtr);
+				page = buf_block_get_frame(block);
+				left_page_no = btr_page_get_prev(page, &mtr);
+			}
+		}
 	}
 
 	/* Now we are on the desired level. Loop through the pages on that
@@ -4656,28 +4575,34 @@ btr_validate_level(
 loop:
 	mem_heap_empty(heap);
 	offsets = offsets2 = NULL;
-	mtr_x_lock(dict_index_get_lock(index), &mtr);
+	if (!srv_read_only_mode) {
+		if (lockout) {
+			mtr_x_lock(dict_index_get_lock(index), &mtr);
+		} else {
+			mtr_sx_lock(dict_index_get_lock(index), &mtr);
+		}
+	}
 
 #ifdef UNIV_ZIP_DEBUG
 	page_zip = buf_block_get_page_zip(block);
 	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
-	ut_a(block->page.space == space);
+	ut_a(block->page.id.space() == index->space);
 
-	if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
+	if (fseg_page_is_free(seg,
+			      block->page.id.space(),
+			      block->page.id.page_no())) {
 
 		btr_validate_report1(index, level, block);
 
-		ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
+		ib::warn() << "Page is marked as free";
 		ret = false;
 
 	} else if (btr_page_get_index_id(page) != index->id) {
 
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Page index id " IB_ID_FMT " != data dictionary "
-			"index id " IB_ID_FMT,
-			btr_page_get_index_id(page), index->id);
+		ib::error() << "Page index id " << btr_page_get_index_id(page)
+			<< " != data dictionary index id " << index->id;
 
 		ret = false;
 
@@ -4705,17 +4630,21 @@ loop:
 
 	if (right_page_no != FIL_NULL) {
 		const rec_t*	right_rec;
-		right_block = btr_block_get(space, zip_size, right_page_no,
-					    RW_X_LATCH, index, &mtr);
+		savepoint = mtr_set_savepoint(&mtr);
+
+		right_block = btr_block_get(
+			page_id_t(index->space, right_page_no),
+			table_page_size,
+			RW_SX_LATCH, index, &mtr);
+
 		right_page = buf_block_get_frame(right_block);
+
 		if (btr_page_get_prev(right_page, &mtr)
 		    != page_get_page_no(page)) {
 
 			btr_validate_report2(index, level, block, right_block);
 			fputs("InnoDB: broken FIL_PAGE_NEXT"
 			      " or FIL_PAGE_PREV links\n", stderr);
-			buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
-			buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
 
 			ret = false;
 		}
@@ -4723,8 +4652,6 @@ loop:
 		if (page_is_comp(right_page) != page_is_comp(page)) {
 			btr_validate_report2(index, level, block, right_block);
 			fputs("InnoDB: 'compact' flag mismatch\n", stderr);
-			buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
-			buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
 
 			ret = false;
 
@@ -4738,17 +4665,19 @@ loop:
 					  offsets, ULINT_UNDEFINED, &heap);
 		offsets2 = rec_get_offsets(right_rec, index,
 					   offsets2, ULINT_UNDEFINED, &heap);
-		if (cmp_rec_rec(rec, right_rec, offsets, offsets2,
-			        index) >= 0) {
+
+		/* For a spatial index, we cannot guarantee the key
+		ordering across pages, so skip the record compare
+		verification for now. It will be enhanced in a special
+		R-Tree index validation scheme. */
+		if (!dict_index_is_spatial(index)
+		    && cmp_rec_rec(rec, right_rec,
+				   offsets, offsets2, index) >= 0) {
 
 			btr_validate_report2(index, level, block, right_block);
 
 			fputs("InnoDB: records in wrong order"
 			      " on adjacent pages\n", stderr);
 
-			buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
-			buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
 			fputs("InnoDB: record ", stderr);
 			rec = page_rec_get_prev(page_get_supremum_rec(page));
 			rec_print(stderr, rec, index);
@@ -4769,35 +4698,49 @@ loop:
 			     page_is_comp(page)));
 	}
 
-	if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
+	/* Similarly, skip the father node check for a spatial index
+	for now, for a couple of reasons:
+	1) As mentioned, there is no ordering relationship between
+	records in the parent level and linked pages in the child level.
+	2) Searching for the parent from the root is very costly for
+	an R-tree.
+	We will add a special validation mechanism for R-trees later
+	(WL #7520) */
+	if (!dict_index_is_spatial(index)
+	    && block->page.id.page_no() != dict_index_get_page(index)) {
 
 		/* Check father node pointers */
-
 		rec_t*	node_ptr;
 
-		offsets = btr_page_get_father_block(offsets, heap, index,
-						    block, &mtr, &node_cur);
+		btr_cur_position(
+			index, page_rec_get_next(page_get_infimum_rec(page)),
+			block, &node_cur);
+		offsets = btr_page_get_father_node_ptr_for_validate(
+			offsets, heap, &node_cur, &mtr);
+
 		father_page = btr_cur_get_page(&node_cur);
 		node_ptr = btr_cur_get_rec(&node_cur);
 
+		parent_page_no = page_get_page_no(father_page);
+		parent_right_page_no = btr_page_get_next(father_page, &mtr);
+		rightmost_child = page_rec_is_supremum(
+					page_rec_get_next(node_ptr));
+
 		btr_cur_position(
-			index, page_rec_get_prev(page_get_supremum_rec(page)),
+			index,
+			page_rec_get_prev(page_get_supremum_rec(page)),
 			block, &node_cur);
-		offsets = btr_page_get_father_node_ptr(offsets, heap,
-						       &node_cur, &mtr);
+
+		offsets = btr_page_get_father_node_ptr_for_validate(
+				offsets, heap, &node_cur, &mtr);
 
 		if (node_ptr != btr_cur_get_rec(&node_cur)
 		    || btr_node_ptr_get_child_page_no(node_ptr, offsets)
-				     != buf_block_get_page_no(block)) {
+				     != block->page.id.page_no()) {
 
 			btr_validate_report1(index, level, block);
 
 			fputs("InnoDB: node pointer to the page is wrong\n",
 			      stderr);
 
-			buf_page_print(father_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-			buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
 			fputs("InnoDB: node ptr ", stderr);
 			rec_print(stderr, node_ptr, index);
 
@@ -4828,14 +4771,9 @@ loop:
 
 				btr_validate_report1(index, level, block);
 
-				buf_page_print(father_page, 0,
-					       BUF_PAGE_PRINT_NO_CRASH);
-				buf_page_print(page, 0,
-					       BUF_PAGE_PRINT_NO_CRASH);
+				ib::error() << "Node ptrs differ on levels > 0";
 
-				fputs("InnoDB: Error: node ptrs differ"
-				      " on levels > 0\n"
-				      "InnoDB: node ptr ", stderr);
+				fputs("InnoDB: node ptr ", stderr);
 				rec_print_new(stderr, node_ptr, offsets);
 				fputs("InnoDB: first rec ", stderr);
 				rec_print(stderr, first_rec, index);
@@ -4857,12 +4795,41 @@ loop:
 				     page_get_supremum_rec(father_page)));
 			ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
 		} else {
-			const rec_t*	right_node_ptr
-				= page_rec_get_next(node_ptr);
+			const rec_t*	right_node_ptr;
+
+			right_node_ptr = page_rec_get_next(node_ptr);
+
+			if (!lockout && rightmost_child) {
+
+				/* To obey the latch order of tree blocks,
+				we should release the right_block once to
+				obtain the lock on the uncle block. */
+				mtr_release_block_at_savepoint(
+					&mtr, savepoint, right_block);
+
+				btr_block_get(
+					page_id_t(index->space,
+						  parent_right_page_no),
+					table_page_size,
+					RW_SX_LATCH, index, &mtr);
+
+				right_block = btr_block_get(
+					page_id_t(index->space,
+						  right_page_no),
+					table_page_size,
+					RW_SX_LATCH, index, &mtr);
+			}
+
+			btr_cur_position(
+				index, page_rec_get_next(
+					page_get_infimum_rec(
+						buf_block_get_frame(
+							right_block))),
+				right_block, &right_node_cur);
+
+			offsets = btr_page_get_father_node_ptr_for_validate(
+					offsets, heap, &right_node_cur, &mtr);
 
-			offsets = btr_page_get_father_block(
-				offsets, heap, index, right_block,
-				&mtr, &right_node_cur);
 			if (right_node_ptr
 			    != page_get_supremum_rec(father_page)) {
 
@@ -4875,16 +4842,6 @@ loop:
 
 					btr_validate_report1(index, level,
 							     block);
-
-					buf_page_print(
-						father_page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					buf_page_print(
-						page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					buf_page_print(
-						right_page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
 				}
 			} else {
 				page_t*	right_father_page
@@ -4901,19 +4858,6 @@ loop:
 
 					btr_validate_report1(index, level,
 							     block);
-
-					buf_page_print(
-						father_page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					buf_page_print(
-						right_father_page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					buf_page_print(
-						page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					buf_page_print(
-						right_page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
 				}
 
 				if (page_get_page_no(right_father_page)
@@ -4926,19 +4870,6 @@ loop:
 
 					btr_validate_report1(index, level,
 							     block);
-
-					buf_page_print(
-						father_page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					buf_page_print(
-						right_father_page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					buf_page_print(
-						page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					buf_page_print(
-						right_page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
 				}
 			}
 		}
@@ -4956,9 +4887,29 @@ node_ptr_fails:
 
 		mtr_start(&mtr);
 
+		if (!lockout) {
+			if (rightmost_child) {
+				if (parent_right_page_no != FIL_NULL) {
+					btr_block_get(
+						page_id_t(
+							index->space,
+							parent_right_page_no),
+						table_page_size,
+						RW_SX_LATCH, index, &mtr);
+				}
+			} else if (parent_page_no != FIL_NULL) {
+				btr_block_get(
+					page_id_t(index->space,
+						  parent_page_no),
+					table_page_size,
+					RW_SX_LATCH, index, &mtr);
+			}
+		}
+
 		block = btr_block_get(
-			space, zip_size, right_page_no,
-			RW_X_LATCH, index, &mtr);
+			page_id_t(index->space, right_page_no),
+			table_page_size,
+			RW_SX_LATCH, index, &mtr);
 
 		page = buf_block_get_frame(block);
 
@@ -4971,14 +4922,54 @@ node_ptr_fails:
 }
 
 /**************************************************************//**
+Do an index-level validation of a spatial index tree.
+@return	true if no error found */
+bool
+btr_validate_spatial_index(
+/*=======================*/
+	dict_index_t*	index,	/*!< in: index */
+	const trx_t*	trx)	/*!< in: transaction or NULL */
+{
+
+	mtr_t	mtr;
+	bool	ok = true;
+
+	mtr_start(&mtr);
+
+	mtr_x_lock(dict_index_get_lock(index), &mtr);
+
+	page_t*	root = btr_root_get(index, &mtr);
+	ulint	n = btr_page_get_level(root, &mtr);
+
+#ifdef UNIV_RTR_DEBUG
+	fprintf(stderr, "R-tree level is %lu\n", n);
+#endif /* UNIV_RTR_DEBUG */
+
+	for (ulint i = 0; i <= n; ++i) {
+#ifdef UNIV_RTR_DEBUG
+		fprintf(stderr, "Level %lu:\n", n - i);
+#endif /* UNIV_RTR_DEBUG */
+
+		if (!btr_validate_level(index, trx, n - i, true)) {
+			ok = false;
+			break;
+		}
+	}
+
+	mtr_commit(&mtr);
+
+	return(ok);
+}
+
+/**************************************************************//**
 Checks the consistency of an index tree.
-@return	TRUE if ok */
-UNIV_INTERN
+@return true if ok */
 bool
 btr_validate_index(
 /*===============*/
 	dict_index_t*	index,	/*!< in: index */
-	const trx_t*	trx)	/*!< in: transaction or NULL */
+	const trx_t*	trx,	/*!< in: transaction or NULL */
+	bool		lockout)/*!< in: true if X-latch index is intended */
 {
 	/* Full Text index are implemented by auxiliary tables,
 	not the B-tree */
@@ -4986,11 +4977,21 @@ btr_validate_index(
 		return(true);
 	}
 
+	if (dict_index_is_spatial(index)) {
+		return(btr_validate_spatial_index(index, trx));
+	}
+
 	mtr_t		mtr;
 
 	mtr_start(&mtr);
 
-	mtr_x_lock(dict_index_get_lock(index), &mtr);
+	if (!srv_read_only_mode) {
+		if (lockout) {
+			mtr_x_lock(dict_index_get_lock(index), &mtr);
+		} else {
+			mtr_sx_lock(dict_index_get_lock(index), &mtr);
+		}
+	}
 
 	bool	ok = true;
 	page_t*	root = btr_root_get(index, &mtr);
@@ -4998,7 +4999,7 @@ btr_validate_index(
 
 	for (ulint i = 0; i <= n; ++i) {
 
-		if (!btr_validate_level(index, trx, n - i)) {
+		if (!btr_validate_level(index, trx, n - i, lockout)) {
 			ok = false;
 			break;
 		}
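
btr_validate_index() walks the tree one level at a time, from the root
level n down to the leaves at level 0. A toy version of the same top-down
loop, with each level reduced to the left-to-right sequence of its page
keys, in the spirit of the adjacent-page ordering check that
btr_validate_level() performs:

    #include <cstdio>
    #include <vector>

    /* A level is "valid" if its keys never decrease from left to
    right, mimicking the cmp_rec_rec() check across sibling pages. */
    static bool validate_level(const std::vector<int>& level)
    {
            for (size_t i = 1; i < level.size(); i++) {
                    if (level[i] < level[i - 1]) {
                            return false;
                    }
            }
            return true;
    }

    int main()
    {
            std::vector<std::vector<int>> tree = {
                    {10, 50},               /* level 1: node pointers */
                    {10, 20, 50, 60},       /* level 0: leaves */
            };

            bool ok = true;
            for (size_t i = 0; i < tree.size() && ok; i++) {
                    ok = validate_level(tree[i]);
            }
            printf("ok=%d\n", (int) ok);
            return 0;
    }
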
@@ -5012,9 +5013,8 @@ btr_validate_index(
 /**************************************************************//**
 Checks if the page in the cursor can be merged with given page.
 If necessary, re-organize the merge_page.
-@return	TRUE if possible to merge. */
-UNIV_INTERN
-ibool
+@return	true if possible to merge. */
+bool
 btr_can_merge_with_page(
 /*====================*/
 	btr_cur_t*	cursor,		/*!< in: cursor on the page to merge */
@@ -5024,34 +5024,33 @@ btr_can_merge_with_page(
 {
 	dict_index_t*	index;
 	page_t*		page;
-	ulint		space;
-	ulint		zip_size;
 	ulint		n_recs;
 	ulint		data_size;
-        ulint           max_ins_size_reorg;
+	ulint		max_ins_size_reorg;
 	ulint		max_ins_size;
 	buf_block_t*	mblock;
 	page_t*		mpage;
 	DBUG_ENTER("btr_can_merge_with_page");
 
 	if (page_no == FIL_NULL) {
-		goto error;
+		*merge_block = NULL;
+		DBUG_RETURN(false);
 	}
 
 	index = btr_cur_get_index(cursor);
-	page  = btr_cur_get_page(cursor);
-	space = dict_index_get_space(index);
-        zip_size = dict_table_zip_size(index->table);
+	page = btr_cur_get_page(cursor);
 
-	mblock = btr_block_get(space, zip_size, page_no, RW_X_LATCH, index,
-			       mtr);
+	const page_id_t		page_id(dict_index_get_space(index), page_no);
+	const page_size_t	page_size(dict_table_page_size(index->table));
+
+	mblock = btr_block_get(page_id, page_size, RW_X_LATCH, index, mtr);
 	mpage = buf_block_get_frame(mblock);
 
-        n_recs = page_get_n_recs(page);
-        data_size = page_get_data_size(page);
+	n_recs = page_get_n_recs(page);
+	data_size = page_get_data_size(page);
 
-        max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
-                mpage, n_recs);
+	max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
+		mpage, n_recs);
 
 	if (data_size > max_ins_size_reorg) {
 		goto error;
@@ -5060,7 +5059,7 @@ btr_can_merge_with_page(
 	/* If compression padding tells us that merging will result in
 	too packed up page i.e.: which is likely to cause compression
 	failure then don't merge the pages. */
-	if (zip_size && page_is_leaf(mpage)
+	if (page_size.is_compressed() && page_is_leaf(mpage)
 	    && (page_get_data_size(mpage) + data_size
 		>= dict_index_zip_pad_optimal_page_size(index))) {
 
@@ -5095,11 +5094,11 @@ btr_can_merge_with_page(
 	}
 
 	*merge_block = mblock;
-	DBUG_RETURN(TRUE);
+	DBUG_RETURN(true);
 
 error:
 	*merge_block = NULL;
-	DBUG_RETURN(FALSE);
+	DBUG_RETURN(false);
 }
 
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc
new file mode 100644
index 0000000..f5ff293
--- /dev/null
+++ b/storage/innobase/btr/btr0bulk.cc
@@ -0,0 +1,988 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file btr/btr0bulk.cc
+The B-tree bulk load
+
+Created 03/11/2014 Shaohua Wang
+*******************************************************/
+
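+/* Typical call pattern (an illustrative sketch only; the public
+interface is declared in btr0bulk.h): one BtrBulk object is built per
+index, fed records in ascending key order, and then finalized:
+
+	BtrBulk	bulk(index, trx_id, flush_observer);
+	...			// repeatedly: err = bulk.insert(tuple);
+	err = bulk.finish(err);	// commit pages, write the root
+
+Records must arrive pre-sorted, because pages are filled left to right
+and committed as soon as they become full. */
+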
+#include "btr0bulk.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "btr0pcur.h"
+#include "ibuf0ibuf.h"
+
+/** Innodb B-tree index fill factor for bulk load. */
+long	innobase_fill_factor;
+
+/** Initialize members, allocate a page if needed, and start the mtr.
+Note: we commit all mtrs on failure.
+@return error code. */
+dberr_t
+PageBulk::init()
+{
+	mtr_t*		mtr;
+	buf_block_t*	new_block;
+	page_t*		new_page;
+	page_zip_des_t*	new_page_zip;
+	ulint		new_page_no;
+
+	ut_ad(m_heap == NULL);
+	m_heap = mem_heap_create(1000);
+
+	mtr = static_cast<mtr_t*>(
+		mem_heap_alloc(m_heap, sizeof(mtr_t)));
+	mtr_start(mtr);
+	mtr_x_lock(dict_index_get_lock(m_index), mtr);
+	mtr_set_log_mode(mtr, MTR_LOG_NO_REDO);
+	mtr_set_flush_observer(mtr, m_flush_observer);
+
+	if (m_page_no == FIL_NULL) {
+		mtr_t	alloc_mtr;
+
+		/* We commit the redo log for the allocation in a separate
+		mtr, because pages are not guaranteed to be committed in
+		allocation order, and we always generate redo log for
+		page allocation, even when creating a new tablespace. */
+		mtr_start(&alloc_mtr);
+		alloc_mtr.set_named_space(dict_index_get_space(m_index));
+
+		ulint	n_reserved;
+		bool	success;
+		success = fsp_reserve_free_extents(&n_reserved, m_index->space,
+						   1, FSP_NORMAL, &alloc_mtr);
+		if (!success) {
+			mtr_commit(&alloc_mtr);
+			mtr_commit(mtr);
+			return(DB_OUT_OF_FILE_SPACE);
+		}
+
+		/* Allocate a new page. */
+		new_block = btr_page_alloc(m_index, 0, FSP_UP, m_level,
+					   &alloc_mtr, mtr);
+
+		if (n_reserved > 0) {
+			fil_space_release_free_extents(m_index->space,
+						       n_reserved);
+		}
+
+		mtr_commit(&alloc_mtr);
+
+		new_page = buf_block_get_frame(new_block);
+		new_page_zip = buf_block_get_page_zip(new_block);
+		new_page_no = page_get_page_no(new_page);
+
+		if (new_page_zip) {
+			page_create_zip(new_block, m_index, m_level, 0,
+					NULL, mtr);
+		} else {
+			ut_ad(!dict_index_is_spatial(m_index));
+			page_create(new_block, mtr,
+				    dict_table_is_comp(m_index->table),
+				    false);
+			btr_page_set_level(new_page, NULL, m_level, mtr);
+		}
+
+		btr_page_set_next(new_page, NULL, FIL_NULL, mtr);
+		btr_page_set_prev(new_page, NULL, FIL_NULL, mtr);
+
+		btr_page_set_index_id(new_page, NULL, m_index->id, mtr);
+	} else {
+		page_id_t	page_id(dict_index_get_space(m_index), m_page_no);
+		page_size_t	page_size(dict_table_page_size(m_index->table));
+
+		new_block = btr_block_get(page_id, page_size,
+					  RW_X_LATCH, m_index, mtr);
+
+		new_page = buf_block_get_frame(new_block);
+		new_page_zip = buf_block_get_page_zip(new_block);
+		new_page_no = page_get_page_no(new_page);
+		ut_ad(m_page_no == new_page_no);
+
+		ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
+
+		btr_page_set_level(new_page, NULL, m_level, mtr);
+	}
+
+	if (dict_index_is_sec_or_ibuf(m_index)
+	    && !dict_table_is_temporary(m_index->table)
+	    && page_is_leaf(new_page)) {
+		page_update_max_trx_id(new_block, NULL, m_trx_id, mtr);
+	}
+
+	m_mtr = mtr;
+	m_block = new_block;
+	m_block->skip_flush_check = true;
+	m_page = new_page;
+	m_page_zip = new_page_zip;
+	m_page_no = new_page_no;
+	m_cur_rec = page_get_infimum_rec(new_page);
+	ut_ad(m_is_comp == !!page_is_comp(new_page));
+	m_free_space = page_get_free_space_of_empty(m_is_comp);
+
+	if (innobase_fill_factor == 100 && dict_index_is_clust(m_index)) {
+		/* Keep default behavior compatible with 5.6 */
+		m_reserved_space = dict_index_get_space_reserve();
+	} else {
+		m_reserved_space =
+			UNIV_PAGE_SIZE * (100 - innobase_fill_factor) / 100;
+	}
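+	/* For example (illustrative): with UNIV_PAGE_SIZE = 16384 and
+	innobase_fill_factor = 90, m_reserved_space = 16384 * 10 / 100
+	= 1638 bytes, i.e. about 10% of the page is kept free. */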
+
+	m_padding_space =
+		UNIV_PAGE_SIZE - dict_index_zip_pad_optimal_page_size(m_index);
+	m_heap_top = page_header_get_ptr(new_page, PAGE_HEAP_TOP);
+	m_rec_no = page_header_get_field(new_page, PAGE_N_RECS);
+
+	ut_d(m_total_data = 0);
+	page_header_set_field(m_page, NULL, PAGE_HEAP_TOP, UNIV_PAGE_SIZE - 1);
+
+	return(DB_SUCCESS);
+}
+
+/** Insert a record in the page.
+@param[in]	rec		record
+@param[in]	offsets		record offsets */
+void
+PageBulk::insert(
+	const rec_t*		rec,
+	ulint*			offsets)
+{
+	ulint		rec_size;
+
+	ut_ad(m_heap != NULL);
+
+	rec_size = rec_offs_size(offsets);
+
+#ifdef UNIV_DEBUG
+	/* Check whether records are in order. */
+	if (!page_rec_is_infimum(m_cur_rec)) {
+		rec_t*	old_rec = m_cur_rec;
+		ulint*	old_offsets = rec_get_offsets(
+			old_rec, m_index, NULL,	ULINT_UNDEFINED, &m_heap);
+
+		ut_ad(cmp_rec_rec(rec, old_rec, offsets, old_offsets, m_index)
+		      > 0);
+	}
+
+	m_total_data += rec_size;
+#endif /* UNIV_DEBUG */
+
+	/* 1. Copy the record to page. */
+	rec_t*	insert_rec = rec_copy(m_heap_top, rec, offsets);
+	rec_offs_make_valid(insert_rec, m_index, offsets);
+
+	/* 2. Insert the record in the linked list. */
+	rec_t*	next_rec = page_rec_get_next(m_cur_rec);
+
+	page_rec_set_next(insert_rec, next_rec);
+	page_rec_set_next(m_cur_rec, insert_rec);
+
+	/* 3. Set the n_owned field in the inserted record to zero,
+	and set the heap_no field. */
+	if (m_is_comp) {
+		rec_set_n_owned_new(insert_rec, NULL, 0);
+		rec_set_heap_no_new(insert_rec,
+				    PAGE_HEAP_NO_USER_LOW + m_rec_no);
+	} else {
+		rec_set_n_owned_old(insert_rec, 0);
+		rec_set_heap_no_old(insert_rec,
+				    PAGE_HEAP_NO_USER_LOW + m_rec_no);
+	}
+
+	/* 4. Set member variables. */
+	ulint		slot_size;
+	slot_size = page_dir_calc_reserved_space(m_rec_no + 1)
+		- page_dir_calc_reserved_space(m_rec_no);
+
+	ut_ad(m_free_space >= rec_size + slot_size);
+	ut_ad(m_heap_top + rec_size < m_page + UNIV_PAGE_SIZE);
+
+	m_free_space -= rec_size + slot_size;
+	m_heap_top += rec_size;
+	m_rec_no += 1;
+	m_cur_rec = insert_rec;
+}
+
+/** Mark the end of inserts to the page. Scan all records to set the
+page directory, and set the page header fields.
+Note: this follows page_copy_rec_list_end_to_created_page. */
+void
+PageBulk::finish()
+{
+	ut_ad(m_rec_no > 0);
+
+#ifdef UNIV_DEBUG
+	ut_ad(m_total_data + page_dir_calc_reserved_space(m_rec_no)
+	      <= page_get_free_space_of_empty(m_is_comp));
+
+	/* To pass the debug tests we have to set these dummy values
+	in the debug version */
+	page_dir_set_n_slots(m_page, NULL, UNIV_PAGE_SIZE / 2);
+#endif
+
+	ulint	count = 0;
+	ulint	n_recs = 0;
+	ulint	slot_index = 0;
+	rec_t*	insert_rec = page_rec_get_next(page_get_infimum_rec(m_page));
+	page_dir_slot_t* slot = NULL;
+
+	/* Set owner & dir. */
+	do {
+
+		count++;
+		n_recs++;
+
+		if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) {
+
+			slot_index++;
+
+			slot = page_dir_get_nth_slot(m_page, slot_index);
+
+			page_dir_slot_set_rec(slot, insert_rec);
+			page_dir_slot_set_n_owned(slot, NULL, count);
+
+			count = 0;
+		}
+
+		insert_rec = page_rec_get_next(insert_rec);
+	} while (!page_rec_is_supremum(insert_rec));
+
+	if (slot_index > 0
+	    && (count + 1 + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
+		<= PAGE_DIR_SLOT_MAX_N_OWNED)) {
+		/* We can merge the two last dir slots. This operation is
+		here to make this function imitate exactly the equivalent
+		task done with page_cur_insert_rec, which is used during
+		database recovery to reproduce the work of this function.
+		To be able to check the correctness of recovery, it is
+		good that the imitation is exact. */
+
+		count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
+
+		page_dir_slot_set_n_owned(slot, NULL, 0);
+
+		slot_index--;
+	}
+
+	slot = page_dir_get_nth_slot(m_page, 1 + slot_index);
+	page_dir_slot_set_rec(slot, page_get_supremum_rec(m_page));
+	page_dir_slot_set_n_owned(slot, NULL, count + 1);
+
+	ut_ad(!dict_index_is_spatial(m_index));
+	page_dir_set_n_slots(m_page, NULL, 2 + slot_index);
+	page_header_set_ptr(m_page, NULL, PAGE_HEAP_TOP, m_heap_top);
+	page_dir_set_n_heap(m_page, NULL, PAGE_HEAP_NO_USER_LOW + m_rec_no);
+	page_header_set_field(m_page, NULL, PAGE_N_RECS, m_rec_no);
+
+	page_header_set_ptr(m_page, NULL, PAGE_LAST_INSERT, m_cur_rec);
+	page_header_set_field(m_page, NULL, PAGE_DIRECTION, PAGE_RIGHT);
+	page_header_set_field(m_page, NULL, PAGE_N_DIRECTION, 0);
+
+	m_block->skip_flush_check = false;
+}
+
+/** Commit the inserts done to the page.
+@param[in]	success		whether all inserts succeeded */
+void
+PageBulk::commit(
+	bool	success)
+{
+	if (success) {
+		ut_ad(page_validate(m_page, m_index));
+
+		/* Set no free space left and no buffered changes in ibuf. */
+		if (!dict_index_is_clust(m_index)
+		    && !dict_table_is_temporary(m_index->table)
+		    && page_is_leaf(m_page)) {
+			ibuf_set_bitmap_for_bulk_load(
+				m_block, innobase_fill_factor == 100);
+		}
+	}
+
+	mtr_commit(m_mtr);
+}
+
+/** Compress a page of a compressed table.
+@return	true	if compression succeeded or was not needed
+@return	false	if compression failed */
+bool
+PageBulk::compress()
+{
+	ut_ad(m_page_zip != NULL);
+
+	return(page_zip_compress(m_page_zip, m_page, m_index,
+				 page_zip_level, NULL, m_mtr));
+}
+
+/** Get node pointer
+@return node pointer */
+dtuple_t*
+PageBulk::getNodePtr()
+{
+	rec_t*		first_rec;
+	dtuple_t*	node_ptr;
+
+	/* Create node pointer */
+	first_rec = page_rec_get_next(page_get_infimum_rec(m_page));
+	ut_a(page_rec_is_user_rec(first_rec));
+	node_ptr = dict_index_build_node_ptr(m_index, first_rec, m_page_no,
+					     m_heap, m_level);
+
+	return(node_ptr);
+}
+
+/** Get the split rec in the left page. We split a page in half when
+compression fails, and the split rec is copied to the right page.
+@return split rec */
+rec_t*
+PageBulk::getSplitRec()
+{
+	rec_t*		rec;
+	ulint*		offsets;
+	ulint		total_used_size;
+	ulint		total_recs_size;
+	ulint		n_recs;
+
+	ut_ad(m_page_zip != NULL);
+	ut_ad(m_rec_no >= 2);
+
+	ut_ad(page_get_free_space_of_empty(m_is_comp) > m_free_space);
+	total_used_size = page_get_free_space_of_empty(m_is_comp)
+		- m_free_space;
+
+	total_recs_size = 0;
+	n_recs = 0;
+	offsets = NULL;
+	rec = page_get_infimum_rec(m_page);
+
+	do {
+		rec = page_rec_get_next(rec);
+		ut_ad(page_rec_is_user_rec(rec));
+
+		offsets = rec_get_offsets(rec, m_index,
+					  offsets, ULINT_UNDEFINED,
+					  &(m_heap));
+		total_recs_size += rec_offs_size(offsets);
+		n_recs++;
+	} while (total_recs_size + page_dir_calc_reserved_space(n_recs)
+		 < total_used_size / 2);
+
+	/* Keep at least one record on left page */
+	if (page_rec_is_infimum(page_rec_get_prev(rec))) {
+		rec = page_rec_get_next(rec);
+		ut_ad(page_rec_is_user_rec(rec));
+	}
+
+	return(rec);
+}
+
+/** Copy all records after the split rec, including the split rec itself.
+@param[in]	split_rec	split rec */
+void
+PageBulk::copyIn(
+	rec_t*		split_rec)
+{
+
+	rec_t*		rec = split_rec;
+	ulint*		offsets = NULL;
+
+	ut_ad(m_rec_no == 0);
+	ut_ad(page_rec_is_user_rec(rec));
+
+	do {
+		offsets = rec_get_offsets(rec, m_index, offsets,
+					  ULINT_UNDEFINED, &(m_heap));
+
+		insert(rec, offsets);
+
+		rec = page_rec_get_next(rec);
+	} while (!page_rec_is_supremum(rec));
+
+	ut_ad(m_rec_no > 0);
+}
+
+/** Remove all records after the split rec, including the split rec itself.
+@param[in]	split_rec	split rec */
+void
+PageBulk::copyOut(
+	rec_t*		split_rec)
+{
+	rec_t*		rec;
+	rec_t*		last_rec;
+	ulint		n;
+
+	/* Suppose that before copyOut, we have 5 records on the page:
+	infimum->r1->r2->r3->r4->r5->supremum, and r3 is the split rec.
+
+	After copyOut, we have 2 records on the page:
+	infimum->r1->r2->supremum. Slot adjustment is not done here. */
+
+	rec = page_rec_get_next(page_get_infimum_rec(m_page));
+	last_rec = page_rec_get_prev(page_get_supremum_rec(m_page));
+	n = 0;
+
+	while (rec != split_rec) {
+		rec = page_rec_get_next(rec);
+		n++;
+	}
+
+	ut_ad(n > 0);
+
+	/* Set last record's next in page */
+	ulint*		offsets = NULL;
+	rec = page_rec_get_prev(split_rec);
+	offsets = rec_get_offsets(rec, m_index,
+				  offsets, ULINT_UNDEFINED,
+				  &(m_heap));
+	page_rec_set_next(rec, page_get_supremum_rec(m_page));
+
+	/* Set related members */
+	m_cur_rec = rec;
+	m_heap_top = rec_get_end(rec, offsets);
+
+	offsets = rec_get_offsets(last_rec, m_index,
+				  offsets, ULINT_UNDEFINED,
+				  &(m_heap));
+
+	m_free_space += rec_get_end(last_rec, offsets)
+		- m_heap_top
+		+ page_dir_calc_reserved_space(m_rec_no)
+		- page_dir_calc_reserved_space(n);
+	ut_ad(m_free_space > 0);
+	m_rec_no = n;
+
+#ifdef UNIV_DEBUG
+	m_total_data -= rec_get_end(last_rec, offsets) - m_heap_top;
+#endif /* UNIV_DEBUG */
+}
+
+/** Set the next page number.
+@param[in]	next_page_no	next page no */
+void
+PageBulk::setNext(
+	ulint		next_page_no)
+{
+	btr_page_set_next(m_page, NULL, next_page_no, m_mtr);
+}
+
+/** Set the previous page number.
+@param[in]	prev_page_no	previous page no */
+void
+PageBulk::setPrev(
+	ulint		prev_page_no)
+{
+	btr_page_set_prev(m_page, NULL, prev_page_no, m_mtr);
+}
+
+/** Check if the required space is available in the page for the rec
+to be inserted. We check the fill factor and padding here.
+@param[in]	rec_size	required space
+@return true	if space is available */
+bool
+PageBulk::isSpaceAvailable(
+	ulint		rec_size)
+{
+	ulint	slot_size;
+	ulint	required_space;
+
+	slot_size = page_dir_calc_reserved_space(m_rec_no + 1)
+		- page_dir_calc_reserved_space(m_rec_no);
+
+	required_space = rec_size + slot_size;
+
+	if (required_space > m_free_space) {
+		ut_ad(m_rec_no > 0);
+		return false;
+	}
+
+	/* Fill factor and padding apply to both leaf and non-leaf pages.
+	Note: we keep at least 2 records in a page to avoid B-tree level
+	growing too high. */
+	if (m_rec_no >= 2
+	    && ((m_page_zip == NULL && m_free_space - required_space
+		 < m_reserved_space)
+		|| (m_page_zip != NULL && m_free_space - required_space
+		    < m_padding_space))) {
+		return(false);
+	}
+
+	return(true);
+}
+
+/** Check whether the record needs to be stored externally.
+@return false if the entire record can be stored locally on the page */
+bool
+PageBulk::needExt(
+	const dtuple_t*		tuple,
+	ulint			rec_size)
+{
+	return(page_zip_rec_needs_ext(rec_size, m_is_comp,
+		dtuple_get_n_fields(tuple), m_block->page.size));
+}
+
+/** Store an external record.
+Since the record is not logged yet, we don't log the update to the record;
+the blob data is logged first, then the record is logged in bulk mode.
+@param[in]	big_rec		external record
+@param[in]	offsets		record offsets
+@return	error code */
+dberr_t
+PageBulk::storeExt(
+	const big_rec_t*	big_rec,
+	ulint*			offsets)
+{
+	/* Note: not all fields are initialized in btr_pcur. */
+	btr_pcur_t	btr_pcur;
+	btr_pcur.pos_state = BTR_PCUR_IS_POSITIONED;
+	btr_pcur.latch_mode = BTR_MODIFY_LEAF;
+	btr_pcur.btr_cur.index = m_index;
+
+	page_cur_t*	page_cur = &btr_pcur.btr_cur.page_cur;
+	page_cur->index = m_index;
+	page_cur->rec = m_cur_rec;
+	page_cur->offsets = offsets;
+	page_cur->block = m_block;
+
+	dberr_t	err = btr_store_big_rec_extern_fields(
+		&btr_pcur, NULL, offsets, big_rec, m_mtr,
+		BTR_STORE_INSERT_BULK);
+
+	ut_ad(page_offset(m_cur_rec) == page_offset(page_cur->rec));
+
+	/* Reset m_block and m_cur_rec from page cursor, because
+	block may be changed during blob insert. */
+	m_block = page_cur->block;
+	m_cur_rec = page_cur->rec;
+	m_page = buf_block_get_frame(m_block);
+
+	return(err);
+}
+
+/** Release the block by committing the mtr.
+Note: log_free_check requires holding no lock/latch in the current thread. */
+void
+PageBulk::release()
+{
+	ut_ad(!dict_index_is_spatial(m_index));
+
+	/* We fix the block because we will re-pin it soon. */
+	buf_block_buf_fix_inc(m_block, __FILE__, __LINE__);
+
+	/* No other threads can modify this block. */
+	m_modify_clock = buf_block_get_modify_clock(m_block);
+
+	mtr_commit(m_mtr);
+}
+
+/** Start mtr and latch the block */
+void
+PageBulk::latch()
+{
+	ibool	ret;
+
+	mtr_start(m_mtr);
+	mtr_x_lock(dict_index_get_lock(m_index), m_mtr);
+	mtr_set_log_mode(m_mtr, MTR_LOG_NO_REDO);
+	mtr_set_flush_observer(m_mtr, m_flush_observer);
+
+	/* TODO: need a simple and wait version of buf_page_optimistic_get. */
+	ret = buf_page_optimistic_get(RW_X_LATCH, m_block, m_modify_clock,
+				      __FILE__, __LINE__, m_mtr);
+	/* In case the block is S-latched by page_cleaner. */
+	if (!ret) {
+		page_id_t       page_id(dict_index_get_space(m_index), m_page_no);
+		page_size_t     page_size(dict_table_page_size(m_index->table));
+
+		m_block = buf_page_get_gen(page_id, page_size, RW_X_LATCH,
+					   m_block, BUF_GET_IF_IN_POOL,
+					   __FILE__, __LINE__, m_mtr);
+		ut_ad(m_block != NULL);
+	}
+
+	buf_block_buf_fix_dec(m_block);
+
+	ut_ad(m_cur_rec > m_page && m_cur_rec < m_heap_top);
+}
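+
+/* Sketch of the release()/latch() pairing above (descriptive note):
+release() buffer-fixes the block, records its modify clock and commits
+the mtr, so that log_free_check() can run while no latches are held;
+latch() then re-acquires the index lock and revalidates the block
+against the saved modify clock, falling back to buf_page_get_gen() if
+the optimistic get fails (e.g. the page_cleaner holds an S-latch). */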
+
+/** Split a page
+@param[in]	page_bulk	page to split
+@param[in]	next_page_bulk	next page
+@return	error code */
+dberr_t
+BtrBulk::pageSplit(
+	PageBulk*	page_bulk,
+	PageBulk*	next_page_bulk)
+{
+	ut_ad(page_bulk->getPageZip() != NULL);
+
+	/* 1. Check if we have only one user record on the page. */
+	if (page_bulk->getRecNo() <= 1) {
+		return(DB_TOO_BIG_RECORD);
+	}
+
+	/* 2. Create a new page. */
+	PageBulk new_page_bulk(m_index, m_trx_id, FIL_NULL,
+			       page_bulk->getLevel(), m_flush_observer);
+	dberr_t	err = new_page_bulk.init();
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
+	/* 3. Copy the upper half to the new page. */
+	rec_t*	split_rec = page_bulk->getSplitRec();
+	new_page_bulk.copyIn(split_rec);
+	page_bulk->copyOut(split_rec);
+
+	/* 4. Commit the split page. */
+	err = pageCommit(page_bulk, &new_page_bulk, true);
+	if (err != DB_SUCCESS) {
+		pageAbort(&new_page_bulk);
+		return(err);
+	}
+
+	/* 5. Commit the new page. */
+	err = pageCommit(&new_page_bulk, next_page_bulk, true);
+	if (err != DB_SUCCESS) {
+		pageAbort(&new_page_bulk);
+		return(err);
+	}
+
+	return(err);
+}
+
+/** Commit (finish) a page. We set the next/prev page numbers, compress
+the page if it belongs to a compressed table (splitting the page if
+compression fails), insert a node pointer to the father page if needed,
+and commit the mini-transaction.
+@param[in]	page_bulk	page to commit
+@param[in]	next_page_bulk	next page
+@param[in]	insert_father	false when page_bulk is a root page and
+				true when it's a non-root page
+@return	error code */
+dberr_t
+BtrBulk::pageCommit(
+	PageBulk*	page_bulk,
+	PageBulk*	next_page_bulk,
+	bool		insert_father)
+{
+	page_bulk->finish();
+
+	/* Set page links */
+	if (next_page_bulk != NULL) {
+		ut_ad(page_bulk->getLevel() == next_page_bulk->getLevel());
+
+		page_bulk->setNext(next_page_bulk->getPageNo());
+		next_page_bulk->setPrev(page_bulk->getPageNo());
+	} else {
+		/* If a page is released and latched again, we need to
+		mark it modified in the mini-transaction. */
+		page_bulk->setNext(FIL_NULL);
+	}
+
+	/* Compress page if it's a compressed table. */
+	if (page_bulk->getPageZip() != NULL && !page_bulk->compress()) {
+		return(pageSplit(page_bulk, next_page_bulk));
+	}
+
+	/* Insert node pointer to father page. */
+	if (insert_father) {
+		dtuple_t*	node_ptr = page_bulk->getNodePtr();
+		dberr_t		err = insert(node_ptr, page_bulk->getLevel()+1);
+
+		if (err != DB_SUCCESS) {
+			return(err);
+		}
+	}
+
+	/* Commit mtr. */
+	page_bulk->commit(true);
+
+	return(DB_SUCCESS);
+}
+
+/** Log free check */
+void
+BtrBulk::logFreeCheck()
+{
+	if (log_sys->check_flush_or_checkpoint) {
+		release();
+
+		log_free_check();
+
+		latch();
+	}
+}
+
+/** Release all latches */
+void
+BtrBulk::release()
+{
+	ut_ad(m_root_level + 1 == m_page_bulks->size());
+
+	for (ulint level = 0; level <= m_root_level; level++) {
+		PageBulk*    page_bulk = m_page_bulks->at(level);
+
+		page_bulk->release();
+	}
+}
+
+/** Re-latch all latches */
+void
+BtrBulk::latch()
+{
+	ut_ad(m_root_level + 1 == m_page_bulks->size());
+
+	for (ulint level = 0; level <= m_root_level; level++) {
+		PageBulk*    page_bulk = m_page_bulks->at(level);
+		page_bulk->latch();
+	}
+}
+
+/** Insert a tuple to a page at the given level.
+@param[in]	tuple	tuple to insert
+@param[in]	level	B-tree level
+@return error code */
+dberr_t
+BtrBulk::insert(
+	dtuple_t*	tuple,
+	ulint		level)
+{
+	bool		is_left_most = false;
+
+	ut_ad(m_heap != NULL);
+
+	/* Check if we need to create a PageBulk for the level. */
+	if (level + 1 > m_page_bulks->size()) {
+		PageBulk*	new_page_bulk
+			= UT_NEW_NOKEY(PageBulk(m_index, m_trx_id, FIL_NULL,
+						level, m_flush_observer));
+		dberr_t	err = new_page_bulk->init();
+		if (err != DB_SUCCESS) {
+			return(err);
+		}
+
+		m_page_bulks->push_back(new_page_bulk);
+		ut_ad(level + 1 == m_page_bulks->size());
+		m_root_level = level;
+
+		is_left_most = true;
+	}
+
+	ut_ad(m_page_bulks->size() > level);
+
+	PageBulk*	page_bulk = m_page_bulks->at(level);
+
+	if (is_left_most && level > 0 && page_bulk->getRecNo() == 0) {
+		/* The node pointer must be marked as the predefined minimum
+		record,	as there is no lower alphabetical limit to records in
+		the leftmost node of a level: */
+		dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple)
+					    | REC_INFO_MIN_REC_FLAG);
+	}
+
+	ulint		n_ext = 0;
+	ulint		rec_size = rec_get_converted_size(m_index, tuple, n_ext);
+	big_rec_t*	big_rec = NULL;
+
+	if (page_bulk->needExt(tuple, rec_size)) {
+		/* The record is so big that we have to store some fields
+		externally on separate database pages */
+		big_rec = dtuple_convert_big_rec(m_index, 0, tuple, &n_ext);
+
+		if (UNIV_UNLIKELY(big_rec == NULL)) {
+			return(DB_TOO_BIG_RECORD);
+		}
+
+		rec_size = rec_get_converted_size(m_index, tuple, n_ext);
+	}
+
+	if (!page_bulk->isSpaceAvailable(rec_size)) {
+		/* Create a sibling page_bulk. */
+		PageBulk*	sibling_page_bulk;
+		sibling_page_bulk = UT_NEW_NOKEY(PageBulk(m_index, m_trx_id,
+							  FIL_NULL, level,
+							  m_flush_observer));
+		dberr_t	err = sibling_page_bulk->init();
+		if (err != DB_SUCCESS) {
+			UT_DELETE(sibling_page_bulk);
+			return(err);
+		}
+
+		/* Commit page bulk. */
+		err = pageCommit(page_bulk, sibling_page_bulk, true);
+		if (err != DB_SUCCESS) {
+			pageAbort(sibling_page_bulk);
+			UT_DELETE(sibling_page_bulk);
+			return(err);
+		}
+
+		/* Set new page bulk to page_bulks. */
+		ut_ad(sibling_page_bulk->getLevel() <= m_root_level);
+		m_page_bulks->at(level) = sibling_page_bulk;
+
+		UT_DELETE(page_bulk);
+		page_bulk = sibling_page_bulk;
+
+		/* Important: do a log_free_check to see whether we need
+		a checkpoint. */
+		if (page_is_leaf(sibling_page_bulk->getPage())) {
+			/* Check whether trx is interrupted */
+			if (m_flush_observer->check_interrupted()) {
+				return(DB_INTERRUPTED);
+			}
+
+			/* Wake up page cleaner to flush dirty pages. */
+			srv_inc_activity_count();
+			os_event_set(buf_flush_event);
+
+			logFreeCheck();
+		}
+
+	}
+
+	rec_t*		rec;
+	ulint*		offsets = NULL;
+	/* Convert tuple to rec. */
+	rec = rec_convert_dtuple_to_rec(static_cast<byte*>(mem_heap_alloc(
+		page_bulk->m_heap, rec_size)), m_index, tuple, n_ext);
+	offsets = rec_get_offsets(rec, m_index, offsets, ULINT_UNDEFINED,
+				  &(page_bulk->m_heap));
+
+	page_bulk->insert(rec, offsets);
+
+	if (big_rec != NULL) {
+		dberr_t		err;
+
+		ut_ad(dict_index_is_clust(m_index));
+		ut_ad(page_bulk->getLevel() == 0);
+		ut_ad(page_bulk == m_page_bulks->at(0));
+
+		/* Release all latches except the leaf node's. */
+		for (ulint level = 1; level <= m_root_level; level++) {
+			PageBulk*    page_bulk = m_page_bulks->at(level);
+
+			page_bulk->release();
+		}
+
+		err = page_bulk->storeExt(big_rec, offsets);
+
+		/* Latch */
+		for (ulint level = 1; level <= m_root_level; level++) {
+			PageBulk*    page_bulk = m_page_bulks->at(level);
+			page_bulk->latch();
+		}
+
+		dtuple_convert_back_big_rec(m_index, tuple, big_rec);
+
+		return(err);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/** Finish the B-tree bulk load. We commit the last page in each level
+and copy the last page in the top level to the root page of the index
+if no error occurs.
+@param[in]	err	error code of the bulk load so far
+@return error code */
+dberr_t
+BtrBulk::finish(dberr_t	err)
+{
+	ulint		last_page_no = FIL_NULL;
+
+	ut_ad(!dict_table_is_temporary(m_index->table));
+
+	if (m_page_bulks->size() == 0) {
+		/* The table is empty. The root page of the index tree
+		is already in a consistent state. No need to flush. */
+		return(err);
+	}
+
+	ut_ad(m_root_level + 1 == m_page_bulks->size());
+
+	/* Finish all page bulks */
+	for (ulint level = 0; level <= m_root_level; level++) {
+		PageBulk*	page_bulk = m_page_bulks->at(level);
+
+		last_page_no = page_bulk->getPageNo();
+
+		if (err == DB_SUCCESS) {
+			err = pageCommit(page_bulk, NULL,
+					 level != m_root_level);
+		}
+
+		if (err != DB_SUCCESS) {
+			pageAbort(page_bulk);
+		}
+
+		UT_DELETE(page_bulk);
+	}
+
+	if (err == DB_SUCCESS) {
+		rec_t*		first_rec;
+		mtr_t		mtr;
+		buf_block_t*	last_block;
+		page_t*		last_page;
+		page_id_t	page_id(dict_index_get_space(m_index),
+					last_page_no);
+		page_size_t	page_size(dict_table_page_size(m_index->table));
+		ulint		root_page_no = dict_index_get_page(m_index);
+		PageBulk	root_page_bulk(m_index, m_trx_id,
+					       root_page_no, m_root_level,
+					       m_flush_observer);
+
+		mtr_start(&mtr);
+		mtr.set_named_space(dict_index_get_space(m_index));
+		mtr_x_lock(dict_index_get_lock(m_index), &mtr);
+
+		ut_ad(last_page_no != FIL_NULL);
+		last_block = btr_block_get(page_id, page_size,
+					   RW_X_LATCH, m_index, &mtr);
+		last_page = buf_block_get_frame(last_block);
+		first_rec = page_rec_get_next(page_get_infimum_rec(last_page));
+		ut_ad(page_rec_is_user_rec(first_rec));
+
+		/* Copy last page to root page. */
+		err = root_page_bulk.init();
+		if (err != DB_SUCCESS) {
+			mtr_commit(&mtr);
+			return(err);
+		}
+		root_page_bulk.copyIn(first_rec);
+
+		/* Remove last page. */
+		btr_page_free_low(m_index, last_block, m_root_level, &mtr);
+
+		/* Do not flush the last page. */
+		last_block->page.flush_observer = NULL;
+
+		mtr_commit(&mtr);
+
+		err = pageCommit(&root_page_bulk, NULL, false);
+		ut_ad(err == DB_SUCCESS);
+	}
+
+#ifdef UNIV_DEBUG
+	dict_sync_check check(true);
+
+	ut_ad(!sync_check_iterate(check));
+#endif /* UNIV_DEBUG */
+
+	ut_ad(err != DB_SUCCESS || btr_validate_index(m_index, NULL, false));
+	return(err);
+}
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index ad32353..5e8f1c4 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -62,13 +62,14 @@ Created 10/16/1994 Heikki Tuuri
 #include "row0purge.h"
 #include "row0upd.h"
 #include "trx0rec.h"
-#include "trx0roll.h" /* trx_is_recv() */
+#include "trx0roll.h"
 #include "que0que.h"
 #include "row0row.h"
 #include "srv0srv.h"
 #include "ibuf0ibuf.h"
 #include "lock0lock.h"
 #include "zlib.h"
+#include "srv0start.h"
 
 /** Buffered B-tree operation types, introduced as part of delete buffering. */
 enum btr_op_t {
@@ -79,29 +80,47 @@ enum btr_op_t {
 	BTR_DELMARK_OP			/*!< Mark a record for deletion */
 };
 
-#ifdef UNIV_DEBUG
-/** If the following is set to TRUE, this module prints a lot of
-trace information of individual record operations */
-UNIV_INTERN ibool	btr_cur_print_record_ops = FALSE;
-#endif /* UNIV_DEBUG */
+/** Modification types for the B-tree operation. */
+enum btr_intention_t {
+	BTR_INTENTION_DELETE,
+	BTR_INTENTION_BOTH,
+	BTR_INTENTION_INSERT
+};
+#if BTR_INTENTION_DELETE > BTR_INTENTION_BOTH
+#error "BTR_INTENTION_DELETE > BTR_INTENTION_BOTH"
+#endif
+#if BTR_INTENTION_BOTH > BTR_INTENTION_INSERT
+#error "BTR_INTENTION_BOTH > BTR_INTENTION_INSERT"
+#endif
+
+/** For the index->lock scalability improvement, the only clear performance
+regression observed was caused by the history list growing very long. That
+is because the previously exclusive use of index->lock also had the effect
+of reserving free blocks and read IO bandwidth for purge with priority. To
+keep the history list from growing huge, as it did not under the previous
+implementation, we still prioritize pessimistic tree operations done by
+purge whenever the list appears to be growing too long.
+
+Experimentally, the history list length starts to clearly affect performance
+throughput from about 100000 entries. */
+#define BTR_CUR_FINE_HISTORY_LENGTH	100000
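+
+/* This gate is applied in btr_cur_search_to_nth_level() below: when
+trx_sys->rseg_history_len exceeds this value and read I/Os are pending,
+delete-intended (purge) operations take the index X-latch instead of
+the SX-latch, restoring the old prioritization. */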
 
 /** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
-UNIV_INTERN ulint	btr_cur_n_non_sea	= 0;
+ulint	btr_cur_n_non_sea	= 0;
 /** Number of successful adaptive hash index lookups in
 btr_cur_search_to_nth_level(). */
-UNIV_INTERN ulint	btr_cur_n_sea		= 0;
+ulint	btr_cur_n_sea		= 0;
 /** Old value of btr_cur_n_non_sea.  Copied by
 srv_refresh_innodb_monitor_stats().  Referenced by
 srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint	btr_cur_n_non_sea_old	= 0;
+ulint	btr_cur_n_non_sea_old	= 0;
 /** Old value of btr_cur_n_sea.  Copied by
 srv_refresh_innodb_monitor_stats().  Referenced by
 srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint	btr_cur_n_sea_old	= 0;
+ulint	btr_cur_n_sea_old	= 0;
 
 #ifdef UNIV_DEBUG
 /* Flag to limit optimistic insert records */
-UNIV_INTERN uint	btr_cur_limit_optimistic_insert_debug = 0;
+uint	btr_cur_limit_optimistic_insert_debug = 0;
 #endif /* UNIV_DEBUG */
 
 /** In the optimistic insert, if the insert does not fit, but this much space
@@ -120,29 +139,19 @@ can be released by page reorganize, then it is reorganized */
 						part header, in bytes */
 
 /** Estimated table level stats from sampled value.
-@param value		sampled stats
-@param index		index being sampled
-@param sample		number of sampled rows
-@param ext_size		external stored data size
-@param not_empty	table not empty
+@param value sampled stats
+@param index index being sampled
+@param sample number of sampled rows
+@param ext_size externally stored data size
+@param not_empty table not empty
 @return estimated table wide stats from sampled value */
-#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\
-	(((value) * (ib_int64_t) index->stat_n_leaf_pages		\
+#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty) \
+	(((value) * static_cast<int64_t>(index->stat_n_leaf_pages) \
 	  + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size)))
 
 /* @} */
 #endif /* !UNIV_HOTBACKUP */
 
-/** A BLOB field reference full of zero, for use in assertions and tests.
-Initially, BLOB field references are set to zero, in
-dtuple_convert_big_rec(). */
-const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE] = {
-	0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0,
-};
-
 #ifndef UNIV_HOTBACKUP
 /*******************************************************************//**
 Marks all extern fields in a record as owned by the record. This function
@@ -183,7 +192,7 @@ btr_rec_free_updated_extern_fields(
 				part will be updated, or NULL */
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
 	const upd_t*	update,	/*!< in: update vector */
-	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	bool		rollback,/*!< in: performing rollback? */
 	mtr_t*		mtr);	/*!< in: mini-transaction handle which contains
 				an X-latch to record page and to the tree */
 /***********************************************************//**
@@ -198,119 +207,172 @@ btr_rec_free_externally_stored_fields(
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
 	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
 				part will be updated, or NULL */
-	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	bool		rollback,/*!< in: performing rollback? */
 	mtr_t*		mtr);	/*!< in: mini-transaction handle which contains
 				an X-latch to record page and to the index
 				tree */
 #endif /* !UNIV_HOTBACKUP */
 
-/******************************************************//**
-The following function is used to set the deleted bit of a record. */
-UNIV_INLINE
-void
-btr_rec_set_deleted_flag(
-/*=====================*/
-	rec_t*		rec,	/*!< in/out: physical record */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page (or NULL) */
-	ulint		flag)	/*!< in: nonzero if delete marked */
-{
-	if (page_rec_is_comp(rec)) {
-		rec_set_deleted_flag_new(rec, page_zip, flag);
-	} else {
-		ut_ad(!page_zip);
-		rec_set_deleted_flag_old(rec, flag);
-	}
-}
-
 #ifndef UNIV_HOTBACKUP
 /*==================== B-TREE SEARCH =========================*/
 
-/********************************************************************//**
-Latches the leaf page or pages requested. */
-static
-void
+#if MTR_MEMO_PAGE_S_FIX != RW_S_LATCH
+#error "MTR_MEMO_PAGE_S_FIX != RW_S_LATCH"
+#endif
+#if MTR_MEMO_PAGE_X_FIX != RW_X_LATCH
+#error "MTR_MEMO_PAGE_X_FIX != RW_X_LATCH"
+#endif
+#if MTR_MEMO_PAGE_SX_FIX != RW_SX_LATCH
+#error "MTR_MEMO_PAGE_SX_FIX != RW_SX_LATCH"
+#endif
+
+/** Latches the leaf page or pages requested.
+@param[in]	block		leaf page where the search converged
+@param[in]	page_id		page id of the leaf
+@param[in]	page_size	page size of the leaf
+@param[in]	latch_mode	BTR_SEARCH_LEAF, ...
+@param[in]	cursor		cursor
+@param[in]	mtr		mini-transaction
+@return	blocks and savepoints which are actually latched. */
+btr_latch_leaves_t
 btr_cur_latch_leaves(
-/*=================*/
-	page_t*		page,		/*!< in: leaf page where the search
-					converged */
-	ulint		space,		/*!< in: space id */
-	ulint		zip_size,	/*!< in: compressed page size in bytes
-					or 0 for uncompressed pages */
-	ulint		page_no,	/*!< in: page number of the leaf */
-	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
-	btr_cur_t*	cursor,		/*!< in: cursor */
-	mtr_t*		mtr)		/*!< in: mtr */
+	buf_block_t*		block,
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			latch_mode,
+	btr_cur_t*		cursor,
+	mtr_t*			mtr)
 {
 	ulint		mode;
 	ulint		left_page_no;
 	ulint		right_page_no;
 	buf_block_t*	get_block;
+	page_t*		page = buf_block_get_frame(block);
+	bool		spatial;
+	btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}};
 
-	ut_ad(page && mtr);
+	spatial = dict_index_is_spatial(cursor->index) && cursor->rtr_info;
+	ut_ad(buf_page_in_file(&block->page));
 
 	switch (latch_mode) {
 	case BTR_SEARCH_LEAF:
 	case BTR_MODIFY_LEAF:
-		mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
-		get_block = btr_block_get(
-			space, zip_size, page_no, mode, cursor->index, mtr);
+	case BTR_SEARCH_TREE:
+		if (spatial) {
+			cursor->rtr_info->tree_savepoints[RTR_MAX_LEVELS]
+				= mtr_set_savepoint(mtr);
+		}
+
+		mode = latch_mode == BTR_MODIFY_LEAF ? RW_X_LATCH : RW_S_LATCH;
+		latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
+		get_block = btr_block_get(page_id, page_size, mode,
+					  cursor->index, mtr);
+		latch_leaves.blocks[1] = get_block;
 #ifdef UNIV_BTR_DEBUG
 		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
 #endif /* UNIV_BTR_DEBUG */
-		get_block->check_index_page_at_flush = TRUE;
-		return;
+		if (spatial) {
+			cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS]
+				= get_block;
+		}
+
+		return(latch_leaves);
 	case BTR_MODIFY_TREE:
-		/* x-latch also brothers from left to right */
+		/* The latch is exclusive against other operations that
+		call btr_page_set_prev() */
+		ut_ad(mtr_memo_contains_flagged(mtr,
+			dict_index_get_lock(cursor->index),
+			MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)
+		      || dict_table_is_intrinsic(cursor->index->table));
+		/* x-latch also siblings from left to right */
 		left_page_no = btr_page_get_prev(page, mtr);
 
 		if (left_page_no != FIL_NULL) {
+
+			if (spatial) {
+				cursor->rtr_info->tree_savepoints[
+					RTR_MAX_LEVELS] = mtr_set_savepoint(mtr);
+			}
+
+			latch_leaves.savepoints[0] = mtr_set_savepoint(mtr);
 			get_block = btr_block_get(
-				space, zip_size, left_page_no,
-				RW_X_LATCH, cursor->index, mtr);
-#ifdef UNIV_BTR_DEBUG
-			ut_a(page_is_comp(get_block->frame)
-			     == page_is_comp(page));
-			ut_a(btr_page_get_next(get_block->frame, mtr)
-			     == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-			get_block->check_index_page_at_flush = TRUE;
+				page_id_t(page_id.space(), left_page_no),
+				page_size, RW_X_LATCH, cursor->index, mtr);
+			latch_leaves.blocks[0] = get_block;
+
+			if (spatial) {
+				cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS]
+					= get_block;
+			}
 		}
 
+		if (spatial) {
+			cursor->rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1]
+				= mtr_set_savepoint(mtr);
+		}
+
+		latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
 		get_block = btr_block_get(
-			space, zip_size, page_no,
-			RW_X_LATCH, cursor->index, mtr);
+			page_id, page_size, RW_X_LATCH, cursor->index, mtr);
+		latch_leaves.blocks[1] = get_block;
+
 #ifdef UNIV_BTR_DEBUG
+		/* Sanity check only after both the blocks are latched. */
+		if (latch_leaves.blocks[0] != NULL) {
+			ut_a(page_is_comp(latch_leaves.blocks[0]->frame)
+				== page_is_comp(page));
+			ut_a(btr_page_get_next(
+				latch_leaves.blocks[0]->frame, mtr)
+				== page_get_page_no(page));
+		}
 		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
 #endif /* UNIV_BTR_DEBUG */
-		get_block->check_index_page_at_flush = TRUE;
+
+		if (spatial) {
+			cursor->rtr_info->tree_blocks[RTR_MAX_LEVELS + 1]
+				= get_block;
+		}
 
 		right_page_no = btr_page_get_next(page, mtr);
 
 		if (right_page_no != FIL_NULL) {
+			if (spatial) {
+				cursor->rtr_info->tree_savepoints[
+					RTR_MAX_LEVELS + 2] = mtr_set_savepoint(
+								mtr);
+			}
+			latch_leaves.savepoints[2] = mtr_set_savepoint(mtr);
 			get_block = btr_block_get(
-				space, zip_size, right_page_no,
-				RW_X_LATCH, cursor->index, mtr);
+				page_id_t(page_id.space(), right_page_no),
+				page_size, RW_X_LATCH, cursor->index, mtr);
+			latch_leaves.blocks[2] = get_block;
 #ifdef UNIV_BTR_DEBUG
 			ut_a(page_is_comp(get_block->frame)
 			     == page_is_comp(page));
 			ut_a(btr_page_get_prev(get_block->frame, mtr)
 			     == page_get_page_no(page));
 #endif /* UNIV_BTR_DEBUG */
-			get_block->check_index_page_at_flush = TRUE;
+			if (spatial) {
+				cursor->rtr_info->tree_blocks[
+					RTR_MAX_LEVELS + 2] = get_block;
+			}
 		}
 
-		return;
+		return(latch_leaves);
 
 	case BTR_SEARCH_PREV:
 	case BTR_MODIFY_PREV:
 		mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
-		/* latch also left brother */
+		/* latch also left sibling */
+		rw_lock_s_lock(&block->lock);
 		left_page_no = btr_page_get_prev(page, mtr);
+		rw_lock_s_unlock(&block->lock);
 
 		if (left_page_no != FIL_NULL) {
+			latch_leaves.savepoints[0] = mtr_set_savepoint(mtr);
 			get_block = btr_block_get(
-				space, zip_size,
-				left_page_no, mode, cursor->index, mtr);
+				page_id_t(page_id.space(), left_page_no),
+				page_size, mode, cursor->index, mtr);
+			latch_leaves.blocks[0] = get_block;
 			cursor->left_block = get_block;
 #ifdef UNIV_BTR_DEBUG
 			ut_a(page_is_comp(get_block->frame)
@@ -318,19 +380,333 @@ btr_cur_latch_leaves(
 			ut_a(btr_page_get_next(get_block->frame, mtr)
 			     == page_get_page_no(page));
 #endif /* UNIV_BTR_DEBUG */
-			get_block->check_index_page_at_flush = TRUE;
 		}
 
-		get_block = btr_block_get(
-			space, zip_size, page_no, mode, cursor->index, mtr);
+		latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
+		get_block = btr_block_get(page_id, page_size, mode,
+					  cursor->index, mtr);
+		latch_leaves.blocks[1] = get_block;
 #ifdef UNIV_BTR_DEBUG
 		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
 #endif /* UNIV_BTR_DEBUG */
-		get_block->check_index_page_at_flush = TRUE;
-		return;
+		return(latch_leaves);
+	case BTR_CONT_MODIFY_TREE:
+		ut_ad(dict_index_is_spatial(cursor->index));
+		return(latch_leaves);
+	}
+
+	ut_error;
+	return(latch_leaves);
+}
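+
+/* Descriptive note (an assumption from the struct fields, not spelled
+out in the original): the returned blocks[] and savepoints[] let the
+caller later release an individual sibling latch via its mtr savepoint
+instead of keeping all three latches until the mtr commits. */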
+
+/** Optimistically latches the leaf page or pages requested.
+@param[in]	block		guessed buffer block
+@param[in]	modify_clock	modify clock value
+@param[in,out]	latch_mode	BTR_SEARCH_LEAF, ...
+@param[in,out]	cursor		cursor
+@param[in]	file		file name
+@param[in]	line		line where called
+@param[in]	mtr		mini-transaction
+@return true if success */
+bool
+btr_cur_optimistic_latch_leaves(
+	buf_block_t*	block,
+	ib_uint64_t	modify_clock,
+	ulint*		latch_mode,
+	btr_cur_t*	cursor,
+	const char*	file,
+	ulint		line,
+	mtr_t*		mtr)
+{
+	ulint		mode;
+	ulint		left_page_no;
+
+	switch (*latch_mode) {
+	case BTR_SEARCH_LEAF:
+	case BTR_MODIFY_LEAF:
+		return(buf_page_optimistic_get(*latch_mode, block,
+				modify_clock, file, line, mtr));
+	case BTR_SEARCH_PREV:
+	case BTR_MODIFY_PREV:
+		mode = *latch_mode == BTR_SEARCH_PREV
+			? RW_S_LATCH : RW_X_LATCH;
+
+		buf_page_mutex_enter(block);
+		if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+			buf_page_mutex_exit(block);
+			return(false);
+		}
+		/* Pin the block so that it cannot be relocated */
+		buf_block_buf_fix_inc(block, file, line);
+		buf_page_mutex_exit(block);
+
+		rw_lock_s_lock(&block->lock);
+		if (block->modify_clock != modify_clock) {
+			rw_lock_s_unlock(&block->lock);
+
+			goto unpin_failed;
+		}
+		left_page_no = btr_page_get_prev(
+			buf_block_get_frame(block), mtr);
+		rw_lock_s_unlock(&block->lock);
+
+		if (left_page_no != FIL_NULL) {
+			const page_id_t	page_id(
+				dict_index_get_space(cursor->index),
+				left_page_no);
+
+			cursor->left_block = btr_block_get(
+				page_id,
+				dict_table_page_size(cursor->index->table),
+				mode, cursor->index, mtr);
+		} else {
+			cursor->left_block = NULL;
+		}
+
+		if (buf_page_optimistic_get(mode, block, modify_clock,
+					    file, line, mtr)) {
+			if (btr_page_get_prev(buf_block_get_frame(block), mtr)
+			    == left_page_no) {
+				/* adjust buf_fix_count */
+				buf_page_mutex_enter(block);
+				buf_block_buf_fix_dec(block);
+				buf_page_mutex_exit(block);
+
+				*latch_mode = mode;
+				return(true);
+			} else {
+				/* release the block */
+				btr_leaf_page_release(block, mode, mtr);
+			}
+		}
+
+		/* release the left block */
+		if (cursor->left_block != NULL) {
+			btr_leaf_page_release(cursor->left_block,
+					      mode, mtr);
+		}
+unpin_failed:
+		/* unpin the block */
+		buf_page_mutex_enter(block);
+		buf_block_buf_fix_dec(block);
+		buf_page_mutex_exit(block);
+
+		return(false);
+
+	default:
+		ut_error;
+		return(false);
+	}
+}
+
+/**
+Gets the intention in btr_intention_t from the latch_mode, and clears the
+intention flags from the latch_mode.
+@param latch_mode	in/out: pointer to latch_mode
+@return intention for latching the tree */
+static
+btr_intention_t
+btr_cur_get_and_clear_intention(
+	ulint	*latch_mode)
+{
+	btr_intention_t	intention;
+
+	switch (*latch_mode & (BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE)) {
+	case BTR_LATCH_FOR_INSERT:
+		intention = BTR_INTENTION_INSERT;
+		break;
+	case BTR_LATCH_FOR_DELETE:
+		intention = BTR_INTENTION_DELETE;
+		break;
+	default:
+		/* both or unknown */
+		intention = BTR_INTENTION_BOTH;
+	}
+	*latch_mode &= ~(BTR_LATCH_FOR_INSERT | BTR_LATCH_FOR_DELETE);
+
+	return(intention);
+}
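+
+/* For example (illustrative): a latch_mode of
+(BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT) yields BTR_INTENTION_INSERT and
+leaves plain BTR_MODIFY_TREE behind in *latch_mode. */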
+
+/**
+Gets the desired latch type for the root leaf (the root page is also a
+leaf) for the given latch mode.
+@param latch_mode	in: BTR_SEARCH_LEAF, ...
+@return latch type */
+static
+rw_lock_type_t
+btr_cur_latch_for_root_leaf(
+	ulint	latch_mode)
+{
+	switch (latch_mode) {
+	case BTR_SEARCH_LEAF:
+	case BTR_SEARCH_TREE:
+	case BTR_SEARCH_PREV:
+		return(RW_S_LATCH);
+	case BTR_MODIFY_LEAF:
+	case BTR_MODIFY_TREE:
+	case BTR_MODIFY_PREV:
+		return(RW_X_LATCH);
+	case BTR_CONT_MODIFY_TREE:
+	case BTR_CONT_SEARCH_TREE:
+		/* A root page should be latched already, and it does
+		not need to be latched here.
+		fall through (RW_NO_LATCH) */
+	case BTR_NO_LATCHES:
+		return(RW_NO_LATCH);
+	}
+
+	ut_error;
+	return(RW_NO_LATCH); /* avoid compiler warnings */
+}
+
+/** Detects whether modifying the record might require modifying the
+tree structure.
+@param[in]	index		index
+@param[in]	page		page
+@param[in]	lock_intention	lock intention for the tree operation
+@param[in]	rec		record (current node_ptr)
+@param[in]	rec_size	size of the record or max size of node_ptr
+@param[in]	page_size	page size
+@param[in]	mtr		mtr
+@return true if tree modification is needed */
+static
+bool
+btr_cur_will_modify_tree(
+	dict_index_t*	index,
+	const page_t*	page,
+	btr_intention_t	lock_intention,
+	const rec_t*	rec,
+	ulint		rec_size,
+	const page_size_t&	page_size,
+	mtr_t*		mtr)
+{
+	ut_ad(!page_is_leaf(page));
+	ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+					MTR_MEMO_X_LOCK
+					| MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
+
+	/* A pessimistic delete of the first record causes a delete & insert
+	of the node_ptr at the upper level, and a subsequent page shrink is
+	possible, which causes another delete of a node_ptr at the upper
+	level. So we should pay attention not only to the first and last
+	records but also to the 2nd record: if the "delete & insert" are
+	done for a different page, the 2nd record becomes the first record,
+	and the following compress might delete it and cause an upper-level
+	node_ptr modification. */
+
+	if (lock_intention <= BTR_INTENTION_BOTH) {
+		ulint	margin;
+
+		/* Check what a delete will cause (BTR_INTENTION_BOTH
+		or BTR_INTENTION_DELETE). */
+		/* The first, 2nd, 2nd-last and last records make 4 records. */
+		if (page_get_n_recs(page) < 5) {
+			return(true);
+		}
+
+		/* Is it the first, 2nd, last or 2nd-last record? */
+		if (page_rec_is_first(rec, page)
+		    || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
+			&& (page_rec_is_last(rec, page)
+			    || page_rec_is_second_last(rec, page)))
+		    || (mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL
+			&& page_rec_is_second(rec, page))) {
+			return(true);
+		}
+
+		if (lock_intention == BTR_INTENTION_BOTH) {
+			/* A delete of the leftmost record in a page causes
+			a delete & insert at its parent page. After that,
+			the delete might trigger btr_compress() and a
+			delete of a record at its parent page. Thus we
+			should allow for at most 2 deletes. */
+
+			margin = rec_size * 2;
+		} else {
+			ut_ad(lock_intention == BTR_INTENTION_DELETE);
+
+			margin = rec_size;
+		}
+		/* NOTE: we call mach_read_from_4() directly to avoid an
+		assertion failure. It is safe because we already hold the
+		SX-latch on the index tree. */
+		if (page_get_data_size(page)
+			< margin + BTR_CUR_PAGE_COMPRESS_LIMIT(index)
+		    || (mach_read_from_4(page + FIL_PAGE_NEXT)
+				== FIL_NULL
+			&& mach_read_from_4(page + FIL_PAGE_PREV)
+				== FIL_NULL)) {
+			return(true);
+		}
+	}
+
+	if (lock_intention >= BTR_INTENTION_BOTH) {
+		/* Check what an insert will cause (BTR_INTENTION_BOTH
+		or BTR_INTENTION_INSERT). */
+
+		/* Once btr_cur_limit_optimistic_insert_debug is in effect,
+		we should check it here in advance, since the maximum
+		number of records allowed in a page is limited. */
+		LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
+					      return(true));
+
+		/* We need 2 records' space for the case where a single
+		split and insert cannot fit.
+		page_get_max_insert_size_after_reorganize() includes space
+		for the page directory already. */
+		ulint	max_size
+			= page_get_max_insert_size_after_reorganize(page, 2);
+
+		if (max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT + rec_size
+		    || max_size < rec_size * 2) {
+			return(true);
+		}
+		/* TODO: optimize this condition for compressed pages.
+		This is based on the worst compression rate.
+		Currently we look only at the uncompressed page, but we
+		could also check the compressed page with
+		page_zip_available() if it is already in the buffer pool. */
+		/* We need 2 records' space also for the worst compression
+		rate. */
+		if (page_size.is_compressed()
+		    && page_zip_empty_size(index->n_fields,
+					   page_size.physical())
+		       < rec_size * 2 + page_get_data_size(page)
+			 + page_dir_calc_reserved_space(
+				page_get_n_recs(page) + 2) + 1) {
+			return(true);
+		}
+	}
+
+	return(false);
+}
+
+/** Detects whether modifying the record might require a modification
+opposite to the intention.
+@param[in]	page		page
+@param[in]	lock_intention	lock intention for the tree operation
+@param[in]	rec		record (current node_ptr)
+@return	true if tree modification is needed */
+static
+bool
+btr_cur_need_opposite_intention(
+	const page_t*	page,
+	btr_intention_t	lock_intention,
+	const rec_t*	rec)
+{
+	switch (lock_intention) {
+	case BTR_INTENTION_DELETE:
+		return((mach_read_from_4(page + FIL_PAGE_PREV) != FIL_NULL
+			&& page_rec_is_first(rec, page))
+		       || (mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
+			   && page_rec_is_last(rec, page)));
+	case BTR_INTENTION_INSERT:
+		return(mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
+		       && page_rec_is_last(rec, page));
+	case BTR_INTENTION_BOTH:
+		return(false);
 	}
 
 	ut_error;
+	return(false);
 }
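+
+/* Illustrative example (not from the original comments): with
+BTR_INTENTION_DELETE, removing the first record of a page that has a
+left sibling changes the page's node pointer key, which requires a
+delete & insert of the node pointer in the parent -- i.e. a delete
+intention can demand an insert at the upper level, so the caller falls
+back to stronger latching. */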
 
 /********************************************************************//**
@@ -345,7 +721,6 @@ If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
 search tuple should be performed in the B-tree. InnoDB does an insert
 immediately after the cursor. Thus, the cursor may end up on a user record,
 or on a page infimum record. */
-UNIV_INTERN
 void
 btr_cur_search_to_nth_level(
 /*========================*/
@@ -354,7 +729,7 @@ btr_cur_search_to_nth_level(
 	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
 				tuple must be set so that it cannot get
 				compared to the node ptr page number field! */
-	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+	page_cur_mode_t	mode,	/*!< in: PAGE_CUR_L, ...;
 				Inserts should always be made using
 				PAGE_CUR_LE to search the position! */
 	ulint		latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
@@ -370,44 +745,70 @@ btr_cur_search_to_nth_level(
 				to protect the record! */
 	btr_cur_t*	cursor, /*!< in/out: tree cursor; the cursor page is
 				s- or x-latched, but see also above! */
-	ulint		has_search_latch,/*!< in: info on the latch mode the
-				caller currently has on btr_search_latch:
+	ulint		has_search_latch,
+				/*!< in: info on the latch mode the
+				caller currently has on the search system:
 				RW_S_LATCH, or 0 */
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	page_t*		page;
+	page_t*		page = NULL; /* remove warning */
 	buf_block_t*	block;
-	ulint		space;
 	buf_block_t*	guess;
 	ulint		height;
-	ulint		page_no;
 	ulint		up_match;
 	ulint		up_bytes;
 	ulint		low_match;
 	ulint		low_bytes;
 	ulint		savepoint;
 	ulint		rw_latch;
-	ulint		page_mode;
+	page_cur_mode_t	page_mode;
+	page_cur_mode_t	search_mode = PAGE_CUR_UNSUPP;
 	ulint		buf_mode;
 	ulint		estimate;
-	ulint		zip_size;
+	ulint		node_ptr_max_size = UNIV_PAGE_SIZE / 2;
 	page_cur_t*	page_cursor;
 	btr_op_t	btr_op;
 	ulint		root_height = 0; /* remove warning */
 
+	ulint		upper_rw_latch, root_leaf_rw_latch;
+	btr_intention_t	lock_intention;
+	bool		modify_external;
+	buf_block_t*	tree_blocks[BTR_MAX_LEVELS];
+	ulint		tree_savepoints[BTR_MAX_LEVELS];
+	ulint		n_blocks = 0;
+	ulint		n_releases = 0;
+	bool		detected_same_key_root = false;
+
+	bool		retrying_for_search_prev = false;
+	ulint		leftmost_from_level = 0;
+	buf_block_t**	prev_tree_blocks = NULL;
+	ulint*		prev_tree_savepoints = NULL;
+	ulint		prev_n_blocks = 0;
+	ulint		prev_n_releases = 0;
+	bool		need_path = true;
+	bool		rtree_parent_modified = false;
+	bool		mbr_adj = false;
+	bool		found = false;
+
+	DBUG_ENTER("btr_cur_search_to_nth_level");
+
 #ifdef BTR_CUR_ADAPT
 	btr_search_t*	info;
-#endif
+#endif /* BTR_CUR_ADAPT */
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
+	ulint		offsets2_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets2	= offsets2_;
 	rec_offs_init(offsets_);
+	rec_offs_init(offsets2_);
 	/* Currently, PAGE_CUR_LE is the only search mode used for searches
 	ending at upper levels */
 
-	ut_ad(level == 0 || mode == PAGE_CUR_LE);
+	ut_ad(level == 0 || mode == PAGE_CUR_LE
+	      || RTREE_SEARCH_MODE(mode));
 	ut_ad(dict_index_check_search_tuple(index, tuple));
 	ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr));
 	ut_ad(dtuple_check_typed(tuple));
@@ -421,15 +822,18 @@ btr_cur_search_to_nth_level(
 #ifdef UNIV_DEBUG
 	cursor->up_match = ULINT_UNDEFINED;
 	cursor->low_match = ULINT_UNDEFINED;
-#endif
+#endif /* UNIV_DEBUG */
 
 	ibool	s_latch_by_caller;
 
 	s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
 
 	ut_ad(!s_latch_by_caller
-	      || mtr_memo_contains(mtr, dict_index_get_lock(index),
-				   MTR_MEMO_S_LOCK));
+	      || srv_read_only_mode
+	      || mtr_memo_contains_flagged(mtr,
+					   dict_index_get_lock(index),
+					   MTR_MEMO_S_LOCK
+					   | MTR_MEMO_SX_LOCK));
 
 	/* These flags are mutually exclusive, they are lumped together
 	with the latch mode for historical reasons. It's possible for
@@ -462,14 +866,25 @@ btr_cur_search_to_nth_level(
 	ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
 	/* Operations on the clustered index cannot be buffered. */
 	ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
+	/* Operations on the temporary table(indexes) cannot be buffered. */
+	ut_ad(btr_op == BTR_NO_OP || !dict_table_is_temporary(index->table));
+	/* Operation on the spatial index cannot be buffered. */
+	ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index));
 
 	estimate = latch_mode & BTR_ESTIMATE;
 
+	lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
+
+	modify_external = latch_mode & BTR_MODIFY_EXTERNAL;
+
 	/* Turn the flags unrelated to the latch mode off. */
 	latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
 
+	ut_ad(!modify_external || latch_mode == BTR_MODIFY_LEAF);
+
 	ut_ad(!s_latch_by_caller
 	      || latch_mode == BTR_SEARCH_LEAF
+	      || latch_mode == BTR_SEARCH_TREE
 	      || latch_mode == BTR_MODIFY_LEAF);
 
 	cursor->flag = BTR_CUR_BINARY;
@@ -480,24 +895,34 @@ btr_cur_search_to_nth_level(
 #else
 	info = btr_search_get_info(index);
 
-	guess = info->root_guess;
+	if (!buf_pool_is_obsolete(info->withdraw_clock)) {
+		guess = info->root_guess;
+	} else {
+		guess = NULL;
+	}
 
 #ifdef BTR_CUR_HASH_ADAPT
 
 # ifdef UNIV_SEARCH_PERF_STAT
 	info->n_searches++;
 # endif
-	if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
+	/* Use of AHI is disabled for intrinsic table as these tables re-use
+	the index-id and AHI validation is based on index-id. */
+	if (rw_lock_get_writer(btr_get_search_latch(index))
+		== RW_LOCK_NOT_LOCKED
 	    && latch_mode <= BTR_MODIFY_LEAF
 	    && info->last_hash_succ
+	    && !index->disable_ahi
 	    && !estimate
 # ifdef PAGE_CUR_LE_OR_EXTENDS
 	    && mode != PAGE_CUR_LE_OR_EXTENDS
 # endif /* PAGE_CUR_LE_OR_EXTENDS */
+	    && !dict_index_is_spatial(index)
 	    /* If !has_search_latch, we do a dirty read of
 	    btr_search_enabled below, and btr_search_guess_on_hash()
 	    will have to check it again. */
 	    && UNIV_LIKELY(btr_search_enabled)
+	    && !modify_external
 	    && btr_search_guess_on_hash(index, info, tuple, mode,
 					latch_mode, cursor,
 					has_search_latch, mtr)) {
@@ -512,7 +937,7 @@ btr_cur_search_to_nth_level(
 		      || mode != PAGE_CUR_LE);
 		btr_cur_n_sea++;
 
-		return;
+		DBUG_VOID_RETURN;
 	}
 # endif /* BTR_CUR_HASH_ADAPT */
 #endif /* BTR_CUR_ADAPT */
@@ -523,7 +948,7 @@ btr_cur_search_to_nth_level(
 
 	if (has_search_latch) {
 		/* Release possible search latch to obey latching order */
-		rw_lock_s_unlock(&btr_search_latch);
+		rw_lock_s_unlock(btr_get_search_latch(index));
 	}
 
 	/* Store the position of the tree latch we push to mtr so that we
@@ -533,23 +958,76 @@ btr_cur_search_to_nth_level(
 
 	switch (latch_mode) {
 	case BTR_MODIFY_TREE:
-		mtr_x_lock(dict_index_get_lock(index), mtr);
+		/* Most delete-intended operations are purges.
+		Free blocks and read I/O bandwidth should be prioritized
+		for them when the history list is growing huge. */
+		if (lock_intention == BTR_INTENTION_DELETE
+		    && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+		    && buf_get_n_pending_read_ios()) {
+			mtr_x_lock(dict_index_get_lock(index), mtr);
+		} else if (dict_index_is_spatial(index)
+			   && lock_intention <= BTR_INTENTION_BOTH) {
+			/* X-latch the tree if there is a possibility of a
+			pessimistic delete on the spatial index, as we may
+			need to latch upward in the tree. */
+
+			mtr_x_lock(dict_index_get_lock(index), mtr);
+		} else {
+			mtr_sx_lock(dict_index_get_lock(index), mtr);
+		}
+		upper_rw_latch = RW_X_LATCH;
 		break;
 	case BTR_CONT_MODIFY_TREE:
+	case BTR_CONT_SEARCH_TREE:
 		/* Do nothing */
-		ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-					MTR_MEMO_X_LOCK));
+		ut_ad(srv_read_only_mode
+		      || mtr_memo_contains_flagged(mtr,
+						   dict_index_get_lock(index),
+						   MTR_MEMO_X_LOCK
+						   | MTR_MEMO_SX_LOCK));
+		if (dict_index_is_spatial(index)
+		    && latch_mode == BTR_CONT_MODIFY_TREE) {
+			/* If we are about to locate the parent page for a
+			split and/or merge operation on an R-tree index,
+			X-latch the parent. */
+			upper_rw_latch = RW_X_LATCH;
+		} else {
+			upper_rw_latch = RW_NO_LATCH;
+		}
 		break;
 	default:
-		if (!s_latch_by_caller) {
-			mtr_s_lock(dict_index_get_lock(index), mtr);
+		if (!srv_read_only_mode) {
+			if (s_latch_by_caller) {
+				ut_ad(rw_lock_own(dict_index_get_lock(index),
+				              RW_LOCK_S));
+			} else if (!modify_external) {
+				/* BTR_SEARCH_TREE is intended to be used with
+				BTR_ALREADY_S_LATCHED */
+				ut_ad(latch_mode != BTR_SEARCH_TREE);
+
+				mtr_s_lock(dict_index_get_lock(index), mtr);
+			} else {
+				/* BTR_MODIFY_EXTERNAL needs to be excluded */
+				mtr_sx_lock(dict_index_get_lock(index), mtr);
+			}
+			upper_rw_latch = RW_S_LATCH;
+		} else {
+			upper_rw_latch = RW_NO_LATCH;
 		}
 	}
+	root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode);
 
 	page_cursor = btr_cur_get_page_cur(cursor);
 
-	space = dict_index_get_space(index);
-	page_no = dict_index_get_page(index);
+	const ulint		space = dict_index_get_space(index);
+	const page_size_t	page_size(dict_table_page_size(index->table));
+
+	/* Start with the root page. */
+	page_id_t		page_id(space, dict_index_get_page(index));
+
+	if (root_leaf_rw_latch == RW_X_LATCH) {
+		node_ptr_max_size = dict_index_node_ptr_max_size(index);
+	}
 
 	up_match = 0;
 	up_bytes = 0;
@@ -572,22 +1050,41 @@ btr_cur_search_to_nth_level(
 	default:
 #ifdef PAGE_CUR_LE_OR_EXTENDS
 		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+		      || RTREE_SEARCH_MODE(mode)
 		      || mode == PAGE_CUR_LE_OR_EXTENDS);
 #else /* PAGE_CUR_LE_OR_EXTENDS */
-		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
+		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+		      || RTREE_SEARCH_MODE(mode));
 #endif /* PAGE_CUR_LE_OR_EXTENDS */
 		page_mode = mode;
 		break;
 	}
 
 	/* Loop and search until we arrive at the desired level */
+	btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}};
 
 search_loop:
 	buf_mode = BUF_GET;
 	rw_latch = RW_NO_LATCH;
+	rtree_parent_modified = false;
 
 	if (height != 0) {
 		/* We are about to fetch the root or a non-leaf page. */
+		if ((latch_mode != BTR_MODIFY_TREE
+		     || height == level)
+		    && !retrying_for_search_prev) {
+			/* If we do not hold an SX or X latch on the index,
+			each page should be latched before reading. */
+			if (modify_external
+			    && height == ULINT_UNDEFINED
+			    && upper_rw_latch == RW_S_LATCH) {
+				/* The root page needs an sx-latch
+				for the fseg operation. */
+				rw_latch = RW_SX_LATCH;
+			} else {
+				rw_latch = upper_rw_latch;
+			}
+		}
 	} else if (latch_mode <= BTR_MODIFY_LEAF) {
 		rw_latch = latch_mode;
 
@@ -603,12 +1100,12 @@ search_loop:
 		}
 	}
 
-	zip_size = dict_table_zip_size(index->table);
-
 retry_page_get:
-	block = buf_page_get_gen(
-		space, zip_size, page_no, rw_latch, guess, buf_mode,
-		file, line, mtr);
+	ut_ad(n_blocks < BTR_MAX_LEVELS);
+	tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
+	block = buf_page_get_gen(page_id, page_size, rw_latch, guess,
+				 buf_mode, file, line, mtr);
+	tree_blocks[n_blocks] = block;
 
 	if (block == NULL) {
 		/* This must be a search to perform an insert/delete
@@ -621,10 +1118,10 @@ retry_page_get:
 		case BTR_INSERT_OP:
 		case BTR_INSERT_IGNORE_UNIQUE_OP:
 			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
+			ut_ad(!dict_index_is_spatial(index));
 
 			if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
-					space, zip_size, page_no,
-					cursor->thr)) {
+					page_id, page_size, cursor->thr)) {
 
 				cursor->flag = BTR_CUR_INSERT_TO_IBUF;
 
@@ -634,10 +1131,11 @@ retry_page_get:
 
 		case BTR_DELMARK_OP:
 			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
+			ut_ad(!dict_index_is_spatial(index));
 
 			if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
-					index, space, zip_size,
-					page_no, cursor->thr)) {
+					index, page_id, page_size,
+					cursor->thr)) {
 
 				cursor->flag = BTR_CUR_DEL_MARK_IBUF;
 
@@ -648,6 +1146,7 @@ retry_page_get:
 
 		case BTR_DELETE_OP:
 			ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
+			ut_ad(!dict_index_is_spatial(index));
 
 			if (!row_purge_poss_sec(cursor->purge_node,
 						index, tuple)) {
@@ -655,19 +1154,18 @@ retry_page_get:
 				/* The record cannot be purged yet. */
 				cursor->flag = BTR_CUR_DELETE_REF;
 			} else if (ibuf_insert(IBUF_OP_DELETE, tuple,
-					       index, space, zip_size,
-					       page_no,
+					       index, page_id, page_size,
 					       cursor->thr)) {
 
 				/* The purge was buffered. */
 				cursor->flag = BTR_CUR_DELETE_IBUF;
 			} else {
 				/* The purge could not be buffered. */
-				buf_pool_watch_unset(space, page_no);
+				buf_pool_watch_unset(page_id);
 				break;
 			}
 
-			buf_pool_watch_unset(space, page_no);
+			buf_pool_watch_unset(page_id);
 			goto func_exit;
 
 		default:
@@ -682,9 +1180,69 @@ retry_page_get:
 		goto retry_page_get;
 	}
 
-	block->check_index_page_at_flush = TRUE;
+	if (retrying_for_search_prev && height != 0) {
+		/* also latch left sibling */
+		ulint		left_page_no;
+		buf_block_t*	get_block;
+
+		ut_ad(rw_latch == RW_NO_LATCH);
+
+		rw_latch = upper_rw_latch;
+
+		rw_lock_s_lock(&block->lock);
+		left_page_no = btr_page_get_prev(
+			buf_block_get_frame(block), mtr);
+		rw_lock_s_unlock(&block->lock);
+
+		if (left_page_no != FIL_NULL) {
+			ut_ad(prev_n_blocks < leftmost_from_level);
+
+			prev_tree_savepoints[prev_n_blocks]
+				= mtr_set_savepoint(mtr);
+			get_block = buf_page_get_gen(
+				page_id_t(page_id.space(), left_page_no),
+				page_size, rw_latch, NULL, buf_mode,
+				file, line, mtr);
+			prev_tree_blocks[prev_n_blocks] = get_block;
+			prev_n_blocks++;
+
+			/* BTR_MODIFY_TREE does not update prev/next_page_no
+			without the parent page's latch, so there is no need
+			to retry here: we hold the parent page's latch. */
+		}
+
+		/* Release the RW_NO_LATCH page and re-latch it with RW_S_LATCH */
+		mtr_release_block_at_savepoint(
+			mtr, tree_savepoints[n_blocks],
+			tree_blocks[n_blocks]);
+
+		tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
+		block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+					 buf_mode, file, line, mtr);
+		tree_blocks[n_blocks] = block;
+	}
+
 	page = buf_block_get_frame(block);
 
+	if (height == ULINT_UNDEFINED
+	    && page_is_leaf(page)
+	    && rw_latch != RW_NO_LATCH
+	    && rw_latch != root_leaf_rw_latch) {
+		/* We should retry getting the page: when the root page
+		is a leaf, it must be latched with a different latch mode. */
+		ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
+		ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH);
+		ut_ad(rw_latch == RW_S_LATCH || modify_external);
+
+		ut_ad(n_blocks == 0);
+		mtr_release_block_at_savepoint(
+			mtr, tree_savepoints[n_blocks],
+			tree_blocks[n_blocks]);
+
+		upper_rw_latch = root_leaf_rw_latch;
+		goto search_loop;
+	}
+
 	if (rw_latch != RW_NO_LATCH) {
 #ifdef UNIV_ZIP_DEBUG
 		const page_zip_des_t*	page_zip
@@ -697,7 +1255,7 @@ retry_page_get:
 			? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
 	}
 
-	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+	ut_ad(fil_page_index_page_check(page));
 	ut_ad(index->id == btr_page_get_index_id(page));
 
 	if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
@@ -707,9 +1265,32 @@ retry_page_get:
 		root_height = height;
 		cursor->tree_height = root_height + 1;
 
+		if (dict_index_is_spatial(index)) {
+			ut_ad(cursor->rtr_info);
+
+			node_seq_t      seq_no = rtr_get_current_ssn_id(index);
+
+			/* If SSN in memory is not initialized, fetch
+			it from root page */
+			if (seq_no < 1) {
+				node_seq_t      root_seq_no;
+
+				root_seq_no = page_get_ssn_id(page);
+
+				mutex_enter(&(index->rtr_ssn.mutex));
+				index->rtr_ssn.seq_no = root_seq_no + 1;
+				mutex_exit(&(index->rtr_ssn.mutex));
+			}
+
+			/* Save the MBR */
+			cursor->rtr_info->thr = cursor->thr;
+			rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr);
+		}
+
 #ifdef BTR_CUR_ADAPT
 		if (block != guess) {
 			info->root_guess = block;
+			info->withdraw_clock = buf_withdraw_clock;
 		}
 #endif
 	}
@@ -717,30 +1298,142 @@ retry_page_get:
 	if (height == 0) {
 		if (rw_latch == RW_NO_LATCH) {
 
-			btr_cur_latch_leaves(
-				page, space, zip_size, page_no, latch_mode,
+			latch_leaves = btr_cur_latch_leaves(
+				block, page_id, page_size, latch_mode,
 				cursor, mtr);
 		}
 
 		switch (latch_mode) {
 		case BTR_MODIFY_TREE:
 		case BTR_CONT_MODIFY_TREE:
+		case BTR_CONT_SEARCH_TREE:
 			break;
 		default:
-			if (!s_latch_by_caller) {
+			if (!s_latch_by_caller
+			    && !srv_read_only_mode
+			    && !modify_external) {
 				/* Release the tree s-latch */
+				/* NOTE: BTR_MODIFY_EXTERNAL
+				needs to keep tree sx-latch */
 				mtr_release_s_latch_at_savepoint(
 					mtr, savepoint,
 					dict_index_get_lock(index));
 			}
+
+			/* release upper blocks */
+			if (retrying_for_search_prev) {
+				for (;
+				     prev_n_releases < prev_n_blocks;
+				     prev_n_releases++) {
+					mtr_release_block_at_savepoint(
+						mtr,
+						prev_tree_savepoints[
+							prev_n_releases],
+						prev_tree_blocks[
+							prev_n_releases]);
+				}
+			}
+
+			for (; n_releases < n_blocks; n_releases++) {
+				if (n_releases == 0 && modify_external) {
+					/* keep latch of root page */
+					ut_ad(mtr_memo_contains_flagged(
+						mtr, tree_blocks[n_releases],
+						MTR_MEMO_PAGE_SX_FIX
+						| MTR_MEMO_PAGE_X_FIX));
+					continue;
+				}
+
+				mtr_release_block_at_savepoint(
+					mtr, tree_savepoints[n_releases],
+					tree_blocks[n_releases]);
+			}
 		}
 
 		page_mode = mode;
 	}
 
-	page_cur_search_with_match(
-		block, index, tuple, page_mode, &up_match, &up_bytes,
-		&low_match, &low_bytes, page_cursor);
+	if (dict_index_is_spatial(index)) {
+		/* Remember the page search mode */
+		search_mode = page_mode;
+
+		/* Adjust the search mode when the page search
+		mode is PAGE_CUR_RTREE_LOCATE or
+		PAGE_CUR_RTREE_INSERT, as we are searching with
+		MBRs. When we are not at the target level, we
+		should search all sub-trees that "CONTAIN" the
+		search range/MBR. At the target level, the
+		search becomes PAGE_CUR_LE. */
+		if (page_mode == PAGE_CUR_RTREE_LOCATE
+		    && level == height) {
+			page_mode = PAGE_CUR_LE;
+		}
+
+		if (page_mode == PAGE_CUR_RTREE_INSERT) {
+			page_mode = (level == height)
+					? PAGE_CUR_LE
+					: PAGE_CUR_RTREE_INSERT;
+
+			ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE);
+		}
+
+		/* "need_path" indicates if we need to tracking the parent
+		pages, if it is not spatial comparison, then no need to
+		track it */
+		if (page_mode < PAGE_CUR_CONTAIN) {
+			need_path = false;
+		}
+
+		up_match = 0;
+		low_match = 0;
+
+		if (latch_mode == BTR_MODIFY_TREE
+		    || latch_mode == BTR_CONT_MODIFY_TREE
+		    || latch_mode == BTR_CONT_SEARCH_TREE) {
+			/* The tree is latched; no need for a page lock
+			to protect the "path". */
+			cursor->rtr_info->need_page_lock = false;
+		}
+	}
+
+	if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) {
+		ut_ad(need_path);
+		found = rtr_cur_search_with_match(
+			block, index, tuple, page_mode, page_cursor,
+			cursor->rtr_info);
+
+		/* Need to use BTR_MODIFY_TREE to do the MBR adjustment */
+		if (search_mode == PAGE_CUR_RTREE_INSERT
+		    && cursor->rtr_info->mbr_adj) {
+			if (latch_mode & BTR_MODIFY_LEAF) {
+				/* The parent MBR needs to be updated;
+				retry with BTR_MODIFY_TREE. */
+				goto func_exit;
+			} else if (latch_mode & BTR_MODIFY_TREE) {
+				rtree_parent_modified = true;
+				cursor->rtr_info->mbr_adj = false;
+				mbr_adj = true;
+			} else {
+				ut_ad(0);
+			}
+		}
+	} else if (height == 0 && btr_search_enabled
+		   && !dict_index_is_spatial(index)) {
+		/* The adaptive hash index is only used when searching
+		for leaf pages (height==0), but not in r-trees.
+		We only need the byte prefix comparison for the purpose
+		of updating the adaptive hash index. */
+		page_cur_search_with_match_bytes(
+			block, index, tuple, page_mode, &up_match, &up_bytes,
+			&low_match, &low_bytes, page_cursor);
+	} else {
+		/* Search for complete index fields. */
+		up_bytes = low_bytes = 0;
+		page_cur_search_with_match(
+			block, index, tuple, page_mode, &up_match,
+			&low_match, page_cursor,
+			need_path ? cursor->rtr_info : NULL);
+	}
 
 	if (estimate) {
 		btr_cur_add_path_info(cursor, height, root_height);
@@ -751,6 +1444,34 @@ retry_page_get:
 	ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
 					   mtr));
 
+	/* Add a predicate lock if this is the serializable isolation
+	level, and only in the search case. */
+	if (dict_index_is_spatial(index)
+	    && cursor->rtr_info->need_prdt_lock
+	    && mode != PAGE_CUR_RTREE_INSERT
+	    && mode != PAGE_CUR_RTREE_LOCATE
+	    && mode >= PAGE_CUR_CONTAIN) {
+		trx_t*		trx = thr_get_trx(cursor->thr);
+		lock_prdt_t	prdt;
+
+		lock_mutex_enter();
+		lock_init_prdt_from_mbr(
+			&prdt, &cursor->rtr_info->mbr, mode,
+			trx->lock.lock_heap);
+		lock_mutex_exit();
+
+		if (rw_latch == RW_NO_LATCH && height != 0) {
+			rw_lock_s_lock(&(block->lock));
+		}
+
+		lock_prdt_lock(block, &prdt, index, LOCK_S,
+			       LOCK_PREDICATE, cursor->thr, mtr);
+
+		if (rw_latch == RW_NO_LATCH && height != 0) {
+			rw_lock_s_unlock(&(block->lock));
+		}
+	}
+
 	if (level != height) {
 
 		const rec_t*	node_ptr;
@@ -764,42 +1485,426 @@ retry_page_get:
 		offsets = rec_get_offsets(
 			node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
 
-		/* Go to the child node */
-		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+		/* If the rec is the first or last on the page and the
+		intention is pessimistic delete, it might cause a node_ptr
+		insert at the upper level. We should change the intention
+		and retry. */
+		if (latch_mode == BTR_MODIFY_TREE
+		    && btr_cur_need_opposite_intention(
+			page, lock_intention, node_ptr)) {
+
+need_opposite_intention:
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+
+			if (n_releases > 0) {
+				/* release root block */
+				mtr_release_block_at_savepoint(
+					mtr, tree_savepoints[0],
+					tree_blocks[0]);
+			}
 
-		if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
-			/* We're doing a search on an ibuf tree and we're one
-			level above the leaf page. */
+			/* release all blocks */
+			for (; n_releases <= n_blocks; n_releases++) {
+				mtr_release_block_at_savepoint(
+					mtr, tree_savepoints[n_releases],
+					tree_blocks[n_releases]);
+			}
 
-			ut_ad(level == 0);
+			lock_intention = BTR_INTENTION_BOTH;
 
-			buf_mode = BUF_GET;
-			rw_latch = RW_NO_LATCH;
-			goto retry_page_get;
-		}
+			page_id.reset(space, dict_index_get_page(index));
+			up_match = 0;
+			low_match = 0;
+			height = ULINT_UNDEFINED;
 
-		goto search_loop;
-	}
+			n_blocks = 0;
+			n_releases = 0;
 
-	if (level != 0) {
-		/* x-latch the page */
-		buf_block_t*	child_block = btr_block_get(
-			space, zip_size, page_no, RW_X_LATCH, index, mtr);
+			goto search_loop;
+		}
 
-		page = buf_block_get_frame(child_block);
-		btr_assert_not_corrupted(child_block, index);
-	} else {
-		cursor->low_match = low_match;
-		cursor->low_bytes = low_bytes;
-		cursor->up_match = up_match;
-		cursor->up_bytes = up_bytes;
+		if (dict_index_is_spatial(index)) {
+			if (page_rec_is_supremum(node_ptr)) {
+				cursor->low_match = 0;
+				cursor->up_match = 0;
+				goto func_exit;
+			}
 
-#ifdef BTR_CUR_ADAPT
+			/* If we are doing insertion or record locating,
+			remember the tree nodes we visited */
+			if (page_mode == PAGE_CUR_RTREE_INSERT
+			    || (search_mode == PAGE_CUR_RTREE_LOCATE
+			        && (latch_mode != BTR_MODIFY_LEAF))) {
+				bool		add_latch = false;
+
+				if (latch_mode == BTR_MODIFY_TREE
+				    && rw_latch == RW_NO_LATCH) {
+					ut_ad(mtr_memo_contains_flagged(
+						mtr, dict_index_get_lock(index),
+						MTR_MEMO_X_LOCK
+						| MTR_MEMO_SX_LOCK));
+					rw_lock_s_lock(&block->lock);
+					add_latch = true;
+				}
+
+				/* Store the parent cursor location */
+#ifdef UNIV_DEBUG
+				ulint	num_stored = rtr_store_parent_path(
+					block, cursor, latch_mode,
+					height + 1, mtr);
+#else
+				rtr_store_parent_path(
+					block, cursor, latch_mode,
+					height + 1, mtr);
+#endif
+
+				if (page_mode == PAGE_CUR_RTREE_INSERT) {
+					btr_pcur_t*     r_cursor =
+						rtr_get_parent_cursor(
+							cursor, height + 1,
+							true);
+					/* For an insertion, there should
+					be only one parent at each level
+					traversed. */
+#ifdef UNIV_DEBUG
+					ut_ad(num_stored == 1);
+#endif
+
+					node_ptr = btr_pcur_get_rec(r_cursor);
+
+				}
+
+				if (add_latch) {
+					rw_lock_s_unlock(&block->lock);
+				}
+
+				ut_ad(!page_rec_is_supremum(node_ptr));
+			}
+
+			ut_ad(page_mode == search_mode
+			      || (page_mode == PAGE_CUR_WITHIN
+				  && search_mode == PAGE_CUR_RTREE_LOCATE));
+
+			page_mode = search_mode;
+		}
+
+		/* If node_ptr is the first or the last record of the page,
+		or shares its key value with the first or last record,
+		another page might be chosen under BTR_CONT_MODIFY_TREE.
+		So the parent page should not be released, to avoid a
+		deadlock from blocking another search on the same key value. */
+		if (!detected_same_key_root
+		    && lock_intention == BTR_INTENTION_BOTH
+		    && !dict_index_is_unique(index)
+		    && latch_mode == BTR_MODIFY_TREE
+		    && (up_match >= rec_offs_n_fields(offsets) - 1
+			|| low_match >= rec_offs_n_fields(offsets) - 1)) {
+			const rec_t*	first_rec
+						= page_rec_get_next_const(
+							page_get_infimum_rec(
+								page));
+			ulint		matched_fields;
+
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+
+			if (node_ptr == first_rec
+			    || page_rec_is_last(node_ptr, page)) {
+				detected_same_key_root = true;
+			} else {
+				matched_fields = 0;
+
+				offsets2 = rec_get_offsets(
+					first_rec, index, offsets2,
+					ULINT_UNDEFINED, &heap);
+				cmp_rec_rec_with_match(node_ptr, first_rec,
+					offsets, offsets2, index, FALSE,
+					&matched_fields);
+
+				if (matched_fields
+				    >= rec_offs_n_fields(offsets) - 1) {
+					detected_same_key_root = true;
+				} else {
+					const rec_t*	last_rec;
+
+					last_rec = page_rec_get_prev_const(
+							page_get_supremum_rec(
+								page));
+
+					matched_fields = 0;
+
+					offsets2 = rec_get_offsets(
+						last_rec, index, offsets2,
+						ULINT_UNDEFINED, &heap);
+					cmp_rec_rec_with_match(
+						node_ptr, last_rec,
+						offsets, offsets2, index,
+						FALSE, &matched_fields);
+					if (matched_fields
+					    >= rec_offs_n_fields(offsets) - 1) {
+						detected_same_key_root = true;
+					}
+				}
+			}
+		}
+
+		/* If the page might cause a modify_tree operation,
+		we should not release the parent page's latch. */
+		if (!detected_same_key_root
+		    && latch_mode == BTR_MODIFY_TREE
+		    && !btr_cur_will_modify_tree(
+				index, page, lock_intention, node_ptr,
+				node_ptr_max_size, page_size, mtr)
+		    && !rtree_parent_modified) {
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+			ut_ad(n_releases <= n_blocks);
+
+			/* we can release upper blocks */
+			for (; n_releases < n_blocks; n_releases++) {
+				if (n_releases == 0) {
+					/* We should not release the root page,
+					so it stays pinned to the same block. */
+					continue;
+				}
+
+				/* release unused blocks to unpin */
+				mtr_release_block_at_savepoint(
+					mtr, tree_savepoints[n_releases],
+					tree_blocks[n_releases]);
+			}
+		}
+
+		if (height == level
+		    && latch_mode == BTR_MODIFY_TREE) {
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+			/* We should sx-latch the root page if it was
+			already released; it contains the seg_header. */
+			if (n_releases > 0) {
+				mtr_block_sx_latch_at_savepoint(
+					mtr, tree_savepoints[0],
+					tree_blocks[0]);
+			}
+
+			/* x-latch the branch blocks not released yet. */
+			for (ulint i = n_releases; i <= n_blocks; i++) {
+				mtr_block_x_latch_at_savepoint(
+					mtr, tree_savepoints[i],
+					tree_blocks[i]);
+			}
+		}
+
+		/* We should consider the prev_page of the parent page if
+		node_ptr is the leftmost of the page, because BTR_SEARCH_PREV
+		and BTR_MODIFY_PREV latch the prev_page of the leaf page. */
+		if ((latch_mode == BTR_SEARCH_PREV
+		     || latch_mode == BTR_MODIFY_PREV)
+		    && !retrying_for_search_prev) {
+			/* block should be latched for consistent
+			   btr_page_get_prev() */
+			ut_ad(mtr_memo_contains_flagged(mtr, block,
+				MTR_MEMO_PAGE_S_FIX
+				| MTR_MEMO_PAGE_X_FIX));
+
+			if (btr_page_get_prev(page, mtr) != FIL_NULL
+			    && page_rec_is_first(node_ptr, page)) {
+
+				if (leftmost_from_level == 0) {
+					leftmost_from_level = height + 1;
+				}
+			} else {
+				leftmost_from_level = 0;
+			}
+
+			if (height == 0 && leftmost_from_level > 0) {
+				/* We should retry so that we also get the
+				prev_page from level==leftmost_from_level. */
+				retrying_for_search_prev = true;
+
+				prev_tree_blocks = static_cast<buf_block_t**>(
+					ut_malloc_nokey(sizeof(buf_block_t*)
+							* leftmost_from_level));
+
+				prev_tree_savepoints = static_cast<ulint*>(
+					ut_malloc_nokey(sizeof(ulint)
+							* leftmost_from_level));
+
+				/* back to the level (leftmost_from_level+1) */
+				ulint	idx = n_blocks
+					- (leftmost_from_level - 1);
+
+				page_id.reset(
+					space,
+					tree_blocks[idx]->page.id.page_no());
+
+				for (ulint i = n_blocks
+					       - (leftmost_from_level - 1);
+				     i <= n_blocks; i++) {
+					mtr_release_block_at_savepoint(
+						mtr, tree_savepoints[i],
+						tree_blocks[i]);
+				}
+
+				n_blocks -= (leftmost_from_level - 1);
+				height = leftmost_from_level;
+				ut_ad(n_releases == 0);
+
+				/* replay up_match, low_match */
+				up_match = 0;
+				low_match = 0;
+				rtr_info_t*	rtr_info	= need_path
+					? cursor->rtr_info : NULL;
+
+				for (ulint i = 0; i < n_blocks; i++) {
+					page_cur_search_with_match(
+						tree_blocks[i], index, tuple,
+						page_mode, &up_match,
+						&low_match, page_cursor,
+						rtr_info);
+				}
+
+				goto search_loop;
+			}
+		}
+
+		/* Go to the child node */
+		page_id.reset(
+			space,
+			btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+		n_blocks++;
+
+		if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
+			/* We're doing a search on an ibuf tree and we're one
+			level above the leaf page. */
+
+			ut_ad(level == 0);
+
+			buf_mode = BUF_GET;
+			rw_latch = RW_NO_LATCH;
+			goto retry_page_get;
+		}
+
+		if (dict_index_is_spatial(index)
+		    && page_mode >= PAGE_CUR_CONTAIN
+		    && page_mode != PAGE_CUR_RTREE_INSERT) {
+			ut_ad(need_path);
+			rtr_node_path_t* path =
+				cursor->rtr_info->path;
+
+			if (!path->empty() && found) {
+#ifdef UNIV_DEBUG
+				node_visit_t    last_visit = path->back();
+
+				ut_ad(last_visit.page_no == page_id.page_no());
+#endif /* UNIV_DEBUG */
+
+				path->pop_back();
+
+#ifdef UNIV_DEBUG
+				if (page_mode == PAGE_CUR_RTREE_LOCATE
+				    && (latch_mode != BTR_MODIFY_LEAF)) {
+					btr_pcur_t*	cur
+					= cursor->rtr_info->parent_path->back(
+					  ).cursor;
+					rec_t*	my_node_ptr
+						= btr_pcur_get_rec(cur);
+
+					offsets = rec_get_offsets(
+						my_node_ptr, index, offsets,
+						ULINT_UNDEFINED, &heap);
+
+					ulint	my_page_no
+					= btr_node_ptr_get_child_page_no(
+						my_node_ptr, offsets);
+
+					ut_ad(page_id.page_no() == my_page_no);
+
+				}
+#endif
+			}
+		}
+
+		goto search_loop;
+	} else if (!dict_index_is_spatial(index)
+		   && latch_mode == BTR_MODIFY_TREE
+		   && lock_intention == BTR_INTENTION_INSERT
+		   && mach_read_from_4(page + FIL_PAGE_NEXT) != FIL_NULL
+		   && page_rec_is_last(page_cur_get_rec(page_cursor), page)) {
+
+		/* btr_insert_into_right_sibling() might cause
+		deleting node_ptr at upper level */
+
+		guess = NULL;
+
+		if (height == 0) {
+			/* release the leaf pages if latched */
+			for (uint i = 0; i < 3; i++) {
+				if (latch_leaves.blocks[i] != NULL) {
+					mtr_release_block_at_savepoint(
+						mtr, latch_leaves.savepoints[i],
+						latch_leaves.blocks[i]);
+					latch_leaves.blocks[i] = NULL;
+				}
+			}
+		}
+
+		goto need_opposite_intention;
+	}
+
+	if (level != 0) {
+		if (upper_rw_latch == RW_NO_LATCH) {
+			/* latch the page */
+			buf_block_t*	child_block;
+
+			if (latch_mode == BTR_CONT_MODIFY_TREE) {
+				child_block = btr_block_get(
+					page_id, page_size, RW_X_LATCH,
+					index, mtr);
+			} else {
+				ut_ad(latch_mode == BTR_CONT_SEARCH_TREE);
+				child_block = btr_block_get(
+					page_id, page_size, RW_SX_LATCH,
+					index, mtr);
+			}
+
+			btr_assert_not_corrupted(child_block, index);
+		} else {
+			ut_ad(mtr_memo_contains(mtr, block, upper_rw_latch));
+			btr_assert_not_corrupted(block, index);
+
+			if (s_latch_by_caller) {
+				ut_ad(latch_mode == BTR_SEARCH_TREE);
+				/* To exclude tree-modifying operations,
+				the index should be sx-latched. */
+				ut_ad(mtr_memo_contains(
+					mtr, dict_index_get_lock(index),
+					MTR_MEMO_SX_LOCK));
+				/* Because we hold the sx-latch on the index,
+				we can release the upper blocks. */
+				for (; n_releases < n_blocks; n_releases++) {
+					mtr_release_block_at_savepoint(
+						mtr,
+						tree_savepoints[n_releases],
+						tree_blocks[n_releases]);
+				}
+			}
+		}
+
+		if (page_mode <= PAGE_CUR_LE) {
+			cursor->low_match = low_match;
+			cursor->up_match = up_match;
+		}
+	} else {
+		cursor->low_match = low_match;
+		cursor->low_bytes = low_bytes;
+		cursor->up_match = up_match;
+		cursor->up_bytes = up_bytes;
+
+#ifdef BTR_CUR_ADAPT
 		/* We do a dirty read of btr_search_enabled here.  We
 		will properly check btr_search_enabled again in
 		btr_search_build_page_hash_index() before building a
-		page hash index, while holding btr_search_latch. */
-		if (btr_search_enabled) {
+		page hash index, while holding search latch. */
+		if (btr_search_enabled && !index->disable_ahi) {
 			btr_search_info_update(index, cursor);
 		}
 #endif
@@ -811,21 +1916,206 @@ retry_page_get:
 		      || mode != PAGE_CUR_LE);
 	}
 
+	/* For a spatial index, remember which blocks are still latched */
+	if (dict_index_is_spatial(index)
+	    && (latch_mode == BTR_MODIFY_TREE
+		|| latch_mode == BTR_MODIFY_LEAF)) {
+		for (ulint i = 0; i < n_releases; i++) {
+			cursor->rtr_info->tree_blocks[i] = NULL;
+			cursor->rtr_info->tree_savepoints[i] = 0;
+		}
+
+		for (ulint i = n_releases; i <= n_blocks; i++) {
+			cursor->rtr_info->tree_blocks[i] = tree_blocks[i];
+			cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i];
+		}
+	}
+
 func_exit:
 
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
 
+	if (retrying_for_search_prev) {
+		ut_free(prev_tree_blocks);
+		ut_free(prev_tree_savepoints);
+	}
+
 	if (has_search_latch) {
 
-		rw_lock_s_lock(&btr_search_latch);
+		rw_lock_s_lock(btr_get_search_latch(index));
+	}
+
+	if (mbr_adj) {
+		/* remember that we will need to adjust parent MBR */
+		cursor->rtr_info->mbr_adj = true;
+	}
+
+	DBUG_VOID_RETURN;
+}
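
The descent above records one mtr savepoint per visited level, so that the
latches on upper levels can be dropped as soon as btr_cur_will_modify_tree()
shows they are no longer needed. A minimal sketch of that bookkeeping, using
the calls as they appear in this patch (the surrounding variables are
assumed to be declared as in btr_cur_search_to_nth_level()):

	buf_block_t*	tree_blocks[BTR_MAX_LEVELS];
	ulint		tree_savepoints[BTR_MAX_LEVELS];
	ulint		n_blocks = 0;
	ulint		n_releases = 0;

	/* On each descent step: remember a savepoint, then fetch the
	block so that it can be released individually later. */
	tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
	tree_blocks[n_blocks] = buf_page_get_gen(
		page_id, page_size, rw_latch, NULL, BUF_GET,
		__FILE__, __LINE__, mtr);
	n_blocks++;

	/* Once the current subtree cannot modify the upper levels,
	release every block above it, keeping the root block pinned. */
	for (; n_releases < n_blocks; n_releases++) {
		if (n_releases == 0) {
			continue;	/* keep the root block pinned */
		}

		mtr_release_block_at_savepoint(
			mtr, tree_savepoints[n_releases],
			tree_blocks[n_releases]);
	}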
+
+/** Searches an index tree and positions a tree cursor on a given level.
+This function avoids latching the traversal path and so should be
+used only in cases where latching is not needed.
+
+@param[in,out]	index	index
+@param[in]	level	the tree level of the search
+@param[in]	tuple	data tuple; Note: n_fields_cmp is compared
+			to the node ptr page node field
+@param[in]	mode	PAGE_CUR_L, ...;
+			an insert should always be made using PAGE_CUR_LE
+			to search the position
+@param[in,out]	cursor	tree cursor; points to the record of interest
+@param[in]	file	file name
+@param[in]	line	line where called from
+@param[in,out]	mtr	mtr
+@param[in]	mark_dirty
+			if true then mark the block as dirty */
+void
+btr_cur_search_to_nth_level_with_no_latch(
+	dict_index_t*		index,
+	ulint			level,
+	const dtuple_t*		tuple,
+	page_cur_mode_t		mode,
+	btr_cur_t*		cursor,
+	const char*		file,
+	ulint			line,
+	mtr_t*			mtr,
+	bool			mark_dirty)
+{
+	page_t*		page = NULL; /* remove warning */
+	buf_block_t*	block;
+	ulint		height;
+	ulint		up_match;
+	ulint		low_match;
+	ulint		rw_latch;
+	page_cur_mode_t	page_mode;
+	ulint		buf_mode;
+	page_cur_t*	page_cursor;
+	ulint		root_height = 0; /* remove warning */
+	ulint		n_blocks = 0;
+
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	DBUG_ENTER("btr_cur_search_to_nth_level_with_no_latch");
+
+	ut_ad(dict_table_is_intrinsic(index->table));
+	ut_ad(level == 0 || mode == PAGE_CUR_LE);
+	ut_ad(dict_index_check_search_tuple(index, tuple));
+	ut_ad(dtuple_check_typed(tuple));
+	ut_ad(index->page != FIL_NULL);
+
+	UNIV_MEM_INVALID(&cursor->up_match, sizeof cursor->up_match);
+	UNIV_MEM_INVALID(&cursor->low_match, sizeof cursor->low_match);
+#ifdef UNIV_DEBUG
+	cursor->up_match = ULINT_UNDEFINED;
+	cursor->low_match = ULINT_UNDEFINED;
+#endif /* UNIV_DEBUG */
+
+	cursor->flag = BTR_CUR_BINARY;
+	cursor->index = index;
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+
+	const ulint		space = dict_index_get_space(index);
+	const page_size_t	page_size(dict_table_page_size(index->table));
+	/* Start with the root page. */
+	page_id_t		page_id(space, dict_index_get_page(index));
+
+	up_match = 0;
+	low_match = 0;
+
+	height = ULINT_UNDEFINED;
+
+	/* We use these modified search modes on non-leaf levels of the
+	B-tree. These let us end up in the right B-tree leaf. In that leaf
+	we use the original search mode. */
+
+	switch (mode) {
+	case PAGE_CUR_GE:
+		page_mode = PAGE_CUR_L;
+		break;
+	case PAGE_CUR_G:
+		page_mode = PAGE_CUR_LE;
+		break;
+	default:
+		page_mode = mode;
+		break;
+	}
+
+	/* Loop and search until we arrive at the desired level */
+	bool at_desired_level = false;
+	while (!at_desired_level) {
+		buf_mode = BUF_GET;
+		rw_latch = RW_NO_LATCH;
+
+		ut_ad(n_blocks < BTR_MAX_LEVELS);
+
+		block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+				buf_mode, file, line, mtr, mark_dirty);
+
+		page = buf_block_get_frame(block);
+
+		if (height == ULINT_UNDEFINED) {
+			/* We are in the root node */
+
+			height = btr_page_get_level(page, mtr);
+			root_height = height;
+			cursor->tree_height = root_height + 1;
+		}
+
+		if (height == 0) {
+			/* On leaf level. Switch back to original search mode. */
+			page_mode = mode;
+		}
+
+		page_cur_search_with_match(
+				block, index, tuple, page_mode, &up_match,
+				&low_match, page_cursor, NULL);
+
+		ut_ad(height == btr_page_get_level(
+			page_cur_get_page(page_cursor), mtr));
+
+		if (level != height) {
+
+			const rec_t*	node_ptr;
+			ut_ad(height > 0);
+
+			height--;
+
+			node_ptr = page_cur_get_rec(page_cursor);
+
+			offsets = rec_get_offsets(
+					node_ptr, index, offsets,
+					ULINT_UNDEFINED, &heap);
+
+			/* Go to the child node */
+			page_id.reset(space, btr_node_ptr_get_child_page_no(
+				node_ptr, offsets));
+
+			n_blocks++;
+		} else {
+			/* If this is the desired level, leave the loop */
+			at_desired_level = true;
+		}
+	}
+
+	cursor->low_match = low_match;
+	cursor->up_match = up_match;
+
+	if (heap != NULL) {
+		mem_heap_free(heap);
 	}
+
+	DBUG_VOID_RETURN;
 }
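
A hypothetical caller for an intrinsic table could position a cursor on the
leaf level with this function roughly as follows (a sketch only; index,
tuple and the started mtr are assumed to be set up by the caller):

	btr_cur_t	cursor;

	btr_cur_search_to_nth_level_with_no_latch(
		index, 0 /* leaf level */, tuple, PAGE_CUR_LE, &cursor,
		__FILE__, __LINE__, &mtr, true /* mark_dirty */);

	/* The positioned page is only buffer-fixed, not latched; this
	is safe because intrinsic tables are never shared. */
	rec_t*	rec = btr_cur_get_rec(&cursor);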
 
 /*****************************************************************//**
 Opens a cursor at either end of an index. */
-UNIV_INTERN
 void
 btr_cur_open_at_index_side_func(
 /*============================*/
@@ -841,14 +2131,18 @@ btr_cur_open_at_index_side_func(
 	mtr_t*		mtr)		/*!< in/out: mini-transaction */
 {
 	page_cur_t*	page_cursor;
-	ulint		page_no;
-	ulint		space;
-	ulint		zip_size;
+	ulint		node_ptr_max_size = UNIV_PAGE_SIZE / 2;
 	ulint		height;
 	ulint		root_height = 0; /* remove warning */
 	rec_t*		node_ptr;
 	ulint		estimate;
 	ulint		savepoint;
+	ulint		upper_rw_latch, root_leaf_rw_latch;
+	btr_intention_t	lock_intention;
+	buf_block_t*	tree_blocks[BTR_MAX_LEVELS];
+	ulint		tree_savepoints[BTR_MAX_LEVELS];
+	ulint		n_blocks = 0;
+	ulint		n_releases = 0;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
@@ -859,6 +2153,22 @@ btr_cur_open_at_index_side_func(
 
 	ut_ad(level != ULINT_UNDEFINED);
 
+	bool	s_latch_by_caller;
+
+	s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
+	latch_mode &= ~BTR_ALREADY_S_LATCHED;
+
+	lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
+
+	ut_ad(!(latch_mode & BTR_MODIFY_EXTERNAL));
+
+	/* This function does not need to latch the left sibling
+	of the leaf page */
+	if (latch_mode == BTR_SEARCH_PREV) {
+		latch_mode = BTR_SEARCH_LEAF;
+	} else if (latch_mode == BTR_MODIFY_PREV) {
+		latch_mode = BTR_MODIFY_LEAF;
+	}
+
 	/* Store the position of the tree latch we push to mtr so that we
 	know how to release it when we have latched the leaf node */
 
@@ -866,39 +2176,98 @@ btr_cur_open_at_index_side_func(
 
 	switch (latch_mode) {
 	case BTR_CONT_MODIFY_TREE:
+	case BTR_CONT_SEARCH_TREE:
+		upper_rw_latch = RW_NO_LATCH;
 		break;
 	case BTR_MODIFY_TREE:
-		mtr_x_lock(dict_index_get_lock(index), mtr);
-		break;
-	case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
-	case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
-		ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-					MTR_MEMO_S_LOCK));
+		/* Most delete-intended operations are purges.
+		Free blocks and read I/O bandwidth should be prioritized
+		for them when the history list is growing huge. */
+		if (lock_intention == BTR_INTENTION_DELETE
+		    && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+		    && buf_get_n_pending_read_ios()) {
+			mtr_x_lock(dict_index_get_lock(index), mtr);
+		} else {
+			mtr_sx_lock(dict_index_get_lock(index), mtr);
+		}
+		upper_rw_latch = RW_X_LATCH;
 		break;
 	default:
-		mtr_s_lock(dict_index_get_lock(index), mtr);
+		ut_ad(!s_latch_by_caller
+		      || mtr_memo_contains_flagged(mtr,
+						 dict_index_get_lock(index),
+						 MTR_MEMO_SX_LOCK
+						 | MTR_MEMO_S_LOCK));
+		if (!srv_read_only_mode) {
+			if (!s_latch_by_caller) {
+				/* BTR_SEARCH_TREE is intended to be used with
+				BTR_ALREADY_S_LATCHED */
+				ut_ad(latch_mode != BTR_SEARCH_TREE);
+
+				mtr_s_lock(dict_index_get_lock(index), mtr);
+			}
+			upper_rw_latch = RW_S_LATCH;
+		} else {
+			upper_rw_latch = RW_NO_LATCH;
+		}
 	}
+	root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode);
 
 	page_cursor = btr_cur_get_page_cur(cursor);
 	cursor->index = index;
 
-	space = dict_index_get_space(index);
-	zip_size = dict_table_zip_size(index->table);
-	page_no = dict_index_get_page(index);
+	page_id_t		page_id(dict_index_get_space(index),
+					dict_index_get_page(index));
+	const page_size_t&	page_size = dict_table_page_size(index->table);
+
+	if (root_leaf_rw_latch == RW_X_LATCH) {
+		node_ptr_max_size = dict_index_node_ptr_max_size(index);
+	}
 
 	height = ULINT_UNDEFINED;
 
 	for (;;) {
 		buf_block_t*	block;
 		page_t*		page;
-		block = buf_page_get_gen(space, zip_size, page_no,
-					 RW_NO_LATCH, NULL, BUF_GET,
-					 file, line, mtr);
+		ulint		rw_latch;
+
+		ut_ad(n_blocks < BTR_MAX_LEVELS);
+
+		if (height != 0
+		    && (latch_mode != BTR_MODIFY_TREE
+			|| height == level)) {
+			rw_latch = upper_rw_latch;
+		} else {
+			rw_latch = RW_NO_LATCH;
+		}
+
+		tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
+		block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+					 BUF_GET, file, line, mtr);
+		tree_blocks[n_blocks] = block;
+
 		page = buf_block_get_frame(block);
-		ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-		ut_ad(index->id == btr_page_get_index_id(page));
 
-		block->check_index_page_at_flush = TRUE;
+		if (height == ULINT_UNDEFINED
+		    && btr_page_get_level(page, mtr) == 0
+		    && rw_latch != RW_NO_LATCH
+		    && rw_latch != root_leaf_rw_latch) {
+			/* We should retry getting the page: when the root
+			page is a leaf, it must be latched with a different
+			latch mode. */
+			ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
+			ut_ad(rw_latch == RW_S_LATCH);
+
+			ut_ad(n_blocks == 0);
+			mtr_release_block_at_savepoint(
+				mtr, tree_savepoints[n_blocks],
+				tree_blocks[n_blocks]);
+
+			upper_rw_latch = root_leaf_rw_latch;
+			continue;
+		}
+
+		ut_ad(fil_page_index_page_check(page));
+		ut_ad(index->id == btr_page_get_index_id(page));
 
 		if (height == ULINT_UNDEFINED) {
 			/* We are in the root node */
@@ -912,12 +2281,16 @@ btr_cur_open_at_index_side_func(
 		}
 
 		if (height == level) {
-			btr_cur_latch_leaves(
-				page, space, zip_size, page_no,
-				latch_mode & ~BTR_ALREADY_S_LATCHED,
-				cursor, mtr);
-
-			if (height == 0) {
+			if (srv_read_only_mode) {
+				btr_cur_latch_leaves(
+					block, page_id, page_size,
+					latch_mode, cursor, mtr);
+			} else if (height == 0) {
+				if (rw_latch == RW_NO_LATCH) {
+					btr_cur_latch_leaves(
+						block, page_id, page_size,
+						latch_mode, cursor, mtr);
+				}
 				/* In versions <= 3.23.52 we had
 				forgotten to release the tree latch
 				here. If in an index scan we had to
@@ -929,19 +2302,239 @@ btr_cur_open_at_index_side_func(
 				switch (latch_mode) {
 				case BTR_MODIFY_TREE:
 				case BTR_CONT_MODIFY_TREE:
-				case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
-				case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
+				case BTR_CONT_SEARCH_TREE:
 					break;
 				default:
-					/* Release the tree s-latch */
+					if (!s_latch_by_caller) {
+						/* Release the tree s-latch */
+						mtr_release_s_latch_at_savepoint(
+							mtr, savepoint,
+							dict_index_get_lock(
+								index));
+					}
+
+					/* release upper blocks */
+					for (; n_releases < n_blocks;
+					     n_releases++) {
+						mtr_release_block_at_savepoint(
+							mtr,
+							tree_savepoints[
+								n_releases],
+							tree_blocks[
+								n_releases]);
+					}
+				}
+			} else { /* height != 0 */
+				/* We already have the block latched. */
+				ut_ad(latch_mode == BTR_SEARCH_TREE);
+				ut_ad(s_latch_by_caller);
+				ut_ad(upper_rw_latch == RW_S_LATCH);
+
+				ut_ad(mtr_memo_contains(mtr, block,
+							upper_rw_latch));
+
+				if (s_latch_by_caller) {
+					/* To exclude tree-modifying operations,
+					the index should be sx-latched. */
+					ut_ad(mtr_memo_contains(
+						mtr,
+						dict_index_get_lock(index),
+						MTR_MEMO_SX_LOCK));
+					/* Because we hold the index sx-latch,
+					we can release the upper blocks. */
+					for (; n_releases < n_blocks;
+					     n_releases++) {
+						mtr_release_block_at_savepoint(
+							mtr,
+							tree_savepoints[
+								n_releases],
+							tree_blocks[
+								n_releases]);
+					}
+				}
+			}
+		}
 
-					mtr_release_s_latch_at_savepoint(
-						mtr, savepoint,
-						dict_index_get_lock(index));
+		if (from_left) {
+			page_cur_set_before_first(block, page_cursor);
+		} else {
+			page_cur_set_after_last(block, page_cursor);
+		}
+
+		if (height == level) {
+			if (estimate) {
+				btr_cur_add_path_info(cursor, height,
+						      root_height);
+			}
+
+			break;
+		}
+
+		ut_ad(height > 0);
+
+		if (from_left) {
+			page_cur_move_to_next(page_cursor);
+		} else {
+			page_cur_move_to_prev(page_cursor);
+		}
+
+		if (estimate) {
+			btr_cur_add_path_info(cursor, height, root_height);
+		}
+
+		height--;
+
+		node_ptr = page_cur_get_rec(page_cursor);
+		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
+					  ULINT_UNDEFINED, &heap);
+
+		/* If the rec is the first or last on the page and the
+		intention is pessimistic delete, it might cause a node_ptr
+		insert at the upper level. We should change the intention
+		and retry. */
+		if (latch_mode == BTR_MODIFY_TREE
+		    && btr_cur_need_opposite_intention(
+			page, lock_intention, node_ptr)) {
+
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+			/* release all blocks */
+			for (; n_releases <= n_blocks; n_releases++) {
+				mtr_release_block_at_savepoint(
+					mtr, tree_savepoints[n_releases],
+					tree_blocks[n_releases]);
+			}
+
+			lock_intention = BTR_INTENTION_BOTH;
+
+			page_id.set_page_no(dict_index_get_page(index));
+
+			height = ULINT_UNDEFINED;
+
+			n_blocks = 0;
+			n_releases = 0;
+
+			continue;
+		}
+
+		if (latch_mode == BTR_MODIFY_TREE
+		    && !btr_cur_will_modify_tree(
+				cursor->index, page, lock_intention, node_ptr,
+				node_ptr_max_size, page_size, mtr)) {
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+			ut_ad(n_releases <= n_blocks);
+
+			/* we can release upper blocks */
+			for (; n_releases < n_blocks; n_releases++) {
+				if (n_releases == 0) {
+					/* We should not release the root page,
+					so it stays pinned to the same block. */
+					continue;
 				}
+
+				/* release unused blocks to unpin */
+				mtr_release_block_at_savepoint(
+					mtr, tree_savepoints[n_releases],
+					tree_blocks[n_releases]);
+			}
+		}
+
+		if (height == level
+		    && latch_mode == BTR_MODIFY_TREE) {
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+			/* We should sx-latch the root page if it was
+			already released; it contains the seg_header. */
+			if (n_releases > 0) {
+				mtr_block_sx_latch_at_savepoint(
+					mtr, tree_savepoints[0],
+					tree_blocks[0]);
+			}
+
+			/* x-latch the branch blocks not released yet. */
+			for (ulint i = n_releases; i <= n_blocks; i++) {
+				mtr_block_x_latch_at_savepoint(
+					mtr, tree_savepoints[i],
+					tree_blocks[i]);
 			}
 		}
 
+		/* Go to the child node */
+		page_id.set_page_no(
+			btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+		n_blocks++;
+	}
+
+	if (heap) {
+		mem_heap_free(heap);
+	}
+}
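
With the reworked latching above, a caller that already holds the index
s-latch ORs BTR_ALREADY_S_LATCHED into the latch mode and the function skips
taking the tree latch itself. A sketch of such a call site (hypothetical
context; btr_cur_open_at_index_side() is assumed to be the wrapper macro
that supplies __FILE__/__LINE__):

	mtr_s_lock(dict_index_get_lock(index), &mtr);

	btr_cur_open_at_index_side(
		true /* from_left */, index,
		BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
		&cursor, 0 /* leaf level */, &mtr);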
+
+/** Opens a cursor at either end of an index.
+Avoid taking latches on buffer pages, just pin them (by incrementing
+fix_count) to keep them in the buffer pool. This mode is used by
+intrinsic tables, as they are not shared and so there is no need
+for latching.
+@param[in]	from_left	true if open to the low end, false if
+				open to the high end
+@param[in]	index		index
+@param[in,out]	cursor		cursor
+@param[in]	level		level at which to open the cursor
+@param[in]	file		file name
+@param[in]	line		line where called
+@param[in,out]	mtr		mini-transaction
+*/
+void
+btr_cur_open_at_index_side_with_no_latch_func(
+	bool		from_left,
+	dict_index_t*	index,
+	btr_cur_t*	cursor,
+	ulint		level,
+	const char*	file,
+	ulint		line,
+	mtr_t*		mtr)
+{
+	page_cur_t*	page_cursor;
+	ulint		height;
+	rec_t*		node_ptr;
+	ulint		n_blocks = 0;
+	mem_heap_t*	heap		= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	ut_ad(level != ULINT_UNDEFINED);
+
+	page_cursor = btr_cur_get_page_cur(cursor);
+	cursor->index = index;
+	page_id_t		page_id(dict_index_get_space(index),
+					dict_index_get_page(index));
+	const page_size_t&	page_size = dict_table_page_size(index->table);
+
+	height = ULINT_UNDEFINED;
+
+	for (;;) {
+		buf_block_t*	block;
+		page_t*		page;
+		ulint		rw_latch = RW_NO_LATCH;
+
+		ut_ad(n_blocks < BTR_MAX_LEVELS);
+
+		block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+					 BUF_GET, file, line, mtr);
+
+		page = buf_block_get_frame(block);
+
+		ut_ad(fil_page_index_page_check(page));
+		ut_ad(index->id == btr_page_get_index_id(page));
+
+		if (height == ULINT_UNDEFINED) {
+			/* We are in the root node */
+
+			height = btr_page_get_level(page, mtr);
+			ut_a(height >= level);
+		} else {
+			/* TODO: flag the index corrupted if this fails */
+			ut_ad(height == btr_page_get_level(page, mtr));
+		}
+
 		if (from_left) {
 			page_cur_set_before_first(block, page_cursor);
 		} else {
@@ -949,11 +2542,6 @@ btr_cur_open_at_index_side_func(
 		}
 
 		if (height == level) {
-			if (estimate) {
-				btr_cur_add_path_info(cursor, height,
-						      root_height);
-			}
-
 			break;
 		}
 
@@ -965,28 +2553,29 @@ btr_cur_open_at_index_side_func(
 			page_cur_move_to_prev(page_cursor);
 		}
 
-		if (estimate) {
-			btr_cur_add_path_info(cursor, height, root_height);
-		}
-
 		height--;
 
 		node_ptr = page_cur_get_rec(page_cursor);
 		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
 					  ULINT_UNDEFINED, &heap);
+
 		/* Go to the child node */
-		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+		page_id.set_page_no(
+			btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+		n_blocks++;
 	}
 
-	if (heap) {
+	if (heap != NULL) {
 		mem_heap_free(heap);
 	}
 }
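
For a full scan of an intrinsic table's index, a caller might open the
cursor at the low end and only pin pages, never latching them (a sketch;
index and the started mtr are assumed to be set up by the caller):

	btr_cur_t	cursor;

	btr_cur_open_at_index_side_with_no_latch_func(
		true /* from_left */, index, &cursor, 0 /* leaf level */,
		__FILE__, __LINE__, &mtr);

	/* The cursor is now before the first user record; advance with
	page_cur_move_to_next() to begin the scan. */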
 
 /**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INTERN
-void
+Positions a cursor at a randomly chosen position within a B-tree.
+@return true if the index is available and we have positioned the cursor;
+false if the index is unavailable */
+bool
 btr_cur_open_at_rnd_pos_func(
 /*=========================*/
 	dict_index_t*	index,		/*!< in: index */
@@ -997,43 +2586,129 @@ btr_cur_open_at_rnd_pos_func(
 	mtr_t*		mtr)		/*!< in: mtr */
 {
 	page_cur_t*	page_cursor;
-	ulint		page_no;
-	ulint		space;
-	ulint		zip_size;
+	ulint		node_ptr_max_size = UNIV_PAGE_SIZE / 2;
 	ulint		height;
 	rec_t*		node_ptr;
+	ulint		savepoint;
+	ulint		upper_rw_latch, root_leaf_rw_latch;
+	btr_intention_t	lock_intention;
+	buf_block_t*	tree_blocks[BTR_MAX_LEVELS];
+	ulint		tree_savepoints[BTR_MAX_LEVELS];
+	ulint		n_blocks = 0;
+	ulint		n_releases = 0;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
 	rec_offs_init(offsets_);
 
+	ut_ad(!dict_index_is_spatial(index));
+
+	lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
+
+	ut_ad(!(latch_mode & BTR_MODIFY_EXTERNAL));
+
+	savepoint = mtr_set_savepoint(mtr);
+
 	switch (latch_mode) {
 	case BTR_MODIFY_TREE:
-		mtr_x_lock(dict_index_get_lock(index), mtr);
+		/* Most delete-intended operations are purges.
+		Free blocks and read I/O bandwidth should be prioritized
+		for them when the history list is growing huge. */
+		if (lock_intention == BTR_INTENTION_DELETE
+		    && trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
+		    && buf_get_n_pending_read_ios()) {
+			mtr_x_lock(dict_index_get_lock(index), mtr);
+		} else {
+			mtr_sx_lock(dict_index_get_lock(index), mtr);
+		}
+		upper_rw_latch = RW_X_LATCH;
 		break;
+	case BTR_SEARCH_PREV:
+	case BTR_MODIFY_PREV:
+		/* This function does not support taking the left uncle
+		   page latch that would be needed for the left leaf
+		   page latch. */
+	case BTR_SEARCH_TREE:
+	case BTR_CONT_MODIFY_TREE:
+	case BTR_CONT_SEARCH_TREE:
+		ut_ad(0);
+		/* fall through */
 	default:
-		ut_ad(latch_mode != BTR_CONT_MODIFY_TREE);
-		mtr_s_lock(dict_index_get_lock(index), mtr);
+		if (!srv_read_only_mode) {
+			mtr_s_lock(dict_index_get_lock(index), mtr);
+			upper_rw_latch = RW_S_LATCH;
+		} else {
+			upper_rw_latch = RW_NO_LATCH;
+		}
+	}
+
+	DBUG_EXECUTE_IF("test_index_is_unavailable",
+			return(false););
+
+	if (index->page == FIL_NULL) {
+		/* Since we did not hold the index lock until just now,
+		the index could have been modified by others; for example,
+		a 'DROP TABLE' could have marked it unavailable in the
+		meantime, since this statistics updater for a referenced
+		table holds no lock on it. */
+		return(false);
 	}
 
+	root_leaf_rw_latch = btr_cur_latch_for_root_leaf(latch_mode);
+
 	page_cursor = btr_cur_get_page_cur(cursor);
 	cursor->index = index;
 
-	space = dict_index_get_space(index);
-	zip_size = dict_table_zip_size(index->table);
-	page_no = dict_index_get_page(index);
+	page_id_t		page_id(dict_index_get_space(index),
+					dict_index_get_page(index));
+	const page_size_t&	page_size = dict_table_page_size(index->table);
+
+	if (root_leaf_rw_latch == RW_X_LATCH) {
+		node_ptr_max_size = dict_index_node_ptr_max_size(index);
+	}
 
 	height = ULINT_UNDEFINED;
 
 	for (;;) {
 		buf_block_t*	block;
 		page_t*		page;
+		ulint		rw_latch;
+
+		ut_ad(n_blocks < BTR_MAX_LEVELS);
+
+		if (height != 0
+		    && latch_mode != BTR_MODIFY_TREE) {
+			rw_latch = upper_rw_latch;
+		} else {
+			rw_latch = RW_NO_LATCH;
+		}
+
+		tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
+		block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+					 BUF_GET, file, line, mtr);
+		tree_blocks[n_blocks] = block;
 
-		block = buf_page_get_gen(space, zip_size, page_no,
-					 RW_NO_LATCH, NULL, BUF_GET,
-					 file, line, mtr);
 		page = buf_block_get_frame(block);
-		ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+
+		if (height == ULINT_UNDEFINED
+		    && btr_page_get_level(page, mtr) == 0
+		    && rw_latch != RW_NO_LATCH
+		    && rw_latch != root_leaf_rw_latch) {
+			/* We should retry getting the page: when the root
+			page is a leaf, it must be latched with a different
+			latch mode. */
+			ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
+			ut_ad(rw_latch == RW_S_LATCH);
+
+			ut_ad(n_blocks == 0);
+			mtr_release_block_at_savepoint(
+				mtr, tree_savepoints[n_blocks],
+				tree_blocks[n_blocks]);
+
+			upper_rw_latch = root_leaf_rw_latch;
+			continue;
+		}
+
+		ut_ad(fil_page_index_page_check(page));
 		ut_ad(index->id == btr_page_get_index_id(page));
 
 		if (height == ULINT_UNDEFINED) {
@@ -1043,8 +2718,37 @@ btr_cur_open_at_rnd_pos_func(
 		}
 
 		if (height == 0) {
-			btr_cur_latch_leaves(page, space, zip_size, page_no,
-					     latch_mode, cursor, mtr);
+			if (rw_latch == RW_NO_LATCH
+			    || srv_read_only_mode) {
+				btr_cur_latch_leaves(
+					block, page_id, page_size,
+					latch_mode, cursor, mtr);
+			}
+
+			/* btr_cur_open_at_index_side_func() and
+			btr_cur_search_to_nth_level() release the
+			tree s-latch here. */
+			switch (latch_mode) {
+			case BTR_MODIFY_TREE:
+			case BTR_CONT_MODIFY_TREE:
+			case BTR_CONT_SEARCH_TREE:
+				break;
+			default:
+				/* Release the tree s-latch */
+				if (!srv_read_only_mode) {
+					mtr_release_s_latch_at_savepoint(
+						mtr, savepoint,
+						dict_index_get_lock(index));
+				}
+
+				/* release upper blocks */
+				for (; n_releases < n_blocks; n_releases++) {
+					mtr_release_block_at_savepoint(
+						mtr,
+						tree_savepoints[n_releases],
+						tree_blocks[n_releases]);
+				}
+			}
 		}
 
 		page_cur_open_on_rnd_user_rec(block, page_cursor);
@@ -1061,13 +2765,88 @@ btr_cur_open_at_rnd_pos_func(
 		node_ptr = page_cur_get_rec(page_cursor);
 		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
 					  ULINT_UNDEFINED, &heap);
+
+		/* If the rec is the first or last on the page and the
+		intention is pessimistic delete, it might cause a node_ptr
+		insert at the upper level. We should change the intention
+		and retry. */
+		if (latch_mode == BTR_MODIFY_TREE
+		    && btr_cur_need_opposite_intention(
+			page, lock_intention, node_ptr)) {
+
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+			/* release all blocks */
+			for (; n_releases <= n_blocks; n_releases++) {
+				mtr_release_block_at_savepoint(
+					mtr, tree_savepoints[n_releases],
+					tree_blocks[n_releases]);
+			}
+
+			lock_intention = BTR_INTENTION_BOTH;
+
+			page_id.set_page_no(dict_index_get_page(index));
+
+			height = ULINT_UNDEFINED;
+
+			n_blocks = 0;
+			n_releases = 0;
+
+			continue;
+		}
+
+		if (latch_mode == BTR_MODIFY_TREE
+		    && !btr_cur_will_modify_tree(
+				cursor->index, page, lock_intention, node_ptr,
+				node_ptr_max_size, page_size, mtr)) {
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+			ut_ad(n_releases <= n_blocks);
+
+			/* we can release upper blocks */
+			for (; n_releases < n_blocks; n_releases++) {
+				if (n_releases == 0) {
+					/* We should not release the root page,
+					so it stays pinned to the same block. */
+					continue;
+				}
+
+				/* release unused blocks to unpin */
+				mtr_release_block_at_savepoint(
+					mtr, tree_savepoints[n_releases],
+					tree_blocks[n_releases]);
+			}
+		}
+
+		if (height == 0
+		    && latch_mode == BTR_MODIFY_TREE) {
+			ut_ad(upper_rw_latch == RW_X_LATCH);
+			/* We should sx-latch the root page if it was
+			already released; it contains the seg_header. */
+			if (n_releases > 0) {
+				mtr_block_sx_latch_at_savepoint(
+					mtr, tree_savepoints[0],
+					tree_blocks[0]);
+			}
+
+			/* x-latch the branch blocks not released yet. */
+			for (ulint i = n_releases; i <= n_blocks; i++) {
+				mtr_block_x_latch_at_savepoint(
+					mtr, tree_savepoints[i],
+					tree_blocks[i]);
+			}
+		}
+
 		/* Go to the child node */
-		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+		page_id.set_page_no(
+			btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+		n_blocks++;
 	}
 
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
+
+	return(true);
 }
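
Because the function can now return false, callers such as the statistics
updater must be prepared for a concurrently dropped index. A sketch of the
calling pattern (hypothetical context; btr_cur_open_at_rnd_pos() is the
wrapper macro that supplies __FILE__/__LINE__):

	mtr_start(&mtr);

	if (!btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF,
				     &cursor, &mtr)) {
		/* index->page was FIL_NULL: the index was marked
		unavailable, e.g. by a concurrent DROP TABLE. */
		mtr_commit(&mtr);
		return;
	}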
 
 /*==================== B-TREE INSERT =========================*/
@@ -1083,7 +2862,7 @@ if this is a compressed leaf page in a secondary index.
 This has to be done either within the same mini-transaction,
 or by invoking ibuf_reset_free_bits() before mtr_commit().
 
-@return	pointer to inserted record if succeed, else NULL */
+@return pointer to the inserted record on success, else NULL */
 static __attribute__((nonnull, warn_unused_result))
 rec_t*
 btr_cur_insert_if_possible(
@@ -1102,8 +2881,9 @@ btr_cur_insert_if_possible(
 
 	ut_ad(dtuple_check_typed(tuple));
 
-	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(
+		mtr, btr_cur_get_block(cursor),
+		MTR_MEMO_PAGE_X_FIX, cursor->index->table));
 	page_cursor = btr_cur_get_page_cur(cursor);
 
 	/* Now, try the insert */
@@ -1126,7 +2906,7 @@ btr_cur_insert_if_possible(
 
 /*************************************************************//**
 For an insert, checks the locks and does the undo logging if desired.
-@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
+@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
 UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,5,6)))
 dberr_t
 btr_cur_ins_lock_and_undo(
@@ -1143,7 +2923,7 @@ btr_cur_ins_lock_and_undo(
 				successor record */
 {
 	dict_index_t*	index;
-	dberr_t		err;
+	dberr_t		err = DB_SUCCESS;
 	rec_t*		rec;
 	roll_ptr_t	roll_ptr;
 
@@ -1156,10 +2936,32 @@ btr_cur_ins_lock_and_undo(
 	ut_ad(!dict_index_is_online_ddl(index)
 	      || dict_index_is_clust(index)
 	      || (flags & BTR_CREATE_FLAG));
+	ut_ad(mtr->is_named_space(index->space));
 
-	err = lock_rec_insert_check_and_lock(flags, rec,
-					     btr_cur_get_block(cursor),
-					     index, thr, mtr, inherit);
+	/* Check if there is a predicate or GAP lock preventing the insertion */
+	if (!(flags & BTR_NO_LOCKING_FLAG)) {
+		if (dict_index_is_spatial(index)) {
+			lock_prdt_t	prdt;
+			rtr_mbr_t	mbr;
+
+			rtr_get_mbr_from_tuple(entry, &mbr);
+
+			/* Use the on-stack MBR variable to test if a
+			lock is needed. If so, the predicate (MBR) will be
+			allocated from the lock heap in
+			lock_prdt_insert_check_and_lock(). */
+			lock_init_prdt_from_mbr(
+				&prdt, &mbr, 0, NULL);
+
+			err = lock_prdt_insert_check_and_lock(
+				flags, rec, btr_cur_get_block(cursor),
+				index, thr, mtr, &prdt);
+			*inherit = false;
+		} else {
+			err = lock_rec_insert_check_and_lock(
+				flags, rec, btr_cur_get_block(cursor),
+				index, thr, mtr, inherit);
+		}
+	}
 
 	if (err != DB_SUCCESS
 	    || !dict_index_is_clust(index) || dict_index_is_ibuf(index)) {
@@ -1176,9 +2978,11 @@ btr_cur_ins_lock_and_undo(
 		return(err);
 	}
 
-	/* Now we can fill in the roll ptr field in entry */
+	/* Now we can fill in the roll ptr field in entry
+	(except if the table is intrinsic) */
 
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+	if (!(flags & BTR_KEEP_SYS_FLAG)
+	    && !dict_table_is_intrinsic(index->table)) {
 
 		row_upd_index_entry_sys_field(entry, index,
 					      DATA_ROLL_PTR, roll_ptr);
@@ -1187,23 +2991,36 @@ btr_cur_ins_lock_and_undo(
 	return(DB_SUCCESS);
 }
 
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Report information about a transaction. */
+/**
+Prefetch siblings of the leaf for the pessimistic operation.
+@param block	leaf page */
 static
 void
-btr_cur_trx_report(
-/*===============*/
-	trx_id_t		trx_id,	/*!< in: transaction id */
-	const dict_index_t*	index,	/*!< in: index */
-	const char*		op)	/*!< in: operation */
+btr_cur_prefetch_siblings(
+	buf_block_t*	block)
 {
-	fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx_id);
-	fputs(op, stderr);
-	dict_index_name_print(stderr, NULL, index);
-	putc('\n', stderr);
+	page_t*	page = buf_block_get_frame(block);
+
+	ut_ad(page_is_leaf(page));
+
+	ulint left_page_no = fil_page_get_prev(page);
+	ulint right_page_no = fil_page_get_next(page);
+
+	if (left_page_no != FIL_NULL) {
+		buf_read_page_background(
+			page_id_t(block->page.id.space(), left_page_no),
+			block->page.size, false);
+	}
+	if (right_page_no != FIL_NULL) {
+		buf_read_page_background(
+			page_id_t(block->page.id.space(), right_page_no),
+			block->page.size, false);
+	}
+	if (left_page_no != FIL_NULL
+	    || right_page_no != FIL_NULL) {
+		os_aio_simulated_wake_handler_threads();
+	}
 }
-#endif /* UNIV_DEBUG */
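
btr_cur_prefetch_siblings() above reads the FIL_PAGE_PREV/FIL_PAGE_NEXT pointers out of the page header and queues both neighbours for background reads, so a following pessimistic operation is likely to find them in the buffer pool. The shape of the pattern, reduced to a standalone sketch with stubbed I/O hooks (the hook names are illustrative; the real code calls buf_read_page_background() and os_aio_simulated_wake_handler_threads()):

    #include <cstdint>

    static const uint32_t NULL_PAGE = 0xFFFFFFFF; /* stand-in for FIL_NULL */

    /* Illustrative stubs for the asynchronous read machinery. */
    static void read_page_async(uint32_t space, uint32_t page_no)
    { (void) space; (void) page_no; }
    static void wake_io_threads() {}

    void prefetch_siblings(uint32_t space, uint32_t prev, uint32_t next)
    {
        if (prev != NULL_PAGE) {
            read_page_async(space, prev);
        }
        if (next != NULL_PAGE) {
            read_page_async(space, next);
        }
        /* Only wake the I/O threads if something was actually queued. */
        if (prev != NULL_PAGE || next != NULL_PAGE) {
            wake_io_threads();
        }
    }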
 
 /*************************************************************//**
 Tries to perform an insert to a page in an index tree, next to cursor.
@@ -1211,8 +3028,7 @@ It is assumed that mtr holds an x-latch on the page. The operation does
 not succeed if there is too little space on the page. If there is just
 one record on the page, the insert will always succeed; this is to
 prevent trying to split a page with just one record.
-@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
 dberr_t
 btr_cur_optimistic_insert(
 /*======================*/
@@ -1247,7 +3063,6 @@ btr_cur_optimistic_insert(
 	ibool		leaf;
 	ibool		reorg;
 	ibool		inherit = TRUE;
-	ulint		zip_size;
 	ulint		rec_size;
 	dberr_t		err;
 
@@ -1257,38 +3072,34 @@ btr_cur_optimistic_insert(
 	page = buf_block_get_frame(block);
 	index = cursor->index;
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	/* Blocks are not latched for insert if the table is intrinsic
+	and the index is an auto-generated clustered index. */
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 	ut_ad(!dict_index_is_online_ddl(index)
 	      || dict_index_is_clust(index)
 	      || (flags & BTR_CREATE_FLAG));
 	ut_ad(dtuple_check_typed(entry));
 
-	zip_size = buf_block_get_zip_size(block);
+	const page_size_t&	page_size = block->page.size;
+
 #ifdef UNIV_DEBUG_VALGRIND
-	if (zip_size) {
-		UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
-		UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
+	if (page_size.is_compressed()) {
+		UNIV_MEM_ASSERT_RW(page, page_size.logical());
+		UNIV_MEM_ASSERT_RW(block->page.zip.data, page_size.physical());
 	}
 #endif /* UNIV_DEBUG_VALGRIND */
 
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert ");
-		dtuple_print(stderr, entry);
-	}
-#endif /* UNIV_DEBUG */
-
 	leaf = page_is_leaf(page);
 
 	/* Calculate the record size when entry is converted to a record */
 	rec_size = rec_get_converted_size(index, entry, n_ext);
 
 	if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
-				   dtuple_get_n_fields(entry), zip_size)) {
+				   dtuple_get_n_fields(entry), page_size)) {
 
 		/* The record is so big that we have to store some fields
 		externally on separate database pages */
-		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
+		big_rec_vec = dtuple_convert_big_rec(index, 0, entry, &n_ext);
 
 		if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
 
@@ -1298,12 +3109,12 @@ btr_cur_optimistic_insert(
 		rec_size = rec_get_converted_size(index, entry, n_ext);
 	}
 
-	if (zip_size) {
+	if (page_size.is_compressed()) {
 		/* Estimate the free space of an empty compressed page.
 		Subtract one byte for the encoded heap_no in the
 		modification log. */
 		ulint	free_space_zip = page_zip_empty_size(
-			cursor->index->n_fields, zip_size);
+			cursor->index->n_fields, page_size.physical());
 		ulint	n_uniq = dict_index_get_n_unique_in_tree(index);
 
 		ut_ad(dict_table_is_comp(index->table));
@@ -1343,7 +3154,7 @@ too_big:
 	LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
 				      goto fail);
 
-	if (leaf && zip_size
+	if (leaf && page_size.is_compressed()
 	    && (page_get_data_size(page) + rec_size
 		>= dict_index_zip_pad_optimal_page_size(index))) {
 		/* If compression padding tells us that insertion will
@@ -1352,6 +3163,12 @@ too_big:
 		insertion. */
 fail:
 		err = DB_FAIL;
+
+		/* Prefetch siblings of the leaf for the pessimistic
+		operation, if the page is a leaf. */
+		if (page_is_leaf(page)) {
+			btr_cur_prefetch_siblings(block);
+		}
 fail_err:
 
 		if (big_rec_vec) {
@@ -1380,7 +3197,7 @@ fail_err:
 	we have to split the page to reserve enough free space for
 	future updates of records. */
 
-	if (leaf && !zip_size && dict_index_is_clust(index)
+	if (leaf && !page_size.is_compressed() && dict_index_is_clust(index)
 	    && page_get_n_recs(page) >= 2
 	    && dict_index_get_space_reserve() + rec_size > max_size
 	    && (btr_page_get_split_rec_to_right(cursor, &dummy)
@@ -1388,37 +3205,67 @@ fail_err:
 		goto fail;
 	}
 
-	/* Check locks and write to the undo log, if specified */
-	err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
-					thr, mtr, &inherit);
-
-	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+	page_cursor = btr_cur_get_page_cur(cursor);
 
-		goto fail_err;
-	}
+	DBUG_PRINT("ib_cur", ("insert %s (" IB_ID_FMT ") by " TRX_ID_FMT
+			      ": %s",
+			      index->name(), index->id,
+			      thr != NULL
+			      ? trx_get_id_for_print(thr_get_trx(thr))
+			      : 0,
+			      rec_printer(entry).str().c_str()));
 
-	page_cursor = btr_cur_get_page_cur(cursor);
+	DBUG_EXECUTE_IF("do_page_reorganize",
+			btr_page_reorganize(page_cursor, index, mtr););
 
 	/* Now, try the insert */
-
 	{
-		const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
-		*rec = page_cur_tuple_insert(page_cursor, entry, index,
-					     offsets, heap, n_ext, mtr);
+		const rec_t*	page_cursor_rec = page_cur_get_rec(page_cursor);
+
+		if (dict_table_is_intrinsic(index->table)) {
+
+			index->rec_cache.rec_size = rec_size;
+
+			*rec = page_cur_tuple_direct_insert(
+				page_cursor, entry, index, n_ext, mtr);
+		} else {
+			/* Check locks and write to the undo log,
+			if specified */
+			err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
+							thr, mtr, &inherit);
+
+			if (err != DB_SUCCESS) {
+				goto fail_err;
+			}
+
+			*rec = page_cur_tuple_insert(
+				page_cursor, entry, index, offsets, heap,
+				n_ext, mtr);
+		}
+
 		reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
 	}
 
 	if (*rec) {
-	} else if (zip_size) {
+	} else if (page_size.is_compressed()) {
 		/* Reset the IBUF_BITMAP_FREE bits, because
 		page_cur_tuple_insert() will have attempted page
 		reorganize before failing. */
-		if (leaf && !dict_index_is_clust(index)) {
+		if (leaf
+		    && !dict_index_is_clust(index)
+		    && !dict_table_is_temporary(index->table)) {
 			ibuf_reset_free_bits(block);
 		}
 
 		goto fail;
 	} else {
+
+		/* For intrinsic tables we take a consistent path and
+		re-organize via the pessimistic path. */
+		if (dict_table_is_intrinsic(index->table)) {
+			goto fail;
+		}
+
 		ut_ad(!reorg);
 
 		/* If the record did not fit, reorganize */
@@ -1435,30 +3282,31 @@ fail_err:
 					     offsets, heap, n_ext, mtr);
 
 		if (UNIV_UNLIKELY(!*rec)) {
-			fputs("InnoDB: Error: cannot insert tuple ", stderr);
-			dtuple_print(stderr, entry);
-			fputs(" into ", stderr);
-			dict_index_name_print(stderr, thr_get_trx(thr), index);
-			fprintf(stderr, "\nInnoDB: max insert size %lu\n",
-				(ulong) max_size);
-			ut_error;
+			ib::fatal() << "Cannot insert tuple " << *entry
+				<< " into index " << index->name
+				<< " of table " << index->table->name
+				<< ". Max size: " << max_size;
 		}
 	}
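
The else-branch above encodes a try/compact/retry-once discipline: the insert was pre-checked to fit after reorganization, so the page is compacted and the insert retried exactly once, and a second failure is treated as corruption. The same discipline in a generic, self-contained sketch (not the InnoDB API):

    #include <functional>
    #include <stdexcept>

    /* try_insert() returns false when fragmentation blocks the insert;
    compact() defragments so that a pre-checked insert must succeed. */
    inline bool insert_with_retry(const std::function<bool()>& try_insert,
                                  const std::function<void()>& compact)
    {
        if (try_insert()) {
            return true;
        }
        compact();                 /* frees no space, only defragments */
        if (try_insert()) {
            return true;
        }
        /* We verified beforehand that the record fits after compaction,
        so failing again indicates corruption. */
        throw std::runtime_error("insert failed after reorganize");
    }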
 
 #ifdef BTR_CUR_HASH_ADAPT
-	if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
-		btr_search_update_hash_node_on_insert(cursor);
-	} else {
-		btr_search_update_hash_on_insert(cursor);
+	if (!index->disable_ahi) {
+		if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
+			btr_search_update_hash_node_on_insert(cursor);
+		} else {
+			btr_search_update_hash_on_insert(cursor);
+		}
 	}
-#endif
+#endif /* BTR_CUR_HASH_ADAPT */
 
 	if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
 
 		lock_update_insert(block, *rec);
 	}
 
-	if (leaf && !dict_index_is_clust(index)) {
+	if (leaf
+	    && !dict_index_is_clust(index)
+	    && !dict_table_is_temporary(index->table)) {
 		/* Update the free bits of the B-tree page in the
 		insert buffer bitmap. */
 
@@ -1472,7 +3320,7 @@ fail_err:
 		committed mini-transaction, because in crash recovery,
 		the free bits could momentarily be set too high. */
 
-		if (zip_size) {
+		if (page_size.is_compressed()) {
 			/* Update the bits in the same mini-transaction. */
 			ibuf_update_free_bits_zip(block, mtr);
 		} else {
@@ -1494,8 +3342,7 @@ Performs an insert on a page of an index tree. It is assumed that mtr
 holds an x-latch on the tree and on the cursor page. If the insert is
 made on the leaf level, to avoid deadlocks, mtr must also own x-latches
 to brothers of page, if those brothers exist.
-@return	DB_SUCCESS or error number */
-UNIV_INTERN
+@return DB_SUCCESS or error number */
 dberr_t
 btr_cur_pessimistic_insert(
 /*=======================*/
@@ -1521,22 +3368,23 @@ btr_cur_pessimistic_insert(
 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
 	dict_index_t*	index		= cursor->index;
-	ulint		zip_size	= dict_table_zip_size(index->table);
 	big_rec_t*	big_rec_vec	= NULL;
 	dberr_t		err;
 	ibool		inherit = FALSE;
-	ibool		success;
+	bool		success;
 	ulint		n_reserved	= 0;
 
 	ut_ad(dtuple_check_typed(entry));
 
 	*big_rec = NULL;
 
-	ut_ad(mtr_memo_contains(mtr,
-				dict_index_get_lock(btr_cur_get_index(cursor)),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
-				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_flagged(
+		mtr, dict_index_get_lock(btr_cur_get_index(cursor)),
+		MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(cursor->index->table));
+	ut_ad(mtr_is_block_fix(
+		mtr, btr_cur_get_block(cursor),
+		MTR_MEMO_PAGE_X_FIX, cursor->index->table));
 	ut_ad(!dict_index_is_online_ddl(index)
 	      || dict_index_is_clust(index)
 	      || (flags & BTR_CREATE_FLAG));
@@ -1553,7 +3401,8 @@ btr_cur_pessimistic_insert(
 		return(err);
 	}
 
-	if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
+	if (!(flags & BTR_NO_UNDO_LOG_FLAG)
+	    || dict_table_is_intrinsic(index->table)) {
 		/* First reserve enough free space for the file segments
 		of the index tree, so that the insert will not fail because
 		of lack of space */
@@ -1570,7 +3419,7 @@ btr_cur_pessimistic_insert(
 	if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
 				   dict_table_is_comp(index->table),
 				   dtuple_get_n_fields(entry),
-				   zip_size)) {
+				   dict_table_page_size(index->table))) {
 		/* The record is so big that we have to store some fields
 		externally on separate database pages */
 
@@ -1581,7 +3430,7 @@ btr_cur_pessimistic_insert(
 			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
 		}
 
-		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
+		big_rec_vec = dtuple_convert_big_rec(index, 0, entry, &n_ext);
 
 		if (big_rec_vec == NULL) {
 
@@ -1594,7 +3443,7 @@ btr_cur_pessimistic_insert(
 	}
 
 	if (dict_index_get_page(index)
-	    == buf_block_get_page_no(btr_cur_get_block(cursor))) {
+	    == btr_cur_get_block(cursor)->page.id.page_no()) {
 
 		/* The page is the root page */
 		*rec = btr_root_raise_and_insert(
@@ -1604,31 +3453,39 @@ btr_cur_pessimistic_insert(
 			flags, cursor, offsets, heap, entry, n_ext, mtr);
 	}
 
-	ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
+	ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec
+	      || dict_index_is_spatial(index));
 
 	if (!(flags & BTR_NO_LOCKING_FLAG)) {
-		/* The cursor might be moved to the other page,
-		and the max trx id field should be updated after
-		the cursor was fixed. */
-		if (!dict_index_is_clust(index)) {
-			page_update_max_trx_id(
-				btr_cur_get_block(cursor),
-				btr_cur_get_page_zip(cursor),
-				thr_get_trx(thr)->id, mtr);
-		}
-		if (!page_rec_is_infimum(btr_cur_get_rec(cursor))
-		    || btr_page_get_prev(
-			buf_block_get_frame(
-				btr_cur_get_block(cursor)), mtr)
-		       == FIL_NULL) {
-			/* split and inserted need to call
-			lock_update_insert() always. */
-			inherit = TRUE;
+		ut_ad(!dict_table_is_temporary(index->table));
+		if (dict_index_is_spatial(index)) {
+			/* Do nothing */
+		} else {
+			/* The cursor might be moved to the other page
+			and the max trx id field should be updated after
+			the cursor was fixed. */
+			if (!dict_index_is_clust(index)) {
+				page_update_max_trx_id(
+					btr_cur_get_block(cursor),
+					btr_cur_get_page_zip(cursor),
+					thr_get_trx(thr)->id, mtr);
+			}
+			if (!page_rec_is_infimum(btr_cur_get_rec(cursor))
+			    || btr_page_get_prev(
+				buf_block_get_frame(
+					btr_cur_get_block(cursor)), mtr)
+			       == FIL_NULL) {
+				/* A split-and-insert always needs to call
+				lock_update_insert(). */
+				inherit = TRUE;
+			}
 		}
 	}
 
 #ifdef BTR_CUR_ADAPT
-	btr_search_update_hash_on_insert(cursor);
+	if (!index->disable_ahi) {
+		btr_search_update_hash_on_insert(cursor);
+	}
 #endif
 	if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) {
 
@@ -1648,8 +3505,8 @@ btr_cur_pessimistic_insert(
 
 /*************************************************************//**
 For an update, checks the locks and does the undo logging.
-@return	DB_SUCCESS, DB_WAIT_LOCK, or error number */
-UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,6,7)))
+@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
+UNIV_INLINE __attribute__((warn_unused_result))
 dberr_t
 btr_cur_upd_lock_and_undo(
 /*======================*/
@@ -1668,12 +3525,13 @@ btr_cur_upd_lock_and_undo(
 	const rec_t*	rec;
 	dberr_t		err;
 
-	ut_ad(thr || (flags & BTR_NO_LOCKING_FLAG));
+	ut_ad(thr != NULL || (flags & BTR_NO_LOCKING_FLAG));
 
 	rec = btr_cur_get_rec(cursor);
 	index = cursor->index;
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(mtr->is_named_space(index->space));
 
 	if (!dict_index_is_clust(index)) {
 		ut_ad(dict_index_is_online_ddl(index)
@@ -1708,7 +3566,6 @@ btr_cur_upd_lock_and_undo(
 
 /***********************************************************//**
 Writes a redo log record of updating a record in-place. */
-UNIV_INTERN
 void
 btr_cur_update_in_place_log(
 /*========================*/
@@ -1756,7 +3613,7 @@ btr_cur_update_in_place_log(
 		trx_write_roll_ptr(log_ptr, 0);
 		log_ptr += DATA_ROLL_PTR_LEN;
 		/* TRX_ID */
-		log_ptr += mach_ull_write_compressed(log_ptr, 0);
+		log_ptr += mach_u64_write_compressed(log_ptr, 0);
 	}
 
 	mach_write_to_2(log_ptr, page_offset(rec));
@@ -1768,8 +3625,7 @@ btr_cur_update_in_place_log(
 
 /***********************************************************//**
 Parses a redo log record of updating a record in-place.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_cur_parse_update_in_place(
 /*==========================*/
@@ -1826,7 +3682,7 @@ btr_cur_parse_update_in_place(
 	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
 	rec = page + rec_offset;
 
-	/* We do not need to reserve btr_search_latch, as the page is only
+	/* We do not need to reserve search latch, as the page is only
 	being recovered, and there cannot be a hash index to it. */
 
 	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
@@ -1851,13 +3707,12 @@ an update-in-place.
 
 @retval false if out of space; IBUF_BITMAP_FREE will be reset
 outside mtr if the page was recompressed
-@retval	true if enough place;
+@retval true if enough place;
 
 IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
 a secondary index leaf page. This has to be done either within the
 same mini-transaction, or by invoking ibuf_reset_free_bits() before
 mtr_commit(mtr). */
-UNIV_INTERN
 bool
 btr_cur_update_alloc_zip_func(
 /*==========================*/
@@ -1921,7 +3776,9 @@ out_of_space:
 	ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
 
 	/* Out of space: reset the free bits. */
-	if (!dict_index_is_clust(index) && page_is_leaf(page)) {
+	if (!dict_index_is_clust(index)
+	    && !dict_table_is_temporary(index->table)
+	    && page_is_leaf(page)) {
 		ibuf_reset_free_bits(page_cur_get_block(cursor));
 	}
 
@@ -1935,7 +3792,6 @@ We assume here that the ordering fields of the record do not change.
 @retval DB_SUCCESS on success
 @retval DB_ZIP_OVERFLOW if there is not enough space left
 on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
 dberr_t
 btr_cur_update_in_place(
 /*====================*/
@@ -1967,6 +3823,9 @@ btr_cur_update_in_place(
 	index = cursor->index;
 	ut_ad(rec_offs_validate(rec, index, offsets));
 	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+	ut_ad(trx_id > 0
+	      || (flags & BTR_KEEP_SYS_FLAG)
+	      || dict_table_is_intrinsic(index->table));
 	/* The insert buffer tree should never be updated in place. */
 	ut_ad(!dict_index_is_ibuf(index));
 	ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
@@ -1975,15 +3834,13 @@ btr_cur_update_in_place(
 	      || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP))
 	      == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
 		  | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
-	ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX);
+	ut_ad(fil_page_index_page_check(btr_cur_get_page(cursor)));
 	ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id);
 
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops) {
-		btr_cur_trx_report(trx_id, index, "update ");
-		rec_print_new(stderr, rec, offsets);
-	}
-#endif /* UNIV_DEBUG */
+	DBUG_PRINT("ib_cur", ("update-in-place %s (" IB_ID_FMT
+			      ") by " TRX_ID_FMT ": %s",
+			      index->name(), index->id, trx_id,
+			      rec_printer(rec, offsets).str().c_str()));
 
 	block = btr_cur_get_block(cursor);
 	page_zip = buf_block_get_page_zip(block);
@@ -2011,7 +3868,8 @@ btr_cur_update_in_place(
 		goto func_exit;
 	}
 
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+	if (!(flags & BTR_KEEP_SYS_FLAG)
+	    && !dict_table_is_intrinsic(index->table)) {
 		row_upd_rec_sys_fields(rec, NULL, index, offsets,
 				       thr_get_trx(thr), roll_ptr);
 	}
@@ -2038,13 +3896,13 @@ btr_cur_update_in_place(
 			btr_search_update_hash_on_delete(cursor);
 		}
 
-		rw_lock_x_lock(&btr_search_latch);
+		rw_lock_x_lock(btr_get_search_latch(index));
 	}
 
 	row_upd_rec_in_place(rec, index, offsets, update, page_zip);
 
 	if (is_hashed) {
-		rw_lock_x_unlock(&btr_search_latch);
+		rw_lock_x_unlock(btr_get_search_latch(index));
 	}
 
 	btr_cur_update_in_place_log(flags, rec, index, update,
@@ -2066,6 +3924,7 @@ func_exit:
 	if (page_zip
 	    && !(flags & BTR_KEEP_IBUF_BITMAP)
 	    && !dict_index_is_clust(index)
+	    && !dict_table_is_temporary(index->table)
 	    && page_is_leaf(buf_block_get_frame(block))) {
 		/* Update the free bits in the insert buffer. */
 		ibuf_update_free_bits_zip(block, mtr);
@@ -2086,7 +3945,6 @@ fields of the record do not change.
 @retval DB_UNDERFLOW if the page would become too empty
 @retval DB_ZIP_OVERFLOW if there is not enough space left
 on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
 dberr_t
 btr_cur_optimistic_update(
 /*======================*/
@@ -2127,8 +3985,13 @@ btr_cur_optimistic_update(
 	page = buf_block_get_frame(block);
 	rec = btr_cur_get_rec(cursor);
 	index = cursor->index;
+	ut_ad(trx_id > 0
+	      || (flags & BTR_KEEP_SYS_FLAG)
+	      || dict_table_is_intrinsic(index->table));
 	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+	/* This is intended only for leaf page updates */
+	ut_ad(page_is_leaf(page));
 	/* The insert buffer tree should never be updated in place. */
 	ut_ad(!dict_index_is_ibuf(index));
 	ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
@@ -2137,7 +4000,7 @@ btr_cur_optimistic_update(
 	      || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP))
 	      == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
 		  | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
-	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+	ut_ad(fil_page_index_page_check(page));
 	ut_ad(btr_page_get_index_id(page) == index->id);
 
 	*offsets = rec_get_offsets(rec, index, *offsets,
@@ -2147,13 +4010,6 @@ btr_cur_optimistic_update(
 	     || trx_is_recv(thr_get_trx(thr)));
 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops) {
-		btr_cur_trx_report(trx_id, index, "update ");
-		rec_print_new(stderr, rec, *offsets);
-	}
-#endif /* UNIV_DEBUG */
-
 	if (!row_upd_changes_field_size_or_external(index, *offsets, update)) {
 
 		/* The simplest and the most common case: the update does not
@@ -2171,6 +4027,10 @@ any_extern:
 		/* Externally stored fields are treated in pessimistic
 		update */
 
+		/* prefetch siblings of the leaf for the pessimistic
+		operation. */
+		btr_cur_prefetch_siblings(block);
+
 		return(DB_OVERFLOW);
 	}
 
@@ -2181,6 +4041,11 @@ any_extern:
 		}
 	}
 
+	DBUG_PRINT("ib_cur", ("update %s (" IB_ID_FMT ") by " TRX_ID_FMT
+			      ": %s",
+			      index->name(), index->id, trx_id,
+			      rec_printer(rec, *offsets).str().c_str()));
+
 	page_cursor = btr_cur_get_page_cur(cursor);
 
 	if (!*heap) {
@@ -2217,6 +4082,13 @@ any_extern:
 		rec = page_cur_get_rec(page_cursor);
 	}
 
+	/* We limit max record size to 16k even for 64k page size. */
+	if (new_rec_size >= REC_MAX_DATA_SIZE) {
+		err = DB_OVERFLOW;
+
+		goto func_exit;
+	}
+
 	if (UNIV_UNLIKELY(new_rec_size
 			  >= (page_get_free_space_of_empty(page_is_comp(page))
 			      / 2))) {
@@ -2229,7 +4101,7 @@ any_extern:
 
 	if (UNIV_UNLIKELY(page_get_data_size(page)
 			  - old_rec_size + new_rec_size
-			  < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
+			  < BTR_CUR_PAGE_COMPRESS_LIMIT(index))) {
 		/* We may need to update the IBUF_BITMAP_FREE
 		bits after a reorganize that was done in
 		btr_cur_update_alloc_zip(). */
@@ -2247,7 +4119,8 @@ any_extern:
 		   + page_get_max_insert_size_after_reorganize(page, 1));
 
 	if (!page_zip) {
-		max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
+		max_ins_size = page_get_max_insert_size_after_reorganize(
+				page, 1);
 	}
 
 	if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
@@ -2280,8 +4153,9 @@ any_extern:
 	/* Ok, we may do the replacement. Store on the page infimum the
 	explicit locks on rec, before deleting rec (see the comment in
 	btr_cur_pessimistic_update). */
-
-	lock_rec_store_on_page_infimum(block, rec);
+	if (!dict_table_is_locking_disabled(index->table)) {
+		lock_rec_store_on_page_infimum(block, rec);
+	}
 
 	btr_search_update_hash_on_delete(cursor);
 
@@ -2289,7 +4163,8 @@ any_extern:
 
 	page_cur_move_to_prev(page_cursor);
 
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
+	if (!(flags & BTR_KEEP_SYS_FLAG)
+	    && !dict_table_is_intrinsic(index->table)) {
 		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
 					      roll_ptr);
 		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
@@ -2302,8 +4177,9 @@ any_extern:
 	ut_a(rec); /* <- We calculated above the insert would fit */
 
 	/* Restore the old explicit lock state on the record */
-
-	lock_rec_restore_from_page_infimum(block, rec, block);
+	if (!dict_table_is_locking_disabled(index->table)) {
+		lock_rec_restore_from_page_infimum(block, rec, block);
+	}
 
 	page_cur_move_to_next(page_cursor);
 	ut_ad(err == DB_SUCCESS);
@@ -2311,8 +4187,8 @@ any_extern:
 func_exit:
 	if (!(flags & BTR_KEEP_IBUF_BITMAP)
 	    && !dict_index_is_clust(index)
-	    && page_is_leaf(page)) {
-
+	    && !dict_table_is_temporary(index->table)) {
+		/* Update the free bits in the insert buffer. */
 		if (page_zip) {
 			ibuf_update_free_bits_zip(block, mtr);
 		} else {
@@ -2320,6 +4196,12 @@ func_exit:
 		}
 	}
 
+	if (err != DB_SUCCESS) {
+		/* prefetch siblings of the leaf for the pessimistic
+		operation. */
+		btr_cur_prefetch_siblings(block);
+	}
+
 	return(err);
 }
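
Because an optimistic update may change the record size, btr_cur_optimistic_update() deletes the old record and re-inserts the new version, parking the explicit lock state on the page infimum across the gap. The save/mutate/restore shape of that, as a minimal sketch over hypothetical types:

    #include <string>
    #include <utility>
    #include <vector>

    struct Page { std::vector<std::string> parked_locks; };
    struct Rec  { std::vector<std::string> locks; std::string payload; };

    void update_by_delete_insert(Page& page, Rec& rec, std::string new_payload)
    {
        /* lock_rec_store_on_page_infimum(): park the lock state */
        page.parked_locks = std::move(rec.locks);

        /* delete + insert in the real code; here we just swap the payload */
        rec.payload = std::move(new_payload);

        /* lock_rec_restore_from_page_infimum(): reattach the locks */
        rec.locks = std::move(page.parked_locks);
    }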
 
@@ -2339,9 +4221,6 @@ btr_cur_pess_upd_restore_supremum(
 {
 	page_t*		page;
 	buf_block_t*	prev_block;
-	ulint		space;
-	ulint		zip_size;
-	ulint		prev_page_no;
 
 	page = buf_block_get_frame(block);
 
@@ -2351,13 +4230,12 @@ btr_cur_pess_upd_restore_supremum(
 		return;
 	}
 
-	space = buf_block_get_space(block);
-	zip_size = buf_block_get_zip_size(block);
-	prev_page_no = btr_page_get_prev(page, mtr);
+	const ulint	prev_page_no = btr_page_get_prev(page, mtr);
+
+	const page_id_t	page_id(block->page.id.space(), prev_page_no);
 
 	ut_ad(prev_page_no != FIL_NULL);
-	prev_block = buf_page_get_with_no_latch(space, zip_size,
-						prev_page_no, mtr);
+	prev_block = buf_page_get_with_no_latch(page_id, block->page.size, mtr);
 #ifdef UNIV_BTR_DEBUG
 	ut_a(btr_page_get_next(prev_block->frame, mtr)
 	     == page_get_page_no(page));
@@ -2372,45 +4250,12 @@ btr_cur_pess_upd_restore_supremum(
 }
 
 /*************************************************************//**
-Check if the total length of the modified blob for the row is within 10%
-of the total redo log size.  This constraint on the blob length is to
-avoid overwriting the redo logs beyond the last checkpoint lsn.
-@return	DB_SUCCESS or DB_TOO_BIG_FOR_REDO. */
-static
-dberr_t
-btr_check_blob_limit(const big_rec_t*	big_rec_vec)
-{
-	const	ib_uint64_t redo_size = srv_n_log_files * srv_log_file_size
-		* UNIV_PAGE_SIZE;
-	const	ib_uint64_t redo_10p = redo_size / 10;
-	ib_uint64_t	total_blob_len = 0;
-	dberr_t	err = DB_SUCCESS;
-
-	/* Calculate the total number of bytes for blob data */
-	for (ulint i = 0; i < big_rec_vec->n_fields; i++) {
-		total_blob_len += big_rec_vec->fields[i].len;
-	}
-
-	if (total_blob_len > redo_10p) {
-		ib_logf(IB_LOG_LEVEL_ERROR, "The total blob data"
-			" length (" UINT64PF ") is greater than"
-			" 10%% of the total redo log size (" UINT64PF
-			"). Please increase total redo log size.",
-			total_blob_len, redo_size);
-		err = DB_TOO_BIG_FOR_REDO;
-	}
-
-	return(err);
-}
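
For reference, the removed check capped the total BLOB payload at one tenth of the redo capacity, srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE (so srv_log_file_size is evidently counted in pages here). A worked example under an assumed configuration of two 3072-page log files with 16 KiB pages:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        /* Assumed configuration; the variable names mirror the removed
        formula but the values are only an example. */
        const uint64_t n_log_files   = 2;
        const uint64_t log_file_size = 3072;   /* in pages */
        const uint64_t page_size     = 16384;  /* 16 KiB */

        const uint64_t redo_size = n_log_files * log_file_size * page_size;
        const uint64_t redo_10p  = redo_size / 10;

        /* 2 * 3072 * 16384 = 100663296 bytes (96 MiB), so the cap on the
        total BLOB length would have been 10066329 bytes (~9.6 MiB). */
        std::printf("redo=%llu cap=%llu\n",
                    (unsigned long long) redo_size,
                    (unsigned long long) redo_10p);
        return 0;
    }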
-
-/*************************************************************//**
 Performs an update of a record on a page of a tree. It is assumed
 that mtr holds an x-latch on the tree and on the cursor page. If the
 update is made on the leaf level, to avoid deadlocks, mtr must also
 own x-latches to brothers of page, if those brothers exist. We assume
 here that the ordering fields of the record do not change.
-@return	DB_SUCCESS or error code */
-UNIV_INTERN
+@return DB_SUCCESS or error code */
 dberr_t
 btr_cur_pessimistic_update(
 /*=======================*/
@@ -2428,9 +4273,10 @@ btr_cur_pessimistic_update(
 				big_rec and the index tuple */
 	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
 				be stored externally by the caller, or NULL */
-	const upd_t*	update,	/*!< in: update vector; this is allowed also
-				contain trx id and roll ptr fields, but
-				the values in update vector have no effect */
+	upd_t*		update,	/*!< in/out: update vector; this is allowed to
+				also contain trx id and roll ptr fields.
+				Non-updated columns that are moved offpage will
+				be appended to this. */
 	ulint		cmpl_info,/*!< in: compiler info on secondary index
 				updates */
 	que_thr_t*	thr,	/*!< in: query thread */
@@ -2462,14 +4308,19 @@ btr_cur_pessimistic_update(
 	page_zip = buf_block_get_page_zip(block);
 	index = cursor->index;
 
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+					MTR_MEMO_X_LOCK |
+					MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 #ifdef UNIV_ZIP_DEBUG
 	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 	/* The insert buffer tree should never be updated in place. */
 	ut_ad(!dict_index_is_ibuf(index));
+	ut_ad(trx_id > 0
+	      || (flags & BTR_KEEP_SYS_FLAG)
+	      || dict_table_is_intrinsic(index->table));
 	ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
 	      || dict_index_is_clust(index));
 	ut_ad(thr_get_trx(thr)->id == trx_id
@@ -2496,41 +4347,16 @@ btr_cur_pessimistic_update(
 		if (page_zip
 		    && optim_err != DB_ZIP_OVERFLOW
 		    && !dict_index_is_clust(index)
+		    && !dict_table_is_temporary(index->table)
 		    && page_is_leaf(page)) {
 			ibuf_update_free_bits_zip(block, mtr);
 		}
 
-		return(err);
-	}
-
-	/* Do lock checking and undo logging */
-	err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
-					update, cmpl_info,
-					thr, mtr, &roll_ptr);
-	if (err != DB_SUCCESS) {
-		goto err_exit;
-	}
-
-	if (optim_err == DB_OVERFLOW) {
-		ulint	reserve_flag;
-
-		/* First reserve enough free space for the file segments
-		of the index tree, so that the update will not fail because
-		of lack of space */
-
-		ulint	n_extents = cursor->tree_height / 16 + 3;
-
-		if (flags & BTR_NO_UNDO_LOG_FLAG) {
-			reserve_flag = FSP_CLEANING;
-		} else {
-			reserve_flag = FSP_NORMAL;
+		if (big_rec_vec != NULL) {
+			dtuple_big_rec_free(big_rec_vec);
 		}
 
-		if (!fsp_reserve_free_extents(&n_reserved, index->space,
-					      n_extents, reserve_flag, mtr)) {
-			err = DB_OUT_OF_FILE_SPACE;
-			goto err_exit;
-		}
+		return(err);
 	}
 
 	rec = btr_cur_get_rec(cursor);
@@ -2549,14 +4375,19 @@ btr_cur_pessimistic_update(
 	itself.  Thus the following call is safe. */
 	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
 						     FALSE, entry_heap);
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
-		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
-					      roll_ptr);
-		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
-					      trx_id);
-	}
 
-	if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) {
+	/* We have to set appropriate extern storage bits in the new
+	record to be inserted: we have to remember which fields were such */
+
+	ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
+	ut_ad(rec_offs_validate(rec, index, *offsets));
+	n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap);
+
+	/* UNDO logging is also turned off during normal operation on
+	intrinsic tables, so the condition needs to ensure that the
+	table is not intrinsic. */
+	if ((flags & BTR_NO_UNDO_LOG_FLAG)
+	    && rec_offs_any_extern(*offsets)
+	    && !dict_table_is_intrinsic(index->table)) {
 		/* We are in a transaction rollback undoing a row
 		update: we must free possible externally stored fields
 		which got new values in the update, if they are not
@@ -2565,35 +4396,23 @@ btr_cur_pessimistic_update(
 		update it back again. */
 
 		ut_ad(big_rec_vec == NULL);
+		ut_ad(dict_index_is_clust(index));
+		ut_ad(thr_get_trx(thr)->in_rollback);
+
+		DBUG_EXECUTE_IF("ib_blob_update_rollback", DBUG_SUICIDE(););
+		RECOVERY_CRASH(99);
 
 		btr_rec_free_updated_extern_fields(
-			index, rec, page_zip, *offsets, update,
-			trx_is_recv(thr_get_trx(thr))
-			? RB_RECOVERY : RB_NORMAL, mtr);
+			index, rec, page_zip, *offsets, update, true, mtr);
 	}
 
-	/* We have to set appropriate extern storage bits in the new
-	record to be inserted: we have to remember which fields were such */
-
-	ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
-	ut_ad(rec_offs_validate(rec, index, *offsets));
-	n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap);
+	if (page_zip_rec_needs_ext(
+			rec_get_converted_size(index, new_entry, n_ext),
+			page_is_comp(page),
+			dict_index_get_n_fields(index),
+			block->page.size)) {
 
-	if (page_zip) {
-		ut_ad(page_is_comp(page));
-		if (page_zip_rec_needs_ext(
-			    rec_get_converted_size(index, new_entry, n_ext),
-			    TRUE,
-			    dict_index_get_n_fields(index),
-			    page_zip_get_size(page_zip))) {
-
-			goto make_external;
-		}
-	} else if (page_zip_rec_needs_ext(
-			   rec_get_converted_size(index, new_entry, n_ext),
-			   page_is_comp(page), 0, 0)) {
-make_external:
-		big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
+		big_rec_vec = dtuple_convert_big_rec(index, update, new_entry, &n_ext);
 		if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
 
 			/* We cannot goto return_after_reservations,
@@ -2618,21 +4437,43 @@ make_external:
 		ut_ad(flags & BTR_KEEP_POS_FLAG);
 	}
 
-	if (big_rec_vec) {
+	/* Do lock checking and undo logging */
+	err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
+					update, cmpl_info,
+					thr, mtr, &roll_ptr);
+	if (err != DB_SUCCESS) {
+		goto err_exit;
+	}
+
+	if (optim_err == DB_OVERFLOW) {
+
+		/* First reserve enough free space for the file segments
+		of the index tree, so that the update will not fail because
+		of lack of space */
 
-		err = btr_check_blob_limit(big_rec_vec);
+		ulint	n_extents = cursor->tree_height / 16 + 3;
 
-		if (err != DB_SUCCESS) {
-			if (n_reserved > 0) {
-				fil_space_release_free_extents(
-					index->space, n_reserved);
-			}
+		if (!fsp_reserve_free_extents(
+		            &n_reserved, index->space, n_extents,
+		            flags & BTR_NO_UNDO_LOG_FLAG
+		            ? FSP_CLEANING : FSP_NORMAL,
+		            mtr)) {
+			err = DB_OUT_OF_FILE_SPACE;
 			goto err_exit;
 		}
 	}
 
+	if (!(flags & BTR_KEEP_SYS_FLAG)
+	    && !dict_table_is_intrinsic(index->table)) {
+		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
+					      roll_ptr);
+		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
+					      trx_id);
+	}
+
 	if (!page_zip) {
-		max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
+		max_ins_size = page_get_max_insert_size_after_reorganize(
+				page, 1);
 	}
 
 	/* Store state of explicit locks on rec on the page infimum record,
@@ -2643,8 +4484,9 @@ make_external:
 	btr_root_raise_and_insert. Therefore we cannot in the lock system
 	delete the lock structs set on the root page even if the root
 	page carries just node pointers. */
-
-	lock_rec_store_on_page_infimum(block, rec);
+	if (!dict_table_is_locking_disabled(index->table)) {
+		lock_rec_store_on_page_infimum(block, rec);
+	}
 
 	btr_search_update_hash_on_delete(cursor);
 
@@ -2663,8 +4505,10 @@ make_external:
 	if (rec) {
 		page_cursor->rec = rec;
 
-		lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
-						   rec, block);
+		if (!dict_table_is_locking_disabled(index->table)) {
+			lock_rec_restore_from_page_infimum(
+				btr_cur_get_block(cursor), rec, block);
+		}
 
 		if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
 			/* The new inserted record owns its possible externally
@@ -2681,8 +4525,8 @@ make_external:
 					page_cursor->rec, index, *offsets);
 			}
 		} else if (!dict_index_is_clust(index)
+			   && !dict_table_is_temporary(index->table)
 			   && page_is_leaf(page)) {
-
 			/* Update the free bits in the insert buffer.
 			This is the same block which was skipped by
 			BTR_KEEP_IBUF_BITMAP. */
@@ -2694,6 +4538,18 @@ make_external:
 			}
 		}
 
+		if (!srv_read_only_mode
+		    && !big_rec_vec
+		    && page_is_leaf(page)
+		    && !dict_index_is_online_ddl(index)) {
+
+			mtr_memo_release(mtr, dict_index_get_lock(index),
+					 MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK);
+
+			/* NOTE: We cannot release the root block latch here,
+			because it contains the segment header and has already
+			been modified in most cases. */
+		}
+
 		err = DB_SUCCESS;
 		goto return_after_reservations;
 	} else {
@@ -2707,24 +4563,31 @@ make_external:
 		/* Out of space: reset the free bits.
 		This is the same block which was skipped by
 		BTR_KEEP_IBUF_BITMAP. */
-		if (!dict_index_is_clust(index) && page_is_leaf(page)) {
+		if (!dict_index_is_clust(index)
+		    && !dict_table_is_temporary(index->table)
+		    && page_is_leaf(page)) {
 			ibuf_reset_free_bits(block);
 		}
 	}
 
-	if (big_rec_vec) {
+	if (big_rec_vec != NULL && !dict_table_is_intrinsic(index->table)) {
 		ut_ad(page_is_leaf(page));
 		ut_ad(dict_index_is_clust(index));
 		ut_ad(flags & BTR_KEEP_POS_FLAG);
 
 		/* btr_page_split_and_insert() in
 		btr_cur_pessimistic_insert() invokes
-		mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK).
+		mtr_memo_release(mtr, index->lock, MTR_MEMO_SX_LOCK).
 		We must keep the index->lock when we created a
 		big_rec, so that row_upd_clust_rec() can store the
 		big_rec in the same mini-transaction. */
 
-		mtr_x_lock(dict_index_get_lock(index), mtr);
+		ut_ad(mtr_memo_contains_flagged(mtr,
+						dict_index_get_lock(index),
+						MTR_MEMO_X_LOCK |
+						MTR_MEMO_SX_LOCK));
+
+		mtr_sx_lock(dict_index_get_lock(index), mtr);
 	}
 
 	/* Was the record to be updated positioned as the first user
@@ -2748,7 +4611,12 @@ make_external:
 	ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
 	page_cursor->rec = rec;
 
-	if (dict_index_is_sec_or_ibuf(index)) {
+	/* Multiple transactions cannot operate on the same temp-table
+	in parallel.
+	max_trx_id is ignored for temp tables because it is not required
+	for MVCC. */
+	if (dict_index_is_sec_or_ibuf(index)
+	    && !dict_table_is_temporary(index->table)) {
 		/* Update PAGE_MAX_TRX_ID in the index page header.
 		It was not updated by btr_cur_pessimistic_insert()
 		because of BTR_NO_LOCKING_FLAG. */
@@ -2776,15 +4644,17 @@ make_external:
 					     rec, index, *offsets, mtr);
 	}
 
-	lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
-					   rec, block);
+	if (!dict_table_is_locking_disabled(index->table)) {
+		lock_rec_restore_from_page_infimum(
+			btr_cur_get_block(cursor), rec, block);
+	}
 
 	/* If necessary, restore also the correct lock state for a new,
 	preceding supremum record created in a page split. While the old
 	record was nonexistent, the supremum might have inherited its locks
 	from a wrong record. */
 
-	if (!was_first) {
+	if (!was_first && !dict_table_is_locking_disabled(index->table)) {
 		btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
 						  rec, mtr);
 	}
@@ -2821,6 +4691,7 @@ btr_cur_del_mark_set_clust_rec_log(
 	byte*	log_ptr;
 
 	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
+	ut_ad(mtr->is_named_space(index->space));
 
 	log_ptr = mlog_open_and_write_index(mtr, rec, index,
 					    page_rec_is_comp(rec)
@@ -2849,8 +4720,7 @@ btr_cur_del_mark_set_clust_rec_log(
 /****************************************************************//**
 Parses the redo log record for delete marking or unmarking of a clustered
 index record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_cur_parse_del_mark_set_clust_rec(
 /*=================================*/
@@ -2901,7 +4771,7 @@ btr_cur_parse_del_mark_set_clust_rec(
 	if (page) {
 		rec = page + offset;
 
-		/* We do not need to reserve btr_search_latch, as the page
+		/* We do not need to reserve search latch, as the page
 		is only being recovered, and there cannot be a hash index to
 		it. Besides, these fields are being updated in place
 		and the adaptive hash index does not depend on them. */
@@ -2933,16 +4803,18 @@ Marks a clustered index record deleted. Writes an undo log record to
 undo log on this delete marking. Writes in the trx id field the id
 of the deleting transaction, and in the roll ptr field pointer to the
 undo log record created.
-@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
 dberr_t
 btr_cur_del_mark_set_clust_rec(
 /*===========================*/
+	ulint		flags,  /*!< in: undo logging and locking flags */
 	buf_block_t*	block,	/*!< in/out: buffer block of the record */
 	rec_t*		rec,	/*!< in/out: record */
 	dict_index_t*	index,	/*!< in: clustered index of the record */
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
 	que_thr_t*	thr,	/*!< in: query thread */
+	const dtuple_t*	entry,	/*!< in: dtuple for the deleting record, also
+				contains the virtual cols if there are any */
 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
 	roll_ptr_t	roll_ptr;
@@ -2955,16 +4827,13 @@ btr_cur_del_mark_set_clust_rec(
 	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
 	ut_ad(buf_block_get_frame(block) == page_align(rec));
 	ut_ad(page_is_leaf(page_align(rec)));
+	ut_ad(mtr->is_named_space(index->space));
 
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark ");
-		rec_print_new(stderr, rec, offsets);
+	if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+		/* During cascading delete operations, this becomes possible. */
+		ut_ad(rec_get_trx_id(rec, index) == thr_get_trx(thr)->id);
+		return(DB_SUCCESS);
 	}
-#endif /* UNIV_DEBUG */
-
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
 
 	err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block,
 						   rec, index, offsets, thr);
@@ -2974,27 +4843,42 @@ btr_cur_del_mark_set_clust_rec(
 		return(err);
 	}
 
-	err = trx_undo_report_row_operation(0, TRX_UNDO_MODIFY_OP, thr,
-					    index, NULL, NULL, 0, rec, offsets,
+	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
+					    index, entry, NULL, 0, rec, offsets,
 					    &roll_ptr);
 	if (err != DB_SUCCESS) {
 
 		return(err);
 	}
 
-	/* The btr_search_latch is not needed here, because
+	/* The search latch is not needed here, because
 	the adaptive hash index does not depend on the delete-mark
 	and the delete-mark is being updated in place. */
 
 	page_zip = buf_block_get_page_zip(block);
 
-	btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE);
 	btr_rec_set_deleted_flag(rec, page_zip, TRUE);
 
+	/* For intrinsic tables, the roll-ptr is not maintained, as there
+	is no UNDO logging. Skip updating it. */
+	if (dict_table_is_intrinsic(index->table)) {
+		return(err);
+	}
+
 	trx = thr_get_trx(thr);
+	/* This function must not be invoked during rollback
+	(of a TRX_STATE_PREPARE transaction or otherwise). */
+	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+	ut_ad(!trx->in_rollback);
+
+	DBUG_PRINT("ib_cur", ("delete-mark clust %s (" IB_ID_FMT
+			      ") by " TRX_ID_FMT ": %s",
+			      index->table_name, index->id,
+			      trx_get_id_for_print(trx),
+			      rec_printer(rec, offsets).str().c_str()));
 
 	if (dict_index_is_online_ddl(index)) {
-		row_log_table_delete(rec, index, offsets, NULL);
+		row_log_table_delete(rec, entry, index, offsets, NULL);
 	}
 
 	row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr);
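
As the surrounding code shows, a clustered-index delete mark never removes the row: it flips the deleted flag in place and stamps the deleting transaction's id plus a roll pointer into the system fields, so that older readers can rebuild the previous version from undo. A sketch of the information recorded (hypothetical layout, not the on-page format):

    #include <cstdint>

    struct ClustRecSysFields {
        bool     deleted;   /* the delete-mark bit */
        uint64_t trx_id;    /* DATA_TRX_ID of the deleting transaction */
        uint64_t roll_ptr;  /* DATA_ROLL_PTR -> undo log record */
    };

    void del_mark(ClustRecSysFields& rec, uint64_t trx_id, uint64_t roll_ptr)
    {
        rec.deleted  = true;
        rec.trx_id   = trx_id;
        rec.roll_ptr = roll_ptr; /* readers follow this to the old version */
    }
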
@@ -3042,8 +4926,7 @@ btr_cur_del_mark_set_sec_rec_log(
 /****************************************************************//**
 Parses the redo log record for delete marking or unmarking of a secondary
 index record.
-@return	end of log record or NULL */
-UNIV_INTERN
+@return end of log record or NULL */
 byte*
 btr_cur_parse_del_mark_set_sec_rec(
 /*===============================*/
@@ -3072,7 +4955,7 @@ btr_cur_parse_del_mark_set_sec_rec(
 	if (page) {
 		rec = page + offset;
 
-		/* We do not need to reserve btr_search_latch, as the page
+		/* We do not need to reserve search latch, as the page
 		is only being recovered, and there cannot be a hash index to
 		it. Besides, the delete-mark flag is being updated in place
 		and the adaptive hash index does not depend on it. */
@@ -3086,8 +4969,7 @@ btr_cur_parse_del_mark_set_sec_rec(
 #ifndef UNIV_HOTBACKUP
 /***********************************************************//**
 Sets a secondary index record delete mark to TRUE or FALSE.
-@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
+@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
 dberr_t
 btr_cur_del_mark_set_sec_rec(
 /*=========================*/
@@ -3104,14 +4986,6 @@ btr_cur_del_mark_set_sec_rec(
 	block = btr_cur_get_block(cursor);
 	rec = btr_cur_get_rec(cursor);
 
-#ifdef UNIV_DEBUG
-	if (btr_cur_print_record_ops && thr) {
-		btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index,
-				   "del mark ");
-		rec_print(stderr, rec, cursor->index);
-	}
-#endif /* UNIV_DEBUG */
-
 	err = lock_sec_rec_modify_check_and_lock(flags,
 						 btr_cur_get_block(cursor),
 						 rec, cursor->index, thr, mtr);
@@ -3123,7 +4997,15 @@ btr_cur_del_mark_set_sec_rec(
 	ut_ad(!!page_rec_is_comp(rec)
 	      == dict_table_is_comp(cursor->index->table));
 
-	/* We do not need to reserve btr_search_latch, as the
+	DBUG_PRINT("ib_cur", ("delete-mark=%u sec %u:%u:%u in %s("
+			      IB_ID_FMT ") by " TRX_ID_FMT,
+			      unsigned(val),
+			      block->page.id.space(), block->page.id.page_no(),
+			      unsigned(page_rec_get_heap_no(rec)),
+			      cursor->index->name(), cursor->index->id,
+			      trx_get_id_for_print(thr_get_trx(thr))));
+
+	/* We do not need to reserve search latch, as the
 	delete-mark flag is being updated in place and the adaptive
 	hash index does not depend on it. */
 	btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
@@ -3136,7 +5018,6 @@ btr_cur_del_mark_set_sec_rec(
 /***********************************************************//**
 Sets a secondary index record's delete mark to the given value. This
 function is only used by the insert buffer merge mechanism. */
-UNIV_INTERN
 void
 btr_cur_set_deleted_flag_for_ibuf(
 /*==============================*/
@@ -3148,7 +5029,7 @@ btr_cur_set_deleted_flag_for_ibuf(
 	ibool		val,		/*!< in: value to set */
 	mtr_t*		mtr)		/*!< in/out: mini-transaction */
 {
-	/* We do not need to reserve btr_search_latch, as the page
+	/* We do not need to reserve search latch, as the page
 	has just been read to the buffer pool and there cannot be
 	a hash index to it.  Besides, the delete-mark flag is being
 	updated in place and the adaptive hash index does not depend
@@ -3167,8 +5048,7 @@ that mtr holds an x-latch on the tree and on the cursor page. To avoid
 deadlocks, mtr must also own x-latches to brothers of page, if those
 brothers exist. NOTE: it is assumed that the caller has reserved enough
 free extents so that the compression will always succeed if done!
-@return	TRUE if compression occurred */
-UNIV_INTERN
+@return TRUE if compression occurred */
 ibool
 btr_cur_compress_if_useful(
 /*=======================*/
@@ -3179,11 +5059,29 @@ btr_cur_compress_if_useful(
 				cursor position even if compression occurs */
 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
-	ut_ad(mtr_memo_contains(mtr,
-				dict_index_get_lock(btr_cur_get_index(cursor)),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
-				MTR_MEMO_PAGE_X_FIX));
+	/* Avoid applying compression, as we do not accept a lot of page
+	garbage given the workload of intrinsic tables. */
+	if (dict_table_is_intrinsic(cursor->index->table)) {
+		return(FALSE);
+	}
+
+	ut_ad(mtr_memo_contains_flagged(
+		mtr, dict_index_get_lock(btr_cur_get_index(cursor)),
+		MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(cursor->index->table));
+	ut_ad(mtr_is_block_fix(
+		mtr, btr_cur_get_block(cursor),
+		MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+
+	if (dict_index_is_spatial(cursor->index)) {
+		const page_t*   page = btr_cur_get_page(cursor);
+
+		/* Check whether page lock prevents the compression */
+		if (!lock_test_prdt_page_lock(
+			page_get_space_id(page), page_get_page_no(page))) {
+			return(false);
+		}
+	}
 
 	return(btr_cur_compress_recommendation(cursor, mtr)
 	       && btr_compress(cursor, adjust, mtr));
@@ -3193,8 +5091,7 @@ btr_cur_compress_if_useful(
 Removes the record on which the tree cursor is positioned on a leaf page.
 It is assumed that the mtr has an x-latch on the page where the cursor is
 positioned, but no latch on the whole tree.
-@return	TRUE if success, i.e., the page did not become too empty */
-UNIV_INTERN
+@return TRUE if success, i.e., the page did not become too empty */
 ibool
 btr_cur_optimistic_delete_func(
 /*===========================*/
@@ -3221,6 +5118,10 @@ btr_cur_optimistic_delete_func(
 	ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
 	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 				MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, btr_cur_get_block(cursor),
+			       MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+	ut_ad(mtr->is_named_space(cursor->index->space));
+
 	/* This is intended only for leaf page deletions */
 
 	block = btr_cur_get_block(cursor);
@@ -3273,12 +5174,16 @@ btr_cur_optimistic_delete_func(
 			/* The change buffer does not handle inserts
 			into non-leaf pages, into clustered indexes,
 			or into the change buffer. */
-			if (page_is_leaf(page)
-			    && !dict_index_is_clust(cursor->index)
+			if (!dict_index_is_clust(cursor->index)
+			    && !dict_table_is_temporary(cursor->index->table)
 			    && !dict_index_is_ibuf(cursor->index)) {
 				ibuf_update_free_bits_low(block, max_ins, mtr);
 			}
 		}
+	} else {
+		/* prefetch siblings of the leaf for the pessimistic
+		operation. */
+		btr_cur_prefetch_siblings(block);
 	}
 
 	if (UNIV_LIKELY_NULL(heap)) {
@@ -3295,8 +5200,8 @@ or if it is the only page on the level. It is assumed that mtr holds
 an x-latch on the tree and on the cursor page. To avoid deadlocks,
 mtr must also own x-latches to brothers of page, if those brothers
 exist.
-@return	TRUE if compression occurred */
-UNIV_INTERN
+@return TRUE if compression occurred; FALSE if not, or if something
+went wrong. */
 ibool
 btr_cur_pessimistic_delete(
 /*=======================*/
@@ -3314,7 +5219,7 @@ btr_cur_pessimistic_delete(
 				stays valid: it points to successor of
 				deleted record on function exit */
 	ulint		flags,	/*!< in: BTR_CREATE_FLAG or 0 */
-	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	bool		rollback,/*!< in: performing rollback? */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	buf_block_t*	block;
@@ -3323,11 +5228,14 @@ btr_cur_pessimistic_delete(
 	dict_index_t*	index;
 	rec_t*		rec;
 	ulint		n_reserved	= 0;
-	ibool		success;
+	bool		success;
 	ibool		ret		= FALSE;
 	ulint		level;
 	mem_heap_t*	heap;
 	ulint*		offsets;
+#ifdef UNIV_DEBUG
+	bool		parent_latched	= false;
+#endif /* UNIV_DEBUG */
 
 	block = btr_cur_get_block(cursor);
 	page = buf_block_get_frame(block);
@@ -3337,9 +5245,13 @@ btr_cur_pessimistic_delete(
 	ut_ad(!dict_index_is_online_ddl(index)
 	      || dict_index_is_clust(index)
 	      || (flags & BTR_CREATE_FLAG));
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
+					MTR_MEMO_X_LOCK
+					| MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
+	ut_ad(mtr->is_named_space(index->space));
+
 	if (!has_reserved_extents) {
 		/* First reserve enough free space for the file segments
 		of the index tree, so that the node pointer updates will
@@ -3370,7 +5282,7 @@ btr_cur_pessimistic_delete(
 	if (rec_offs_any_extern(offsets)) {
 		btr_rec_free_externally_stored_fields(index,
 						      rec, offsets, page_zip,
-						      rb_ctx, mtr);
+						      rollback, mtr);
 #ifdef UNIV_ZIP_DEBUG
 		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
@@ -3378,7 +5290,7 @@ btr_cur_pessimistic_delete(
 
 	if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
 	    && UNIV_UNLIKELY(dict_index_get_page(index)
-			     != buf_block_get_page_no(block))) {
+			     != block->page.id.page_no())) {
 
 		/* If there is only one record, drop the whole page in
 		btr_discard_page, if this is not the root page */
@@ -3414,20 +5326,55 @@ btr_cur_pessimistic_delete(
 			mini-transaction and because writing to the redo log
 			is an atomic operation (performed by mtr_commit()). */
 			btr_set_min_rec_mark(next_rec, mtr);
+		} else if (dict_index_is_spatial(index)) {
+			/* For an rtree, if we delete the leftmost node
+			pointer, we need to update the parent page. */
+			rtr_mbr_t	father_mbr;
+			rec_t*		father_rec;
+			btr_cur_t	father_cursor;
+			ulint*		offsets;
+			bool		upd_ret;
+			ulint		len;
+
+			rtr_page_get_father_block(NULL, heap, index,
+						  block, mtr, NULL,
+						  &father_cursor);
+			offsets = rec_get_offsets(
+				btr_cur_get_rec(&father_cursor), index,
+				NULL, ULINT_UNDEFINED, &heap);
+
+			father_rec = btr_cur_get_rec(&father_cursor);
+			rtr_read_mbr(rec_get_nth_field(
+				father_rec, offsets, 0, &len), &father_mbr);
+
+			upd_ret = rtr_update_mbr_field(&father_cursor, offsets,
+						       NULL, page, &father_mbr,
+						       next_rec, mtr);
+
+			if (!upd_ret) {
+				*err = DB_ERROR;
+
+				mem_heap_free(heap);
+				return(FALSE);
+			}
+
+			ut_d(parent_latched = true);
 		} else {
 			/* Otherwise, if we delete the leftmost node pointer
-			on a page, we have to change the father node pointer
+			on a page, we have to change the parent node pointer
 			so that it is equal to the new leftmost node pointer
 			on the page */
 
 			btr_node_ptr_delete(index, block, mtr);
 
 			dtuple_t*	node_ptr = dict_index_build_node_ptr(
-				index, next_rec, buf_block_get_page_no(block),
+				index, next_rec, block->page.id.page_no(),
 				heap, level);
 
 			btr_insert_on_non_leaf_level(
 				flags, index, level + 1, node_ptr, mtr);
+
+			ut_d(parent_latched = true);
 		}
 	}
 
@@ -3438,7 +5385,8 @@ btr_cur_pessimistic_delete(
 	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
-	ut_ad(btr_check_node_ptr(index, block, mtr));
+	/* btr_check_node_ptr() needs parent block latched */
+	ut_ad(!parent_latched || btr_check_node_ptr(index, block, mtr));
 
 return_after_reservations:
 	*err = DB_SUCCESS;
@@ -3449,6 +5397,17 @@ return_after_reservations:
 		ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
 	}
 
+	if (!srv_read_only_mode
+	    && page_is_leaf(page)
+	    && !dict_index_is_online_ddl(index)) {
+
+		mtr_memo_release(mtr, dict_index_get_lock(index),
+				 MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK);
+
+		/* NOTE: We cannot release the root block latch here,
+		because it contains the segment header and has already
+		been modified in most cases. */
+	}
+
 	if (n_reserved > 0) {
 		fil_space_release_free_extents(index->space, n_reserved);
 	}
@@ -3512,63 +5471,63 @@ so far and assume that all pages that we did not scan up to slot2->page
contain the same number of records, then we multiply that average by
the number of pages between slot1->page and slot2->page (which is
n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
-@return	number of rows (exact or estimated) */
+@return number of rows, not including the borders (exact or estimated) */
 static
-ib_int64_t
+int64_t
 btr_estimate_n_rows_in_range_on_level(
 /*==================================*/
 	dict_index_t*	index,			/*!< in: index */
 	btr_path_t*	slot1,			/*!< in: left border */
 	btr_path_t*	slot2,			/*!< in: right border */
-	ib_int64_t	n_rows_on_prev_level,	/*!< in: number of rows
+	int64_t		n_rows_on_prev_level,	/*!< in: number of rows
 						on the previous level for the
 						same descend paths; used to
-						determine the numbe of pages
+						determine the number of pages
 						on this level */
 	ibool*		is_n_rows_exact)	/*!< out: TRUE if the returned
 						value is exact i.e. not an
 						estimation */
 {
-	ulint		space;
-	ib_int64_t	n_rows;
+	int64_t		n_rows;
 	ulint		n_pages_read;
-	ulint		page_no;
-	ulint		zip_size;
 	ulint		level;
 
-	space = dict_index_get_space(index);
-
 	n_rows = 0;
 	n_pages_read = 0;
 
 	/* Assume by default that we will scan all pages between
-	slot1->page_no and slot2->page_no */
+	slot1->page_no and slot2->page_no. */
 	*is_n_rows_exact = TRUE;
 
-	/* add records from slot1->page_no which are to the right of
-	the record which serves as a left border of the range, if any */
-	if (slot1->nth_rec < slot1->n_recs) {
+	/* Add records from slot1->page_no which are to the right of
+	the record which serves as a left border of the range, if any
+	(we don't include the record itself in this count). */
+	if (slot1->nth_rec <= slot1->n_recs) {
 		n_rows += slot1->n_recs - slot1->nth_rec;
 	}
 
-	/* add records from slot2->page_no which are to the left of
-	the record which servers as a right border of the range, if any */
+	/* Add records from slot2->page_no which are to the left of
+	the record which serves as a right border of the range, if any
+	(we don't include the record itself in this count). */
 	if (slot2->nth_rec > 1) {
 		n_rows += slot2->nth_rec - 1;
 	}
 
-	/* count the records in the pages between slot1->page_no and
-	slot2->page_no (non inclusive), if any */
-
-	zip_size = fil_space_get_zip_size(space);
+	/* Count the records in the pages between slot1->page_no and
+	slot2->page_no (non inclusive), if any. */
 
 	/* Do not read more than this number of pages in order not to hurt
 	performance with this code which is just an estimation. If we read
 	this many pages before reaching slot2->page_no then we estimate the
-	average from the pages scanned so far */
+	average from the pages scanned so far. */
 #	define N_PAGES_READ_LIMIT	10
 
-	page_no = slot1->page_no;
+	page_id_t		page_id(
+		dict_index_get_space(index), slot1->page_no);
+	const fil_space_t*	space = fil_space_get(index->space);
+	ut_ad(space);
+	const page_size_t	page_size(space->flags);
+
 	level = slot1->page_level;
 
 	do {
@@ -3583,7 +5542,7 @@ btr_estimate_n_rows_in_range_on_level(
 		attempting to read a page that is no longer part of
 		the B-tree. We pass BUF_GET_POSSIBLY_FREED in order to
 		silence a debug assertion about this. */
-		block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
+		block = buf_page_get_gen(page_id, page_size, RW_S_LATCH,
 					 NULL, BUF_GET_POSSIBLY_FREED,
 					 __FILE__, __LINE__, &mtr);
 
@@ -3595,7 +5554,7 @@ btr_estimate_n_rows_in_range_on_level(
 		this is only an estimate. We are sure that a page with
 		page_no exists because InnoDB never frees pages, only
 		reuses them. */
-		if (fil_page_get_type(page) != FIL_PAGE_INDEX
+		if (!fil_page_index_page_check(page)
 		    || btr_page_get_index_id(page) != index->id
 		    || btr_page_get_level_low(page) != level) {
 
@@ -3613,18 +5572,18 @@ btr_estimate_n_rows_in_range_on_level(
 
 		n_pages_read++;
 
-		if (page_no != slot1->page_no) {
+		if (page_id.page_no() != slot1->page_no) {
 			/* Do not count the records on slot1->page_no,
 			we already counted them before this loop. */
 			n_rows += page_get_n_recs(page);
 		}
 
-		page_no = btr_page_get_next(page, &mtr);
+		page_id.set_page_no(btr_page_get_next(page, &mtr));
 
 		mtr_commit(&mtr);
 
 		if (n_pages_read == N_PAGES_READ_LIMIT
-		    || page_no == FIL_NULL) {
+		    || page_id.page_no() == FIL_NULL) {
 			/* Either we read too many pages or
 			we reached the end of the level without passing
 			through slot2->page_no, the tree must have changed
@@ -3632,7 +5591,7 @@ btr_estimate_n_rows_in_range_on_level(
 			goto inexact;
 		}
 
-	} while (page_no != slot2->page_no);
+	} while (page_id.page_no() != slot2->page_no);
 
 	return(n_rows);
 
@@ -3657,18 +5616,40 @@ inexact:
 	return(n_rows);
 }
 
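The doc comment above describes the fallback: when the scan gives up before
reaching slot2->page, the rows counted so far are averaged per scanned page
and scaled by the estimated page count of the level. A minimal standalone
sketch of that arithmetic (the names here are illustrative, not the InnoDB
ones):

    #include <cstdint>

    /* Sketch of the inexact fallback: average the rows seen per
    scanned page and scale by the estimated number of pages on the
    level (n_rows_on_prev_level above). */
    int64_t
    extrapolate_rows(int64_t rows_counted, uint64_t pages_read,
                     int64_t pages_on_level)
    {
        if (pages_read == 0) {
            /* The tree changed before any page could be scanned;
            return an arbitrary small estimate. */
            return(10);
        }

        return(pages_on_level * rows_counted
               / static_cast<int64_t>(pages_read));
    }
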
-/*******************************************************************//**
-Estimates the number of rows in a given index range.
-@return	estimated number of rows */
-UNIV_INTERN
-ib_int64_t
-btr_estimate_n_rows_in_range(
-/*=========================*/
-	dict_index_t*	index,	/*!< in: index */
-	const dtuple_t*	tuple1,	/*!< in: range start, may also be empty tuple */
-	ulint		mode1,	/*!< in: search mode for range start */
-	const dtuple_t*	tuple2,	/*!< in: range end, may also be empty tuple */
-	ulint		mode2)	/*!< in: search mode for range end */
+/** If the tree gets changed too much between the two dives for the left
+and right boundary then btr_estimate_n_rows_in_range_low() will retry
+that many times before giving up and returning the value stored in
+rows_in_range_arbitrary_ret_val. */
+static const unsigned	rows_in_range_max_retries = 4;
+
+/** We pretend that a range has that many records if the tree keeps changing
+for rows_in_range_max_retries retries while we try to estimate the records
+in a given range. */
+static const int64_t	rows_in_range_arbitrary_ret_val = 10;
+
+/** Estimates the number of rows in a given index range.
+@param[in]	index		index
+@param[in]	tuple1		range start, may also be empty tuple
+@param[in]	mode1		search mode for range start
+@param[in]	tuple2		range end, may also be empty tuple
+@param[in]	mode2		search mode for range end
+@param[in]	nth_attempt	if the tree gets modified too much while
+we are trying to analyze it, then we will retry (this function will call
+itself, incrementing this parameter)
+@return estimated number of rows; if after rows_in_range_max_retries
+retries the tree keeps changing, then we will just return
+rows_in_range_arbitrary_ret_val as a result (if
+nth_attempt >= rows_in_range_max_retries and the tree is modified between
+the two dives). */
+static
+int64_t
+btr_estimate_n_rows_in_range_low(
+	dict_index_t*	index,
+	const dtuple_t*	tuple1,
+	page_cur_mode_t	mode1,
+	const dtuple_t*	tuple2,
+	page_cur_mode_t	mode2,
+	unsigned	nth_attempt)
 {
 	btr_path_t	path1[BTR_PATH_ARRAY_N_SLOTS];
 	btr_path_t	path2[BTR_PATH_ARRAY_N_SLOTS];
@@ -3678,28 +5659,58 @@ btr_estimate_n_rows_in_range(
 	ibool		diverged;
 	ibool		diverged_lot;
 	ulint		divergence_level;
-	ib_int64_t	n_rows;
+	int64_t		n_rows;
 	ibool		is_n_rows_exact;
 	ulint		i;
 	mtr_t		mtr;
-	ib_int64_t	table_n_rows;
+	int64_t		table_n_rows;
 
 	table_n_rows = dict_table_get_n_rows(index->table);
 
+	/* Below we dive to the two records specified by tuple1 and tuple2 and
+	we remember the entire dive paths from the tree root. The place where
+	the tuple1 path ends on the leaf level we call "left border" of our
+	interval and the place where the tuple2 path ends on the leaf level -
+	"right border". We take care to either include or exclude the interval
+	boundaries depending on whether <, <=, > or >= was specified. For
+	example if "5 < x AND x <= 10" then we should not include the left
+	boundary, but should include the right one. */
+
 	mtr_start(&mtr);
 
 	cursor.path_arr = path1;
 
+	bool	should_count_the_left_border;
+
 	if (dtuple_get_n_fields(tuple1) > 0) {
 
 		btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
 					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
 					    &cursor, 0,
 					    __FILE__, __LINE__, &mtr);
+
+		ut_ad(!page_rec_is_infimum(btr_cur_get_rec(&cursor)));
+
+		/* We should count the border if there are any records to
+		match the criteria, i.e. if the maximum record on the tree is
+		5 and x > 3 is specified then the cursor will be positioned at
+		5 and we should count the border, but if x > 7 is specified,
+		then the cursor will be positioned at 'sup' on the rightmost
+		leaf page in the tree and we should not count the border. */
+		should_count_the_left_border
+			= !page_rec_is_supremum(btr_cur_get_rec(&cursor));
 	} else {
 		btr_cur_open_at_index_side(true, index,
 					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
 					   &cursor, 0, &mtr);
+
+		ut_ad(page_rec_is_infimum(btr_cur_get_rec(&cursor)));
+
+		/* The range specified is without a left border, just
+		'x < 123' or 'x <= 123' and btr_cur_open_at_index_side()
+		positioned the cursor on the infimum record on the leftmost
+		page, which must not be counted. */
+		should_count_the_left_border = false;
 	}
 
 	mtr_commit(&mtr);
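The left-border rule above, and the symmetric right-border rule in the next
hunk, reduce to small predicates on the search mode and on where the cursor
landed. A hedged sketch of both, with invented stand-ins for PAGE_CUR_* and
for the infimum/supremum checks:

    enum cur_mode { CUR_G, CUR_GE, CUR_L, CUR_LE };

    /* 'x > v' or 'x >= v': count the record the cursor landed on,
    unless the search fell off the right end of the index. */
    bool count_left_border(bool on_supremum)
    {
        return(!on_supremum);
    }

    /* 'x <= v': count only if the key was fully matched;
    'x < v': count unless the search fell off the left end. */
    bool count_right_border(cur_mode mode, bool key_fully_matched,
                            bool on_infimum)
    {
        return((mode == CUR_LE && key_fully_matched)
               || (mode == CUR_L && !on_infimum));
    }
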
@@ -3708,30 +5719,74 @@ btr_estimate_n_rows_in_range(
 
 	cursor.path_arr = path2;
 
+	bool	should_count_the_right_border;
+
 	if (dtuple_get_n_fields(tuple2) > 0) {
 
 		btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
 					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
 					    &cursor, 0,
 					    __FILE__, __LINE__, &mtr);
+
+		const rec_t*	rec = btr_cur_get_rec(&cursor);
+
+		ut_ad(!(mode2 == PAGE_CUR_L && page_rec_is_supremum(rec)));
+
+		should_count_the_right_border
+			= (mode2 == PAGE_CUR_LE /* if the range is '<=' */
+			   /* and the record was found */
+			   && cursor.low_match >= dtuple_get_n_fields(tuple2))
+			|| (mode2 == PAGE_CUR_L /* or if the range is '<' */
+			    /* and there are any records to match the criteria,
+			    i.e. if the minimum record on the tree is 5 and
+			    x < 7 is specified then the cursor will be
+			    positioned at 5 and we should count the border, but
+			    if x < 2 is specified, then the cursor will be
+			    positioned at 'inf' and we should not count the
+			    border */
+			    && !page_rec_is_infimum(rec));
+		/* Notice that for "WHERE col <= 'foo'" MySQL passes to
+		ha_innobase::records_in_range():
+		min_key=NULL (left-unbounded) which is expected
+		max_key='foo' flag=HA_READ_AFTER_KEY (PAGE_CUR_G), which is
+		unexpected - one would expect
+		flag=HA_READ_KEY_OR_PREV (PAGE_CUR_LE). In this case the
+		cursor will be positioned on the first record to the right of
+		the requested one (can also be positioned on the 'sup') and
+		we should not count the right border. */
 	} else {
 		btr_cur_open_at_index_side(false, index,
 					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
 					   &cursor, 0, &mtr);
+
+		ut_ad(page_rec_is_supremum(btr_cur_get_rec(&cursor)));
+
+		/* The range specified is without a right border, just
+		'x > 123' or 'x >= 123' and btr_cur_open_at_index_side()
+		positioned the cursor on the supremum record on the rightmost
+		page, which must not be counted. */
+		should_count_the_right_border = false;
 	}
 
 	mtr_commit(&mtr);
 
 	/* We have the path information for the range in path1 and path2 */
 
-	n_rows = 1;
+	n_rows = 0;
 	is_n_rows_exact = TRUE;
-	diverged = FALSE;	    /* This becomes true when the path is not
-				    the same any more */
-	diverged_lot = FALSE;	    /* This becomes true when the paths are
-				    not the same or adjacent any more */
-	divergence_level = 1000000; /* This is the level where paths diverged
-				    a lot */
+
+	/* This becomes true when the two paths do not pass through the
+	same pages anymore. */
+	diverged = FALSE;
+
+	/* This becomes true when the paths are not the same or adjacent
+	any more. Until then they pass only through the same or through
+	neighboring pages on the same level. */
+	diverged_lot = FALSE;
+
+	/* This is the level where paths diverged a lot. */
+	divergence_level = 1000000;
+
 	for (i = 0; ; i++) {
 		ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
 
@@ -3741,6 +5796,70 @@ btr_estimate_n_rows_in_range(
 		if (slot1->nth_rec == ULINT_UNDEFINED
 		    || slot2->nth_rec == ULINT_UNDEFINED) {
 
+			/* Here none of the borders were counted. For example,
+			if on the leaf level we descended to:
+			(inf, a, b, c, d, e, f, sup)
+			         ^        ^
+			       path1    path2
+			then n_rows will be 2 (c and d). */
+
+			if (is_n_rows_exact) {
+				/* Only adjust for this off-by-one if
+				the number is exact; otherwise we make
+				much coarser adjustments below. */
+
+				btr_path_t*	last1 = &path1[i - 1];
+				btr_path_t*	last2 = &path2[i - 1];
+
+				/* If both paths end up on the same record on
+				the leaf level. */
+				if (last1->page_no == last2->page_no
+				    && last1->nth_rec == last2->nth_rec) {
+
+					/* n_rows can be > 0 here if the paths
+					were first different and then converged
+					to the same record on the leaf level.
+					For example:
+					SELECT ... LIKE 'wait/synch/rwlock%'
+					mode1=PAGE_CUR_GE,
+					tuple1="wait/synch/rwlock"
+					path1[0]={nth_rec=58, n_recs=58,
+						  page_no=3, page_level=1}
+					path1[1]={nth_rec=56, n_recs=55,
+						  page_no=119, page_level=0}
+
+					mode2=PAGE_CUR_G
+					tuple2="wait/synch/rwlock"
+					path2[0]={nth_rec=57, n_recs=57,
+						  page_no=3, page_level=1}
+					path2[1]={nth_rec=56, n_recs=55,
+						  page_no=119, page_level=0} */
+
+					/* If the range is such that we should
+					count both borders, then avoid
+					counting that record twice - once as a
+					left border and once as a right
+					border. */
+					if (should_count_the_left_border
+					    && should_count_the_right_border) {
+
+						n_rows = 1;
+					} else {
+						/* Some of the borders should
+						not be counted, e.g. [3,3). */
+						n_rows = 0;
+					}
+				} else {
+					if (should_count_the_left_border) {
+						n_rows++;
+					}
+
+					if (should_count_the_right_border) {
+						n_rows++;
+					}
+				}
+			}
+
 			if (i > divergence_level + 1 && !is_n_rows_exact) {
 				/* In trees whose height is > 1 our algorithm
 				tends to underestimate: multiply the estimate
@@ -3772,12 +5891,41 @@ btr_estimate_n_rows_in_range(
 
 		if (!diverged && slot1->nth_rec != slot2->nth_rec) {
 
+			/* If the two slots do not point to the same page,
+			this means that the tree must have changed between
+			the dive for slot1 and the dive for slot2 at the
+			beginning of this function. */
+			if (slot1->page_no != slot2->page_no
+			    || slot1->page_level != slot2->page_level) {
+
+				/* If the tree keeps changing even after a
+				few attempts, then just return some arbitrary
+				number. */
+				if (nth_attempt >= rows_in_range_max_retries) {
+					return(rows_in_range_arbitrary_ret_val);
+				}
+
+				const int64_t	ret =
+					btr_estimate_n_rows_in_range_low(
+						index, tuple1, mode1,
+						tuple2, mode2, nth_attempt + 1);
+
+				return(ret);
+			}
+
 			diverged = TRUE;
 
 			if (slot1->nth_rec < slot2->nth_rec) {
-				n_rows = slot2->nth_rec - slot1->nth_rec;
-
-				if (n_rows > 1) {
+				/* We do not count the borders (neither the
+				left nor the right one), thus "- 1". */
+				n_rows = slot2->nth_rec - slot1->nth_rec - 1;
+
+				if (n_rows > 0) {
+					/* There is at least one row between
+					the two borders pointed to by slot1
+					and slot2, so on the level below the
+					slots will point to non-adjacent
+					pages. */
 					diverged_lot = TRUE;
 					divergence_level = i;
 				}
@@ -3789,8 +5937,10 @@ btr_estimate_n_rows_in_range(
 				and we select where x > 20 and x < 30;
 				in this case slot1->nth_rec will point
 				to the supr record and slot2->nth_rec
-				will point to 6 */
+				will point to 6. */
 				n_rows = 0;
+				should_count_the_left_border = false;
+				should_count_the_right_border = false;
 			}
 
 		} else if (diverged && !diverged_lot) {
@@ -3821,6 +5971,27 @@ btr_estimate_n_rows_in_range(
 	}
 }
 
+/** Estimates the number of rows in a given index range.
+@param[in]	index	index
+@param[in]	tuple1	range start, may also be empty tuple
+@param[in]	mode1	search mode for range start
+@param[in]	tuple2	range end, may also be empty tuple
+@param[in]	mode2	search mode for range end
+@return estimated number of rows */
+int64_t
+btr_estimate_n_rows_in_range(
+	dict_index_t*	index,
+	const dtuple_t*	tuple1,
+	page_cur_mode_t	mode1,
+	const dtuple_t*	tuple2,
+	page_cur_mode_t	mode2)
+{
+	const int64_t	ret = btr_estimate_n_rows_in_range_low(
+		index, tuple1, mode1, tuple2, mode2, 1 /* first attempt */);
+
+	return(ret);
+}
+
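The wrapper fixes the retry contract: re-attempt the two dives a bounded
number of times while the tree keeps changing underneath, then fall back to
a constant. A self-contained sketch of that pattern (estimate_once() is a
stub standing in for one dive pair, not an InnoDB call):

    #include <cstdint>

    static const unsigned max_retries   = 4;  /* rows_in_range_max_retries */
    static const int64_t  arbitrary_ret = 10; /* ..._arbitrary_ret_val */

    /* Stub for one pair of dives; a real implementation would set
    *tree_changed when the two recorded paths are inconsistent. */
    static int64_t estimate_once(bool* tree_changed)
    {
        *tree_changed = false;
        return(42);
    }

    static int64_t estimate_with_retries(unsigned attempt)
    {
        bool    tree_changed;
        int64_t rows = estimate_once(&tree_changed);

        if (!tree_changed) {
            return(rows);
        }

        /* The tree keeps changing: give up after max_retries and
        return an arbitrary value, as the function above does. */
        return(attempt >= max_retries
               ? arbitrary_ret
               : estimate_with_retries(attempt + 1));
    }
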
 /*******************************************************************//**
 Record the number of non_null key values in a given index for
 each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
@@ -3864,9 +6035,10 @@ The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
 index->stat_n_sample_sizes[].
 If innodb_stats_method is nulls_ignored, we also record the number of
 non-null values for each prefix and stored the estimates in
-array index->stat_n_non_null_key_vals. */
-UNIV_INTERN
-void
+array index->stat_n_non_null_key_vals.
+@return true if the index is available and we get the estimated numbers,
+false if the index is unavailable. */
+bool
 btr_estimate_number_of_different_key_vals(
 /*======================================*/
 	dict_index_t*	index)	/*!< in: index */
@@ -3875,22 +6047,26 @@ btr_estimate_number_of_different_key_vals(
 	page_t*		page;
 	rec_t*		rec;
 	ulint		n_cols;
-	ulint		matched_fields;
-	ulint		matched_bytes;
 	ib_uint64_t*	n_diff;
 	ib_uint64_t*	n_not_null;
 	ibool		stats_null_not_equal;
-	ullint		n_sample_pages; /* number of pages to sample */
+	uintmax_t	n_sample_pages; /* number of pages to sample */
 	ulint		not_empty_flag	= 0;
 	ulint		total_external_size = 0;
 	ulint		i;
 	ulint		j;
-	ullint		add_on;
+	uintmax_t	add_on;
 	mtr_t		mtr;
 	mem_heap_t*	heap		= NULL;
 	ulint*		offsets_rec	= NULL;
 	ulint*		offsets_next_rec = NULL;
 
+	/* For a spatial index, no such statistics can be
+	fetched. */
+	if (dict_index_is_spatial(index)) {
+		return(false);
+	}
+
 	n_cols = dict_index_get_n_unique(index);
 
 	heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
@@ -3900,7 +6076,7 @@ btr_estimate_number_of_different_key_vals(
 				  + sizeof *offsets_next_rec));
 
 	n_diff = (ib_uint64_t*) mem_heap_zalloc(
-		heap, n_cols * sizeof(ib_int64_t));
+		heap, n_cols * sizeof(n_diff[0]));
 
 	n_not_null = NULL;
 
@@ -3925,7 +6101,7 @@ btr_estimate_number_of_different_key_vals(
 
 	default:
 		ut_error;
-        }
+	}
 
 	/* It makes no sense to test more pages than are contained
 	in the index, thus we lower the number if it is too high */
@@ -3944,7 +6120,17 @@ btr_estimate_number_of_different_key_vals(
 	for (i = 0; i < n_sample_pages; i++) {
 		mtr_start(&mtr);
 
-		btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
+		bool	available;
+
+		available = btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF,
+						    &cursor, &mtr);
+
+		if (!available) {
+			mtr_commit(&mtr);
+			mem_heap_free(heap);
+
+			return(false);
+		}
 
 		/* Count the number of different key values for each prefix of
 		the key on this index page. If the prefix does not determine
@@ -3968,6 +6154,7 @@ btr_estimate_number_of_different_key_vals(
 		}
 
 		while (!page_rec_is_supremum(rec)) {
+			ulint	matched_fields;
 			rec_t*	next_rec = page_rec_get_next(rec);
 			if (page_rec_is_supremum(next_rec)) {
 				total_external_size +=
@@ -3976,8 +6163,6 @@ btr_estimate_number_of_different_key_vals(
 				break;
 			}
 
-			matched_fields = 0;
-			matched_bytes = 0;
 			offsets_next_rec = rec_get_offsets(next_rec, index,
 							   offsets_next_rec,
 							   ULINT_UNDEFINED,
@@ -3986,8 +6171,7 @@ btr_estimate_number_of_different_key_vals(
 			cmp_rec_rec_with_match(rec, next_rec,
 					       offsets_rec, offsets_next_rec,
 					       index, stats_null_not_equal,
-					       &matched_fields,
-					       &matched_bytes);
+					       &matched_fields);
 
 			for (j = matched_fields; j < n_cols; j++) {
 				/* We add one if this index record has
@@ -4085,13 +6269,15 @@ btr_estimate_number_of_different_key_vals(
 	}
 
 	mem_heap_free(heap);
+
+	return(true);
 }
 
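The sampling loop above boils down to: open a cursor on a random leaf page,
compare each record with its successor, and treat every prefix longer than
the matched prefix as a new distinct value; the per-page counts are later
scaled by the ratio of index leaf pages to pages sampled. A rough sketch of
the per-page counting step, with illustrative types (InnoDB compares
physical records via cmp_rec_rec_with_match()):

    #include <cstdint>
    #include <vector>

    /* Each record is assumed to carry at least n_diff.size() fields. */
    struct sample_rec { std::vector<int> fields; };

    static void
    count_distinct_prefixes(const std::vector<sample_rec>& recs,
                            std::vector<uint64_t>& n_diff)
    {
        const size_t n_cols = n_diff.size();

        for (size_t r = 1; r < recs.size(); r++) {
            size_t matched = 0;

            while (matched < n_cols
                   && recs[r].fields[matched]
                      == recs[r - 1].fields[matched]) {
                matched++;
            }

            /* Every prefix longer than the matched prefix is a
            new distinct value. */
            for (size_t j = matched; j < n_cols; j++) {
                n_diff[j]++;
            }
        }
    }
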
 /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
 
 /***********************************************************//**
 Gets the offset of the pointer to the externally stored part of a field.
-@return	offset of the pointer to the externally stored part */
+@return offset of the pointer to the externally stored part */
 static
 ulint
 btr_rec_get_field_ref_offs(
@@ -4111,9 +6297,9 @@ btr_rec_get_field_ref_offs(
 }
 
 /** Gets a pointer to the externally stored part of a field.
-@param rec	record
-@param offsets	rec_get_offsets(rec)
-@param n	index of the externally stored field
+@param rec record
+@param offsets rec_get_offsets(rec)
+@param n index of the externally stored field
 @return pointer to the externally stored part */
 #define btr_rec_get_field_ref(rec, offsets, n)			\
 	((rec) + btr_rec_get_field_ref_offs(offsets, n))
@@ -4121,8 +6307,7 @@ btr_rec_get_field_ref_offs(
 /** Gets the externally stored size of a record, in units of a database page.
 @param[in]	rec	record
 @param[in]	offsets	array returned by rec_get_offsets()
-@return	externally stored part, in units of a database page */
-
+@return externally stored part, in units of a database page */
 ulint
 btr_rec_get_externally_stored_len(
 	const rec_t*	rec,
@@ -4201,8 +6386,6 @@ btr_cur_set_ownership_of_extern_field(
 	} else {
 		mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
 	}
-
-	btr_blob_dbg_owner(rec, index, offsets, i, val);
 }
 
 /*******************************************************************//**
@@ -4210,7 +6393,6 @@ Marks non-updated off-page fields as disowned by this record. The ownership
 must be transferred to the updated record which is inserted elsewhere in the
 index tree. In purge only the owner of externally stored field is allowed
 to free the field. */
-UNIV_INTERN
 void
 btr_cur_disown_inherited_fields(
 /*============================*/
@@ -4231,7 +6413,7 @@ btr_cur_disown_inherited_fields(
 
 	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 		if (rec_offs_nth_extern(offsets, i)
-		    && !upd_get_field_by_field_no(update, i)) {
+		    && !upd_get_field_by_field_no(update, i, false)) {
 			btr_cur_set_ownership_of_extern_field(
 				page_zip, rec, index, offsets, i, FALSE, mtr);
 		}
@@ -4277,8 +6459,7 @@ btr_cur_unmark_extern_fields(
 Flags the data tuple fields that are marked as extern storage in the
 update vector.  We use this function to remember which fields we must
 mark as extern storage in a record inserted for an update.
-@return	number of flagged external columns */
-UNIV_INTERN
+@return number of flagged external columns */
 ulint
 btr_push_update_extern_fields(
 /*==========================*/
@@ -4318,7 +6499,8 @@ btr_push_update_extern_fields(
 				InnoDB writes a longer prefix of externally
 				stored columns, so that column prefixes
 				in secondary indexes can be reconstructed. */
-				dfield_set_data(field, (byte*) dfield_get_data(field)
+				dfield_set_data(field,
+						(byte*) dfield_get_data(field)
 						+ dfield_get_len(field)
 						- BTR_EXTERN_FIELD_REF_SIZE,
 						BTR_EXTERN_FIELD_REF_SIZE);
@@ -4356,7 +6538,7 @@ btr_push_update_extern_fields(
 
 /*******************************************************************//**
 Returns the length of a BLOB part stored on the header page.
-@return	part length */
+@return part length */
 static
 ulint
 btr_blob_get_part_len(
@@ -4368,7 +6550,7 @@ btr_blob_get_part_len(
 
 /*******************************************************************//**
 Returns the page number where the next BLOB part is stored.
-@return	page number or FIL_NULL if no more pages */
+@return page number or FIL_NULL if no more pages */
 static
 ulint
 btr_blob_get_next_page_no(
@@ -4384,16 +6566,17 @@ static
 void
 btr_blob_free(
 /*==========*/
+	dict_index_t*	index,	/*!< in: index */
 	buf_block_t*	block,	/*!< in: buffer block */
 	ibool		all,	/*!< in: TRUE=remove also the compressed page
 				if there is one */
 	mtr_t*		mtr)	/*!< in: mini-transaction to commit */
 {
 	buf_pool_t*	buf_pool = buf_pool_from_block(block);
-	ulint		space	= buf_block_get_space(block);
-	ulint		page_no	= buf_block_get_page_no(block);
+	ulint		space = block->page.id.space();
+	ulint		page_no	= block->page.id.page_no();
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
 
 	mtr_commit(mtr);
 
@@ -4404,8 +6587,8 @@ btr_blob_free(
 
 	if (buf_block_get_state(block)
 	    == BUF_BLOCK_FILE_PAGE
-	    && buf_block_get_space(block) == space
-	    && buf_block_get_page_no(block) == page_no) {
+	    && block->page.id.space() == space
+	    && block->page.id.page_no() == page_no) {
 
 		if (!buf_LRU_free_page(&block->page, all)
 		    && all && block->page.zip.data) {
@@ -4419,28 +6602,149 @@ btr_blob_free(
 	buf_pool_mutex_exit(buf_pool);
 }
 
+/** Helper class used while writing blob pages, during insert or update. */
+struct btr_blob_log_check_t {
+	/** Persistent cursor on a clustered index record with blobs. */
+	btr_pcur_t*	m_pcur;
+	/** Mini-transaction holding the latches for m_pcur */
+	mtr_t*		m_mtr;
+	/** rec_get_offsets(rec, index); offset of clust_rec */
+	const ulint*	m_offsets;
+	/** The block containing clustered record */
+	buf_block_t**	m_block;
+	/** The clustered record pointer */
+	rec_t**		m_rec;
+	/** The blob operation code */
+	enum blob_op	m_op;
+
+	/** Constructor
+	@param[in]	pcur		persistent cursor on a clustered
+					index record with blobs.
+	@param[in]	mtr		mini-transaction holding latches for
+					pcur.
+	@param[in]	offsets		offsets of the clust_rec
+	@param[in,out]	block		record block containing pcur record
+	@param[in,out]	rec		the clustered record pointer
+	@param[in]	op		the blob operation code */
+	btr_blob_log_check_t(
+		btr_pcur_t*	pcur,
+		mtr_t*		mtr,
+		const ulint*	offsets,
+		buf_block_t**	block,
+		rec_t**		rec,
+		enum blob_op	op)
+		: m_pcur(pcur),
+		  m_mtr(mtr),
+		  m_offsets(offsets),
+		  m_block(block),
+		  m_rec(rec),
+		  m_op(op)
+	{
+		ut_ad(rec_offs_validate(*m_rec, m_pcur->index(), m_offsets));
+		ut_ad((*m_block)->frame == page_align(*m_rec));
+		ut_ad(*m_rec == btr_pcur_get_rec(m_pcur));
+	}
+
+	/** Check if there is enough space in the log file. Commit and
+	re-start the mini-transaction. */
+	void check()
+	{
+		dict_index_t*	index = m_pcur->index();
+		ulint		offs = 0;
+		ulint		page_no = ULINT_UNDEFINED;
+		FlushObserver*	observer = m_mtr->get_flush_observer();
+
+		if (m_op == BTR_STORE_INSERT_BULK) {
+			offs = page_offset(*m_rec);
+			page_no = page_get_page_no(
+				buf_block_get_frame(*m_block));
+
+			buf_block_buf_fix_inc(*m_block, __FILE__, __LINE__);
+		} else {
+			btr_pcur_store_position(m_pcur, m_mtr);
+		}
+		m_mtr->commit();
+
+		DEBUG_SYNC_C("blob_write_middle");
+
+		log_free_check();
+
+		const mtr_log_t log_mode = m_mtr->get_log_mode();
+		m_mtr->start();
+		m_mtr->set_log_mode(log_mode);
+		m_mtr->set_named_space(index->space);
+		m_mtr->set_flush_observer(observer);
+
+		if (m_op == BTR_STORE_INSERT_BULK) {
+			page_id_t       page_id(dict_index_get_space(index),
+						page_no);
+			page_size_t     page_size(dict_table_page_size(
+						index->table));
+			page_cur_t*	page_cur = &m_pcur->btr_cur.page_cur;
+
+			mtr_x_lock(dict_index_get_lock(index), m_mtr);
+			page_cur->block = btr_block_get(
+				page_id, page_size, RW_X_LATCH, index, m_mtr);
+			page_cur->rec = buf_block_get_frame(page_cur->block)
+				+ offs;
+
+			buf_block_buf_fix_dec(page_cur->block);
+		} else {
+			ut_ad(m_pcur->rel_pos == BTR_PCUR_ON);
+			bool ret = btr_pcur_restore_position(
+				BTR_MODIFY_LEAF | BTR_MODIFY_EXTERNAL,
+				m_pcur, m_mtr);
+
+			ut_a(ret);
+		}
+
+		*m_block	= btr_pcur_get_block(m_pcur);
+		*m_rec		= btr_pcur_get_rec(m_pcur);
+
+		ut_d(rec_offs_make_valid(
+			*m_rec, index, const_cast<ulint*>(m_offsets)));
+
+		ut_ad(m_mtr->memo_contains_page_flagged(
+		      *m_rec,
+		      MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)
+		      || dict_table_is_intrinsic(index->table));
+
+		ut_ad(mtr_memo_contains_flagged(m_mtr,
+		      dict_index_get_lock(index),
+		      MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK)
+		      || dict_table_is_intrinsic(index->table));
+	}
+};
+
+
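check() above implements a commit-and-restart checkpoint: store the cursor
position, commit the mini-transaction to release the page latches, wait for
redo log room, then restart and reposition. A minimal sketch of the shape of
that pattern, with invented stub types (the real ones are btr_pcur_t and
mtr_t):

    /* Invented stand-ins; bodies are no-ops for illustration. */
    struct cursor_stub {
        void store_position() {}
        bool restore_position() { return(true); }
    };
    struct mtr_stub {
        void start() {}
        void commit() {}
    };
    static void log_free_check_stub() {}  /* wait for redo log room */

    static void
    blob_log_checkpoint(cursor_stub& pcur, mtr_stub& mtr)
    {
        pcur.store_position();  /* remember the clustered record */
        mtr.commit();           /* release latches, let log flush */

        log_free_check_stub();

        mtr.start();
        /* Re-latch; the real code asserts this succeeds and then
        refreshes the cached block and record pointers. */
        bool ok = pcur.restore_position();
        (void) ok;
    }
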
 /*******************************************************************//**
 Stores the fields in big_rec_vec to the tablespace and puts pointers to
 them in rec.  The extern flags in rec will have to be set beforehand.
 The fields are stored on pages allocated from leaf node
 file segment of the index tree.
-@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE or DB_TOO_BIG_FOR_REDO */
-UNIV_INTERN
+
+TODO: If the allocation extends the tablespace, it will not be redo logged, in
+any mini-transaction.  Tablespace extension should be redo-logged, so that
+recovery will not fail when the big_rec was written to the extended portion of
+the file, in case the file was somehow truncated in the crash.
+
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 dberr_t
 btr_store_big_rec_extern_fields(
 /*============================*/
-	dict_index_t*	index,		/*!< in: index of rec; the index tree
-					MUST be X-latched */
-	buf_block_t*	rec_block,	/*!< in/out: block containing rec */
-	rec_t*		rec,		/*!< in/out: record */
-	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index);
-					the "external storage" flags in offsets
-					will not correspond to rec when
-					this function returns */
+	btr_pcur_t*	pcur,		/*!< in/out: a persistent cursor. If
+					btr_mtr is restarted, then this can
+					be repositioned. */
+	const upd_t*	upd,		/*!< in: update vector */
+	ulint*		offsets,	/*!< in/out: rec_get_offsets() on
+					pcur. The "external storage" flags
+					in offsets will correctly correspond
+					to rec when this function returns */
 	const big_rec_t*big_rec_vec,	/*!< in: vector containing fields
 					to be stored externally */
-	mtr_t*		btr_mtr,	/*!< in: mtr containing the
-					latches to the clustered index */
+	mtr_t*		btr_mtr,	/*!< in/out: mtr containing the
+					latches to the clustered index. Can
+					be committed and restarted. */
 	enum blob_op	op)		/*! in: operation code */
 {
 	ulint		rec_page_no;
@@ -4449,43 +6753,40 @@ btr_store_big_rec_extern_fields(
 	ulint		store_len;
 	ulint		page_no;
 	ulint		space_id;
-	ulint		zip_size;
 	ulint		prev_page_no;
 	ulint		hint_page_no;
 	ulint		i;
 	mtr_t		mtr;
-	mtr_t*		alloc_mtr;
 	mem_heap_t*	heap = NULL;
 	page_zip_des_t*	page_zip;
 	z_stream	c_stream;
-	buf_block_t**	freed_pages	= NULL;
-	ulint		n_freed_pages	= 0;
 	dberr_t		error		= DB_SUCCESS;
+	dict_index_t*	index		= pcur->index();
+	buf_block_t*	rec_block	= btr_pcur_get_block(pcur);
+	rec_t*		rec		= btr_pcur_get_rec(pcur);
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
 	ut_ad(rec_offs_any_extern(offsets));
 	ut_ad(btr_mtr);
-	ut_ad(mtr_memo_contains(btr_mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_flagged(btr_mtr, dict_index_get_lock(index),
+					MTR_MEMO_X_LOCK
+					| MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
+	ut_ad(mtr_is_block_fix(
+		btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX, index->table));
 	ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
 	ut_a(dict_index_is_clust(index));
 
-	page_zip = buf_block_get_page_zip(rec_block);
-	ut_a(dict_table_zip_size(index->table)
-	     == buf_block_get_zip_size(rec_block));
-
-	space_id = buf_block_get_space(rec_block);
-	zip_size = buf_block_get_zip_size(rec_block);
-	rec_page_no = buf_block_get_page_no(rec_block);
-	ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
+	ut_a(dict_table_page_size(index->table)
+		.equals_to(rec_block->page.size));
 
-	error = btr_check_blob_limit(big_rec_vec);
-
-	if (error != DB_SUCCESS) {
-		ut_ad(op == BTR_STORE_INSERT);
-		return(error);
-	}
+	btr_blob_log_check_t redo_log(pcur, btr_mtr, offsets, &rec_block,
+				      &rec, op);
+	page_zip = buf_block_get_page_zip(rec_block);
+	space_id = rec_block->page.id.space();
+	rec_page_no = rec_block->page.id.page_no();
+	ut_a(fil_page_index_page_check(page_align(rec))
+	     || op == BTR_STORE_INSERT_BULK);
 
 	if (page_zip) {
 		int	err;
@@ -4503,52 +6804,13 @@ btr_store_big_rec_extern_fields(
 		ut_a(err == Z_OK);
 	}
 
-	if (btr_blob_op_is_update(op)) {
-		/* Avoid reusing pages that have been previously freed
-		in btr_mtr. */
-		if (btr_mtr->n_freed_pages) {
-			if (heap == NULL) {
-				heap = mem_heap_create(
-					btr_mtr->n_freed_pages
-					* sizeof *freed_pages);
-			}
-
-			freed_pages = static_cast<buf_block_t**>(
-				mem_heap_alloc(
-					heap,
-					btr_mtr->n_freed_pages
-					* sizeof *freed_pages));
-			n_freed_pages = 0;
-		}
-
-		/* Because btr_mtr will be committed after mtr, it is
-		possible that the tablespace has been extended when
-		the B-tree record was updated or inserted, or it will
-		be extended while allocating pages for big_rec.
-
-		TODO: In mtr (not btr_mtr), write a redo log record
-		about extending the tablespace to its current size,
-		and remember the current size. Whenever the tablespace
-		grows as pages are allocated, write further redo log
-		records to mtr. (Currently tablespace extension is not
-		covered by the redo log. If it were, the record would
-		only be written to btr_mtr, which is committed after
-		mtr.) */
-		alloc_mtr = btr_mtr;
-	} else {
-		/* Use the local mtr for allocations. */
-		alloc_mtr = &mtr;
-	}
-
 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 	/* All pointers to externally stored columns in the record
 	must either be zero or they must be pointers to inherited
 	columns, owned by this record or an earlier record version. */
-	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
-		if (!rec_offs_nth_extern(offsets, i)) {
-			continue;
-		}
-		field_ref = btr_rec_get_field_ref(rec, offsets, i);
+	for (i = 0; i < big_rec_vec->n_fields; i++) {
+		field_ref = btr_rec_get_field_ref(
+			rec, offsets, big_rec_vec->fields[i].field_no);
 
 		ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
 		/* Either this must be an update in place,
@@ -4560,12 +6822,76 @@ btr_store_big_rec_extern_fields(
 				BTR_EXTERN_FIELD_REF_SIZE));
 	}
 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
+	/* Calculate the total number of pages for blob data */
+	ulint	total_blob_pages = 0;
+	const page_size_t	page_size(dict_table_page_size(index->table));
+	const ulint pages_in_extent = dict_table_extent_size(index->table);
+
+	/* Space available in compressed page to carry blob data */
+	const ulint	payload_size_zip = page_size.physical()
+		- FIL_PAGE_DATA;
+
+	/* Space available in uncompressed page to carry blob data */
+	const ulint	payload_size = page_size.physical()
+		- FIL_PAGE_DATA - BTR_BLOB_HDR_SIZE - FIL_PAGE_DATA_END;
+
+	if (page_size.is_compressed()) {
+		for (ulint i = 0; i < big_rec_vec->n_fields; i++) {
+			total_blob_pages
+				+= static_cast<ulint>
+				   (compressBound(static_cast<uLong>
+						  (big_rec_vec->fields[i].len))
+				    + payload_size_zip - 1)
+				   / payload_size_zip;
+		}
+	} else {
+		for (ulint i = 0; i < big_rec_vec->n_fields; i++) {
+			total_blob_pages += (big_rec_vec->fields[i].len
+					     + payload_size - 1)
+				/ payload_size;
+		}
+	}
+
+	const ulint	n_extents = (total_blob_pages + pages_in_extent - 1)
+		/ pages_in_extent;
+	ulint	n_reserved = 0;
+#ifdef UNIV_DEBUG
+	ulint	n_used = 0;	/* number of pages used */
+#endif /* UNIV_DEBUG */
+
+	if (op == BTR_STORE_INSERT_BULK) {
+		mtr_t	alloc_mtr;
+
+		mtr_start(&alloc_mtr);
+		alloc_mtr.set_named_space(index->space);
+
+		if (!fsp_reserve_free_extents(&n_reserved, space_id, n_extents,
+					      FSP_BLOB, &alloc_mtr)) {
+			mtr_commit(&alloc_mtr);
+			error = DB_OUT_OF_FILE_SPACE;
+			goto func_exit;
+		}
+
+		mtr_commit(&alloc_mtr);
+	} else {
+		if (!fsp_reserve_free_extents(&n_reserved, space_id, n_extents,
+					      FSP_BLOB, btr_mtr)) {
+			error = DB_OUT_OF_FILE_SPACE;
+			goto func_exit;
+		}
+	}
+
+	ut_ad(n_reserved > 0);
+	ut_ad(n_reserved == n_extents);
+
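For a feel of the reservation arithmetic: with the usual 16 KiB page
constants (38-byte page header, 8-byte BLOB part header, 8-byte page
trailer; assumed here for illustration), the uncompressed payload is 16330
bytes per page, so a 1 MiB column needs 65 BLOB pages, i.e. 2 of the
64-page extents:

    #include <cstdint>

    /* Worked example of the sizing above; the constants are the
    common 16 KiB-page values and are assumptions of this sketch. */
    static uint64_t blob_extents_needed(uint64_t blob_len)
    {
        const uint64_t page_size    = 16384;
        const uint64_t payload      = page_size - 38 - 8 - 8; /* 16330 */
        const uint64_t extent_pages = 64;

        const uint64_t pages = (blob_len + payload - 1) / payload;

        return((pages + extent_pages - 1) / extent_pages);
    }

    /* blob_extents_needed(1048576) == 2 (65 pages); the unused part
    of the reservation is given back via
    fil_space_release_free_extents() at func_exit. */
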
 	/* We have to create a file segment to the tablespace
 	for each field and put the pointer to the field in rec */
 
 	for (i = 0; i < big_rec_vec->n_fields; i++) {
-		field_ref = btr_rec_get_field_ref(
-			rec, offsets, big_rec_vec->fields[i].field_no);
+		const ulint field_no = big_rec_vec->fields[i].field_no;
+
+		field_ref = btr_rec_get_field_ref(rec, offsets, field_no);
 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 		/* A zero BLOB pointer should have been initially inserted. */
 		ut_a(!memcmp(field_ref, field_ref_zero,
@@ -4588,11 +6914,31 @@ btr_store_big_rec_extern_fields(
 			c_stream.avail_in = static_cast<uInt>(extern_len);
 		}
 
-		for (;;) {
+		for (ulint blob_npages = 0;; ++blob_npages) {
 			buf_block_t*	block;
 			page_t*		page;
+			const ulint	commit_freq = 4;
+
+			ut_ad(page_align(field_ref) == page_align(rec));
+
+			if (!(blob_npages % commit_freq)) {
+
+				redo_log.check();
+
+				field_ref = btr_rec_get_field_ref(
+					rec, offsets, field_no);
+
+				page_zip = buf_block_get_page_zip(rec_block);
+				rec_page_no = rec_block->page.id.page_no();
+			}
 
 			mtr_start(&mtr);
+			mtr.set_named_space(index->space);
+			mtr.set_log_mode(btr_mtr->get_log_mode());
+			mtr.set_flush_observer(btr_mtr->get_flush_observer());
+
+			buf_page_get(rec_block->page.id,
+				     rec_block->page.size, RW_X_LATCH, &mtr);
 
 			if (prev_page_no == FIL_NULL) {
 				hint_page_no = 1 + rec_page_no;
@@ -4600,36 +6946,36 @@ btr_store_big_rec_extern_fields(
 				hint_page_no = prev_page_no + 1;
 			}
 
-alloc_another:
-			block = btr_page_alloc(index, hint_page_no,
-					       FSP_NO_DIR, 0, alloc_mtr, &mtr);
-			if (UNIV_UNLIKELY(block == NULL)) {
-				mtr_commit(&mtr);
-				error = DB_OUT_OF_FILE_SPACE;
-				goto func_exit;
-			}
+			if (op == BTR_STORE_INSERT_BULK) {
+				mtr_t	alloc_mtr;
+
+				mtr_start(&alloc_mtr);
+				alloc_mtr.set_named_space(index->space);
+
+				block = btr_page_alloc(index, hint_page_no,
+					FSP_NO_DIR, 0, &alloc_mtr, &mtr);
+				mtr_commit(&alloc_mtr);
 
-			if (rw_lock_get_x_lock_count(&block->lock) > 1) {
-				/* This page must have been freed in
-				btr_mtr previously. Put it aside, and
-				allocate another page for the BLOB data. */
-				ut_ad(alloc_mtr == btr_mtr);
-				ut_ad(btr_blob_op_is_update(op));
-				ut_ad(n_freed_pages < btr_mtr->n_freed_pages);
-				freed_pages[n_freed_pages++] = block;
-				goto alloc_another;
+			} else {
+				block = btr_page_alloc(index, hint_page_no,
+					FSP_NO_DIR, 0, &mtr, &mtr);
 			}
 
-			page_no = buf_block_get_page_no(block);
+			ut_a(block != NULL);
+			ut_ad(++n_used <= (n_reserved * pages_in_extent));
+
+			page_no = block->page.id.page_no();
 			page = buf_block_get_frame(block);
 
 			if (prev_page_no != FIL_NULL) {
 				buf_block_t*	prev_block;
 				page_t*		prev_page;
 
-				prev_block = buf_page_get(space_id, zip_size,
-							  prev_page_no,
-							  RW_X_LATCH, &mtr);
+				prev_block = buf_page_get(
+					page_id_t(space_id, prev_page_no),
+					rec_block->page.size,
+					RW_X_LATCH, &mtr);
+
 				buf_block_dbg_add_level(prev_block,
 							SYNC_EXTERN_STORAGE);
 				prev_page = buf_block_get_frame(prev_block);
@@ -4675,9 +7021,8 @@ alloc_another:
 
 				c_stream.next_out = page
 					+ FIL_PAGE_DATA;
-				c_stream.avail_out
-					= static_cast<uInt>(page_zip_get_size(page_zip))
-					- FIL_PAGE_DATA;
+				c_stream.avail_out = static_cast<uInt>(
+					payload_size_zip);
 
 				err = deflate(&c_stream, Z_FINISH);
 				ut_a(err == Z_OK || err == Z_STREAM_END);
@@ -4703,7 +7048,12 @@ alloc_another:
 				btr_page_reorganize().  However, also
 				the page number of the record may
 				change when B-tree nodes are split or
-				merged. */
+				merged.
+				NOTE: FIL_PAGE_FILE_FLUSH_LSN space is
+				used by the R-tree index for a Split
+				Sequence Number. */
+				ut_ad(!dict_index_is_spatial(index));
+
 				mlog_write_ulint(page
 						 + FIL_PAGE_FILE_FLUSH_LSN,
 						 space_id,
@@ -4736,16 +7086,6 @@ alloc_another:
 					goto next_zip_page;
 				}
 
-				if (alloc_mtr == &mtr) {
-					rec_block = buf_page_get(
-						space_id, zip_size,
-						rec_page_no,
-						RW_X_LATCH, &mtr);
-					buf_block_dbg_add_level(
-						rec_block,
-						SYNC_NO_ORDER_CHECK);
-				}
-
 				if (err == Z_STREAM_END) {
 					mach_write_to_4(field_ref
 							+ BTR_EXTERN_LEN, 0);
@@ -4758,11 +7098,7 @@ alloc_another:
 				}
 
 				if (prev_page_no == FIL_NULL) {
-					btr_blob_dbg_add_blob(
-						rec, big_rec_vec->fields[i]
-						.field_no, page_no, index,
-						"store");
-
+					ut_ad(blob_npages == 0);
 					mach_write_to_4(field_ref
 							+ BTR_EXTERN_SPACE_ID,
 							space_id);
@@ -4776,17 +7112,19 @@ alloc_another:
 							FIL_PAGE_NEXT);
 				}
 
-				page_zip_write_blob_ptr(
-					page_zip, rec, index, offsets,
-					big_rec_vec->fields[i].field_no,
-					alloc_mtr);
+				/* We compress the page when the bulk
+				insert finishes. */
+				if (op != BTR_STORE_INSERT_BULK) {
+					page_zip_write_blob_ptr(
+						page_zip, rec, index, offsets,
+						field_no, &mtr);
+				}
 
 next_zip_page:
 				prev_page_no = page_no;
 
 				/* Commit mtr and release the
 				uncompressed page frame to save memory. */
-				btr_blob_free(block, FALSE, &mtr);
+				btr_blob_free(index, block, FALSE, &mtr);
 
 				if (err == Z_STREAM_END) {
 					break;
@@ -4796,14 +7134,8 @@ next_zip_page:
 						 FIL_PAGE_TYPE_BLOB,
 						 MLOG_2BYTES, &mtr);
 
-				if (extern_len > (UNIV_PAGE_SIZE
-						  - FIL_PAGE_DATA
-						  - BTR_BLOB_HDR_SIZE
-						  - FIL_PAGE_DATA_END)) {
-					store_len = UNIV_PAGE_SIZE
-						- FIL_PAGE_DATA
-						- BTR_BLOB_HDR_SIZE
-						- FIL_PAGE_DATA_END;
+				if (extern_len > payload_size) {
+					store_len = payload_size;
 				} else {
 					store_len = extern_len;
 				}
@@ -4824,45 +7156,31 @@ next_zip_page:
 
 				extern_len -= store_len;
 
-				if (alloc_mtr == &mtr) {
-					rec_block = buf_page_get(
-						space_id, zip_size,
-						rec_page_no,
-						RW_X_LATCH, &mtr);
-					buf_block_dbg_add_level(
-						rec_block,
-						SYNC_NO_ORDER_CHECK);
-				}
-
 				mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
-						 MLOG_4BYTES, alloc_mtr);
+						 MLOG_4BYTES, &mtr);
 				mlog_write_ulint(field_ref
 						 + BTR_EXTERN_LEN + 4,
 						 big_rec_vec->fields[i].len
 						 - extern_len,
-						 MLOG_4BYTES, alloc_mtr);
+						 MLOG_4BYTES, &mtr);
 
 				if (prev_page_no == FIL_NULL) {
-					btr_blob_dbg_add_blob(
-						rec, big_rec_vec->fields[i]
-						.field_no, page_no, index,
-						"store");
-
+					ut_ad(blob_npages == 0);
 					mlog_write_ulint(field_ref
 							 + BTR_EXTERN_SPACE_ID,
 							 space_id, MLOG_4BYTES,
-							 alloc_mtr);
+							 &mtr);
 
 					mlog_write_ulint(field_ref
 							 + BTR_EXTERN_PAGE_NO,
 							 page_no, MLOG_4BYTES,
-							 alloc_mtr);
+							 &mtr);
 
 					mlog_write_ulint(field_ref
 							 + BTR_EXTERN_OFFSET,
 							 FIL_PAGE_DATA,
 							 MLOG_4BYTES,
-							 alloc_mtr);
+							 &mtr);
 				}
 
 				prev_page_no = page_no;
@@ -4878,28 +7196,28 @@ next_zip_page:
 		DBUG_EXECUTE_IF("btr_store_big_rec_extern",
 				error = DB_OUT_OF_FILE_SPACE;
 				goto func_exit;);
+
+		rec_offs_make_nth_extern(offsets, field_no);
 	}
 
+	/* Verify that the number of extents used is the same as the number
+	of extents reserved. */
+	ut_ad(page_zip != NULL
+	      || ((n_used + pages_in_extent - 1) / pages_in_extent
+		  == n_reserved));
+	ut_ad((n_used + pages_in_extent - 1) / pages_in_extent <= n_reserved);
+
 func_exit:
 	if (page_zip) {
 		deflateEnd(&c_stream);
 	}
 
-	if (n_freed_pages) {
-		ulint	i;
-
-		ut_ad(alloc_mtr == btr_mtr);
-		ut_ad(btr_blob_op_is_update(op));
-
-		for (i = 0; i < n_freed_pages; i++) {
-			btr_page_free_low(index, freed_pages[i], 0, alloc_mtr);
-		}
-	}
-
 	if (heap != NULL) {
 		mem_heap_free(heap);
 	}
 
+	fil_space_release_free_extents(space_id, n_reserved);
+
 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 	/* All pointers to externally stored columns in the record
 	must be valid. */
@@ -4951,13 +7269,10 @@ btr_check_blob_fil_page_type(
 		}
 #endif /* !UNIV_DEBUG */
 
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: FIL_PAGE_TYPE=%lu"
-			" on BLOB %s space %lu page %lu flags %lx\n",
-			(ulong) type, read ? "read" : "purge",
-			(ulong) space_id, (ulong) page_no, (ulong) flags);
-		ut_error;
+		ib::fatal() << "FIL_PAGE_TYPE=" << type
+			<< " on BLOB " << (read ? "read" : "purge")
+			<< " space " << space_id << " page " << page_no
+			<< " flags " << flags;
 	}
 }
 
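The patch replaces the fprintf(stderr, ...) plus ut_error idiom with
stream-style ib:: loggers; ib::fatal() composes the message with operator<<
and terminates when the temporary is destroyed. A minimal sketch of that
idiom (an invented class, not the InnoDB implementation):

    #include <cstdlib>
    #include <iostream>
    #include <sstream>

    class fatal_logger {
    public:
        ~fatal_logger()
        {
            /* Emit the collected message, then terminate. */
            std::cerr << "[FATAL] " << m_msg.str() << std::endl;
            std::abort();
        }

        template<typename T>
        fatal_logger& operator<<(const T& v)
        {
            m_msg << v;
            return(*this);
        }

    private:
        std::ostringstream m_msg;
    };

    /* Usage: fatal_logger() << "FIL_PAGE_TYPE=" << type
              << " on BLOB page " << page_no;
    aborts when the temporary goes out of scope. */
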
@@ -4966,7 +7281,6 @@ Frees the space in an externally stored field to the file space
 management if the field in data is owned by the externally stored field,
 in a rollback we may have the additional condition that the field must
 not be inherited. */
-UNIV_INTERN
 void
 btr_free_externally_stored_field(
 /*=============================*/
@@ -4987,8 +7301,8 @@ btr_free_externally_stored_field(
 					to rec, or NULL if rec == NULL */
 	ulint		i,		/*!< in: field number of field_ref;
 					ignored if rec == NULL */
-	enum trx_rb_ctx	rb_ctx,		/*!< in: rollback context */
-	mtr_t*		local_mtr __attribute__((unused))) /*!< in: mtr
+	bool		rollback,	/*!< in: performing rollback? */
+	mtr_t*		local_mtr)	/*!< in: mtr
 					containing the latch to data and an
 					X-latch to the index tree */
 {
@@ -4997,95 +7311,68 @@ btr_free_externally_stored_field(
 		field_ref + BTR_EXTERN_SPACE_ID);
 	const ulint	start_page	= mach_read_from_4(
 		field_ref + BTR_EXTERN_PAGE_NO);
-	ulint		rec_zip_size = dict_table_zip_size(index->table);
-	ulint		ext_zip_size;
 	ulint		page_no;
 	ulint		next_page_no;
 	mtr_t		mtr;
 
 	ut_ad(dict_index_is_clust(index));
-	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
-				     MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_flagged(local_mtr, dict_index_get_lock(index),
+					MTR_MEMO_X_LOCK
+					| MTR_MEMO_SX_LOCK)
+	      || dict_table_is_intrinsic(index->table));
+	ut_ad(mtr_is_page_fix(
+		local_mtr, field_ref, MTR_MEMO_PAGE_X_FIX, index->table));
 	ut_ad(!rec || rec_offs_validate(rec, index, offsets));
 	ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i));
+	ut_ad(local_mtr->is_named_space(
+		      page_get_space_id(page_align(field_ref))));
 
 	if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
 				  BTR_EXTERN_FIELD_REF_SIZE))) {
 		/* In the rollback, we may encounter a clustered index
 		record with some unwritten off-page columns. There is
 		nothing to free then. */
-		ut_a(rb_ctx != RB_NONE);
+		ut_a(rollback);
 		return;
 	}
 
+	ut_ad(!(mach_read_from_4(field_ref + BTR_EXTERN_LEN)
+	        & ~((BTR_EXTERN_OWNER_FLAG
+	             | BTR_EXTERN_INHERITED_FLAG) << 24)));
 	ut_ad(space_id == index->space);
 
-	if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
-		ext_zip_size = fil_space_get_zip_size(space_id);
-		/* This must be an undo log record in the system tablespace,
-		that is, in row_purge_upd_exist_or_extern().
-		Currently, externally stored records are stored in the
-		same tablespace as the referring records. */
-		ut_ad(!page_get_space_id(page_align(field_ref)));
-		ut_ad(!rec);
-		ut_ad(!page_zip);
-	} else {
-		ext_zip_size = rec_zip_size;
-	}
-
-	if (!rec) {
+	const page_size_t	ext_page_size(dict_table_page_size(index->table));
+	const page_size_t&	rec_page_size(rec == NULL
+					      ? univ_page_size
+					      : ext_page_size);
+	if (rec == NULL) {
 		/* This is a call from row_purge_upd_exist_or_extern(). */
 		ut_ad(!page_zip);
-		rec_zip_size = 0;
-	}
-
-#ifdef UNIV_BLOB_DEBUG
-	if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)
-	    && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
-		 && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) {
-		/* This off-page column will be freed.
-		Check that no references remain. */
-
-		btr_blob_dbg_t	b;
-
-		b.blob_page_no = start_page;
-
-		if (rec) {
-			/* Remove the reference from the record to the
-			BLOB. If the BLOB were not freed, the
-			reference would be removed when the record is
-			removed. Freeing the BLOB will overwrite the
-			BTR_EXTERN_PAGE_NO in the field_ref of the
-			record with FIL_NULL, which would make the
-			btr_blob_dbg information inconsistent with the
-			record. */
-			b.ref_page_no = page_get_page_no(page_align(rec));
-			b.ref_heap_no = page_rec_get_heap_no(rec);
-			b.ref_field_no = i;
-			btr_blob_dbg_rbt_delete(index, &b, "free");
-		}
-
-		btr_blob_dbg_assert_empty(index, b.blob_page_no);
 	}
-#endif /* UNIV_BLOB_DEBUG */
 
 	for (;;) {
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 		buf_block_t*	rec_block;
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
 		buf_block_t*	ext_block;
 
 		mtr_start(&mtr);
+		mtr.set_spaces(*local_mtr);
+		mtr.set_log_mode(local_mtr->get_log_mode());
+
+		ut_ad(!dict_table_is_temporary(index->table)
+		      || local_mtr->get_log_mode() == MTR_LOG_NO_REDO);
 
-#ifdef UNIV_SYNC_DEBUG
+		const page_t*	p = page_align(field_ref);
+
+		const page_id_t	page_id(page_get_space_id(p),
+					page_get_page_no(p));
+
+#ifdef UNIV_DEBUG
 		rec_block =
-#endif /* UNIV_SYNC_DEBUG */
-		buf_page_get(page_get_space_id(page_align(field_ref)),
-			     rec_zip_size,
-			     page_get_page_no(page_align(field_ref)),
-			     RW_X_LATCH, &mtr);
+#endif /* UNIV_DEBUG */
+		buf_page_get(page_id, rec_page_size, RW_X_LATCH, &mtr);
+
 		buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
 		page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
 
@@ -5095,7 +7382,7 @@ btr_free_externally_stored_field(
 		    || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
 			& BTR_EXTERN_OWNER_FLAG)
 		    /* Rollback and inherited field */
-		    || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
+		    || (rollback
 			&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
 			    & BTR_EXTERN_INHERITED_FLAG))) {
 
@@ -5109,12 +7396,14 @@ btr_free_externally_stored_field(
 			row_log_table_blob_free(index, start_page);
 		}
 
-		ext_block = buf_page_get(space_id, ext_zip_size, page_no,
-					 RW_X_LATCH, &mtr);
+		ext_block = buf_page_get(
+			page_id_t(space_id, page_no), ext_page_size,
+			RW_X_LATCH, &mtr);
+
 		buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
 		page = buf_block_get_frame(ext_block);
 
-		if (ext_zip_size) {
+		if (ext_page_size.is_compressed()) {
 			/* Note that page_zip will be NULL
 			in row_purge_upd_exist_or_extern(). */
 			switch (fil_page_get_type(page)) {
@@ -5126,7 +7415,8 @@ btr_free_externally_stored_field(
 			}
 			next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
 
-			btr_page_free_low(index, ext_block, 0, &mtr);
+			btr_page_free_low(index, ext_block, ULINT_UNDEFINED,
+					  &mtr);
 
 			if (page_zip != NULL) {
 				mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
@@ -5153,11 +7443,8 @@ btr_free_externally_stored_field(
 				page + FIL_PAGE_DATA
 				+ BTR_BLOB_HDR_NEXT_PAGE_NO);
 
-			/* We must supply the page level (= 0) as an argument
-			because we did not store it on the page (we save the
-			space overhead from an index page header. */
-
-			btr_page_free_low(index, ext_block, 0, &mtr);
+			btr_page_free_low(index, ext_block, ULINT_UNDEFINED,
+					  &mtr);
 
 			mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
 					 next_page_no,
@@ -5173,7 +7460,7 @@ btr_free_externally_stored_field(
 		}
 
 		/* Commit mtr and release the BLOB block to save memory. */
-		btr_blob_free(ext_block, TRUE, &mtr);
+		btr_blob_free(index, ext_block, TRUE, &mtr);
 	}
 }
 
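Structurally, the loop above walks a singly linked chain of BLOB pages: read
the successor page number from the current page, free the page, and advance
the on-record reference in the same mini-transaction so that an interrupted
purge can resume. A toy sketch of the traversal, with a map standing in for
the tablespace and the chain assumed well-formed:

    #include <cstdint>
    #include <map>

    static const uint32_t FIL_NULL_STUB = 0xFFFFFFFF;

    /* next_page_of maps a BLOB page to its successor; field_ref_page
    mimics the BTR_EXTERN_PAGE_NO stored in the record. */
    static void
    free_blob_chain(std::map<uint32_t, uint32_t>& next_page_of,
                    uint32_t& field_ref_page)
    {
        while (field_ref_page != FIL_NULL_STUB) {
            const uint32_t next = next_page_of.at(field_ref_page);

            /* One "mini-transaction": free the page and advance
            the reference together. */
            next_page_of.erase(field_ref_page);
            field_ref_page = next;
        }
    }
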
@@ -5189,7 +7476,7 @@ btr_rec_free_externally_stored_fields(
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
 	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
 				part will be updated, or NULL */
-	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	bool		rollback,/*!< in: performing rollback? */
 	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
 				an X-latch to record page and to the index
 				tree */
@@ -5198,7 +7485,7 @@ btr_rec_free_externally_stored_fields(
 	ulint	i;
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_page_fix(mtr, rec, MTR_MEMO_PAGE_X_FIX, index->table));
 	/* Free possible externally stored fields in the record */
 
 	ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
@@ -5208,7 +7495,7 @@ btr_rec_free_externally_stored_fields(
 		if (rec_offs_nth_extern(offsets, i)) {
 			btr_free_externally_stored_field(
 				index, btr_rec_get_field_ref(rec, offsets, i),
-				rec, offsets, page_zip, i, rb_ctx, mtr);
+				rec, offsets, page_zip, i, rollback, mtr);
 		}
 	}
 }
@@ -5227,7 +7514,7 @@ btr_rec_free_updated_extern_fields(
 				part will be updated, or NULL */
 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
 	const upd_t*	update,	/*!< in: update vector */
-	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	bool		rollback,/*!< in: performing rollback? */
 	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
 				an X-latch to record page and to the tree */
 {
@@ -5235,7 +7522,7 @@ btr_rec_free_updated_extern_fields(
 	ulint	i;
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_is_page_fix(mtr, rec, MTR_MEMO_PAGE_X_FIX, index->table));
 
 	/* Free possible externally stored fields in the record */
 
@@ -5253,7 +7540,7 @@ btr_rec_free_updated_extern_fields(
 			btr_free_externally_stored_field(
 				index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
 				rec, offsets, page_zip,
-				ufield->field_no, rb_ctx, mtr);
+				ufield->field_no, rollback, mtr);
 		}
 	}
 }
@@ -5261,7 +7548,7 @@ btr_rec_free_updated_extern_fields(
 /*******************************************************************//**
 Copies the prefix of an uncompressed BLOB.  The clustered index record
 that points to this BLOB must be protected by a lock or a page latch.
-@return	number of bytes written to buf */
+@return number of bytes written to buf */
 static
 ulint
 btr_copy_blob_prefix(
@@ -5285,7 +7572,8 @@ btr_copy_blob_prefix(
 
 		mtr_start(&mtr);
 
-		block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
+		block = buf_page_get(page_id_t(space_id, page_no),
+				     univ_page_size, RW_S_LATCH, &mtr);
 		buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
 		page = buf_block_get_frame(block);
 
@@ -5317,21 +7605,25 @@ btr_copy_blob_prefix(
 	}
 }
 
-/*******************************************************************//**
-Copies the prefix of a compressed BLOB.  The clustered index record
-that points to this BLOB must be protected by a lock or a page latch.
-@return	number of bytes written to buf */
+/** Copies the prefix of a compressed BLOB.
+The clustered index record that points to this BLOB must be protected
+by a lock or a page latch.
+@param[out]	buf		the externally stored part of the field,
+or a prefix of it
+@param[in]	len		length of buf, in bytes
+@param[in]	page_size	compressed BLOB page size
+@param[in]	space_id	space id of the BLOB pages
+@param[in]	page_no		page number of the first BLOB page
+@param[in]	offset		offset on the first BLOB page
+@return number of bytes written to buf */
 static
 ulint
 btr_copy_zblob_prefix(
-/*==================*/
-	byte*		buf,	/*!< out: the externally stored part of
-				the field, or a prefix of it */
-	ulint		len,	/*!< in: length of buf, in bytes */
-	ulint		zip_size,/*!< in: compressed BLOB page size */
-	ulint		space_id,/*!< in: space id of the BLOB pages */
-	ulint		page_no,/*!< in: page number of the first BLOB page */
-	ulint		offset)	/*!< in: offset on the first BLOB page */
+	byte*			buf,
+	ulint			len,
+	const page_size_t&	page_size,
+	ulint			space_id,
+	ulint			page_no,
+	ulint			offset)
 {
 	ulint		page_type = FIL_PAGE_TYPE_ZBLOB;
 	mem_heap_t*	heap;
@@ -5348,9 +7640,7 @@ btr_copy_zblob_prefix(
 	heap = mem_heap_create(40000);
 	page_zip_set_alloc(&d_stream, heap);
 
-	ut_ad(ut_is_2pow(zip_size));
-	ut_ad(zip_size >= UNIV_ZIP_SIZE_MIN);
-	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+	ut_ad(page_size.is_compressed());
 	ut_ad(space_id);
 
 	err = inflateInit(&d_stream);
@@ -5364,27 +7654,23 @@ btr_copy_zblob_prefix(
 		bpage is protected by the B-tree page latch that
 		is being held on the clustered index record, or,
 		in row_merge_copy_blobs(), by an exclusive table lock. */
-		bpage = buf_page_get_zip(space_id, zip_size, page_no);
+		bpage = buf_page_get_zip(page_id_t(space_id, page_no),
+					 page_size);
 
 		if (UNIV_UNLIKELY(!bpage)) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: Cannot load"
-				" compressed BLOB"
-				" page %lu space %lu\n",
-				(ulong) page_no, (ulong) space_id);
+			ib::error() << "Cannot load compressed BLOB "
+				<< page_id_t(space_id, page_no);
 			goto func_exit;
 		}
 
 		if (UNIV_UNLIKELY
 		    (fil_page_get_type(bpage->zip.data) != page_type)) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: Unexpected type %lu of"
-				" compressed BLOB"
-				" page %lu space %lu\n",
-				(ulong) fil_page_get_type(bpage->zip.data),
-				(ulong) page_no, (ulong) space_id);
+
+			ib::error() << "Unexpected type "
+				<< fil_page_get_type(bpage->zip.data)
+				<< " of compressed BLOB page "
+				<< page_id_t(space_id, page_no);
+
 			ut_ad(0);
 			goto end_of_blob;
 		}
@@ -5401,7 +7687,8 @@ btr_copy_zblob_prefix(
 		}
 
 		d_stream.next_in = bpage->zip.data + offset;
-		d_stream.avail_in = static_cast<uInt>(zip_size - offset);
+		d_stream.avail_in = static_cast<uInt>(page_size.physical()
+						      - offset);
 
 		err = inflate(&d_stream, Z_NO_FLUSH);
 		switch (err) {
@@ -5417,26 +7704,21 @@ btr_copy_zblob_prefix(
 			/* fall through */
 		default:
 inflate_error:
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: inflate() of"
-				" compressed BLOB"
-				" page %lu space %lu returned %d (%s)\n",
-				(ulong) page_no, (ulong) space_id,
-				err, d_stream.msg);
+			ib::error() << "inflate() of compressed BLOB page "
+				<< page_id_t(space_id, page_no)
+				<< " returned " << err
+				<< " (" << d_stream.msg << ")";
+
 		case Z_BUF_ERROR:
 			goto end_of_blob;
 		}
 
 		if (next_page_no == FIL_NULL) {
 			if (!d_stream.avail_in) {
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					"  InnoDB: unexpected end of"
-					" compressed BLOB"
-					" page %lu space %lu\n",
-					(ulong) page_no,
-					(ulong) space_id);
+				ib::error()
+					<< "Unexpected end of compressed "
+					<< "BLOB page "
+					<< page_id_t(space_id, page_no);
 			} else {
 				err = inflate(&d_stream, Z_FINISH);
 				switch (err) {
@@ -5470,55 +7752,59 @@ func_exit:
 	return(d_stream.total_out);
 }
 
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record.  The
-clustered index record that points to this BLOB must be protected by a
-lock or a page latch.
-@return	number of bytes written to buf */
+/** Copies the prefix of an externally stored field of a record.
+The clustered index record that points to this BLOB must be protected
+by a lock or a page latch.
+@param[out]	buf		the externally stored part of the
+field, or a prefix of it
+@param[in]	len		length of buf, in bytes
+@param[in]	page_size	BLOB page size
+@param[in]	space_id	space id of the first BLOB page
+@param[in]	page_no		page number of the first BLOB page
+@param[in]	offset		offset on the first BLOB page
+@return number of bytes written to buf */
 static
 ulint
 btr_copy_externally_stored_field_prefix_low(
-/*========================================*/
-	byte*		buf,	/*!< out: the externally stored part of
-				the field, or a prefix of it */
-	ulint		len,	/*!< in: length of buf, in bytes */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	ulint		space_id,/*!< in: space id of the first BLOB page */
-	ulint		page_no,/*!< in: page number of the first BLOB page */
-	ulint		offset)	/*!< in: offset on the first BLOB page */
+	byte*			buf,
+	ulint			len,
+	const page_size_t&	page_size,
+	ulint			space_id,
+	ulint			page_no,
+	ulint			offset)
 {
-	if (UNIV_UNLIKELY(len == 0)) {
+	if (len == 0) {
 		return(0);
 	}
 
-	if (zip_size) {
-		return(btr_copy_zblob_prefix(buf, len, zip_size,
+	if (page_size.is_compressed()) {
+		return(btr_copy_zblob_prefix(buf, len, page_size,
 					     space_id, page_no, offset));
 	} else {
+		ut_ad(page_size.equals_to(univ_page_size));
 		return(btr_copy_blob_prefix(buf, len, space_id,
 					    page_no, offset));
 	}
 }
 
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record.  The
-clustered index record must be protected by a lock or a page latch.
+/** Copies the prefix of an externally stored field of a record.
+The clustered index record must be protected by a lock or a page latch.
+@param[out]	buf		the field, or a prefix of it
+@param[in]	len		length of buf, in bytes
+@param[in]	page_size	BLOB page size
+@param[in]	data		'internally' stored part of the field
+containing also the reference to the external part; must be protected by
+a lock or a page latch
+@param[in]	local_len	length of data, in bytes
 @return the length of the copied field, or 0 if the column was being
 or has been deleted */
-UNIV_INTERN
 ulint
 btr_copy_externally_stored_field_prefix(
-/*====================================*/
-	byte*		buf,	/*!< out: the field, or a prefix of it */
-	ulint		len,	/*!< in: length of buf, in bytes */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	const byte*	data,	/*!< in: 'internally' stored part of the
-				field containing also the reference to
-				the external part; must be protected by
-				a lock or a page latch */
-	ulint		local_len)/*!< in: length of data, in bytes */
+	byte*			buf,
+	ulint			len,
+	const page_size_t&	page_size,
+	const byte*		data,
+	ulint			local_len)
 {
 	ulint	space_id;
 	ulint	page_no;
@@ -5555,28 +7841,28 @@ btr_copy_externally_stored_field_prefix(
 	return(local_len
 	       + btr_copy_externally_stored_field_prefix_low(buf + local_len,
 							     len - local_len,
-							     zip_size,
+							     page_size,
 							     space_id, page_no,
 							     offset));
 }
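
These copy functions all start from the 20-byte external reference that trails the locally stored prefix of the column. A minimal editorial sketch of that decoding, using existing InnoDB constants and mach_read_from_4(); the helper itself is hypothetical, shown only to make the (space_id, page_no, offset) plumbing explicit:

	/* Editorial sketch, not part of the patch: decode the BLOB
	reference stored at the end of the in-record prefix. */
	static void
	blob_ref_decode(const byte* data, ulint local_len,
			ulint* space_id, ulint* page_no, ulint* offset)
	{
		const byte*	ref = data + local_len
			- BTR_EXTERN_FIELD_REF_SIZE;

		*space_id = mach_read_from_4(ref + BTR_EXTERN_SPACE_ID);
		*page_no  = mach_read_from_4(ref + BTR_EXTERN_PAGE_NO);
		*offset   = mach_read_from_4(ref + BTR_EXTERN_OFFSET);
	}
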
 
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.  The
-clustered index record must be protected by a lock or a page latch.
-@return	the whole field copied to heap */
-UNIV_INTERN
+/** Copies an externally stored field of a record to mem heap.
+The clustered index record must be protected by a lock or a page latch.
+@param[out]	len		length of the whole field
+@param[in]	data		'internally' stored part of the field
+containing also the reference to the external part; must be protected by
+a lock or a page latch
+@param[in]	page_size	BLOB page size
+@param[in]	local_len	length of data
+@param[in,out]	heap		mem heap
+@return the whole field copied to heap */
 byte*
 btr_copy_externally_stored_field(
-/*=============================*/
-	ulint*		len,	/*!< out: length of the whole field */
-	const byte*	data,	/*!< in: 'internally' stored part of the
-				field containing also the reference to
-				the external part; must be protected by
-				a lock or a page latch */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	ulint		local_len,/*!< in: length of data */
-	mem_heap_t*	heap)	/*!< in: mem heap */
+	ulint*			len,
+	const byte*		data,
+	const page_size_t&	page_size,
+	ulint			local_len,
+	mem_heap_t*		heap)
 {
 	ulint	space_id;
 	ulint	page_no;
@@ -5605,28 +7891,30 @@ btr_copy_externally_stored_field(
 	*len = local_len
 		+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
 							      extern_len,
-							      zip_size,
+							      page_size,
 							      space_id,
 							      page_no, offset);
 
 	return(buf);
 }
 
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap, or NULL if the field is incomplete */
-UNIV_INTERN
+/** Copies an externally stored field of a record to mem heap.
+@param[in]	rec		record in a clustered index; must be
+protected by a lock or a page latch
+@param[in]	offsets		array returned by rec_get_offsets()
+@param[in]	page_size	BLOB page size
+@param[in]	no		field number
+@param[out]	len		length of the field
+@param[in,out]	heap		mem heap
+@return the field copied to heap, or NULL if the field is incomplete */
 byte*
 btr_rec_copy_externally_stored_field(
-/*=================================*/
-	const rec_t*	rec,	/*!< in: record in a clustered index;
-				must be protected by a lock or a page latch */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	ulint		no,	/*!< in: field number */
-	ulint*		len,	/*!< out: length of the field */
-	mem_heap_t*	heap)	/*!< in: mem heap */
+	const rec_t*		rec,
+	const ulint*		offsets,
+	const page_size_t&	page_size,
+	ulint			no,
+	ulint*			len,
+	mem_heap_t*		heap)
 {
 	ulint		local_len;
 	const byte*	data;
@@ -5657,6 +7945,6 @@ btr_rec_copy_externally_stored_field(
 	}
 
 	return(btr_copy_externally_stored_field(len, data,
-						zip_size, local_len, heap));
+						page_size, local_len, heap));
 }
 #endif /* !UNIV_HOTBACKUP */
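
Throughout btr0btr.cc this patch replaces the raw ulint zip_size argument (zero meaning an uncompressed BLOB) with const page_size_t&, so the compressed/uncompressed decision travels in the type instead of a sentinel value. A hedged sketch of the adapter a caller still holding a legacy zip_size would need, assuming the 5.7 page_size_t(physical, logical, is_compressed) constructor; the helper name is hypothetical:

	/* Editorial sketch: map the old zip_size convention onto the
	new page_size_t type (constructor shape is an assumption). */
	static page_size_t
	page_size_from_zip_size(ulint zip_size)
	{
		if (zip_size == 0) {
			return(univ_page_size);	/* uncompressed table */
		}

		return(page_size_t(zip_size,		/* physical size */
				   UNIV_PAGE_SIZE,	/* logical size */
				   true));		/* compressed */
	}
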
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 82a2b6d..f673b7b 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -35,43 +35,38 @@ Created 2/23/1996 Heikki Tuuri
 
 /**************************************************************//**
 Allocates memory for a persistent cursor object and initializes the cursor.
-@return	own: persistent cursor */
-UNIV_INTERN
+@return own: persistent cursor */
 btr_pcur_t*
 btr_pcur_create_for_mysql(void)
 /*============================*/
 {
 	btr_pcur_t*	pcur;
+	DBUG_ENTER("btr_pcur_create_for_mysql");
 
-	pcur = (btr_pcur_t*) mem_alloc(sizeof(btr_pcur_t));
+	pcur = (btr_pcur_t*) ut_malloc_nokey(sizeof(btr_pcur_t));
 
 	pcur->btr_cur.index = NULL;
 	btr_pcur_init(pcur);
 
-	return(pcur);
+	DBUG_PRINT("btr_pcur_create_for_mysql", ("pcur: %p", pcur));
+	DBUG_RETURN(pcur);
 }
 
 /**************************************************************//**
 Resets a persistent cursor object, freeing ::old_rec_buf if it is
 allocated and resetting the other members to their initial values. */
-UNIV_INTERN
 void
 btr_pcur_reset(
 /*===========*/
 	btr_pcur_t*	cursor)	/*!< in, out: persistent cursor */
 {
-	if (cursor->old_rec_buf != NULL) {
-
-		mem_free(cursor->old_rec_buf);
-
-		cursor->old_rec_buf = NULL;
-	}
-
+	btr_pcur_free(cursor);
+	cursor->old_rec_buf = NULL;
 	cursor->btr_cur.index = NULL;
 	cursor->btr_cur.page_cur.rec = NULL;
 	cursor->old_rec = NULL;
 	cursor->old_n_fields = 0;
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 
 	cursor->latch_mode = BTR_NO_LATCHES;
 	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
@@ -79,14 +74,17 @@ btr_pcur_reset(
 
 /**************************************************************//**
 Frees the memory for a persistent cursor object. */
-UNIV_INTERN
 void
 btr_pcur_free_for_mysql(
 /*====================*/
 	btr_pcur_t*	cursor)	/*!< in, own: persistent cursor */
 {
-	btr_pcur_reset(cursor);
-	mem_free(cursor);
+	DBUG_ENTER("btr_pcur_free_for_mysql");
+	DBUG_PRINT("btr_pcur_free_for_mysql", ("pcur: %p", cursor));
+
+	btr_pcur_free(cursor);
+	ut_free(cursor);
+	DBUG_VOID_RETURN;
 }
 
 /**************************************************************//**
@@ -96,7 +94,6 @@ cursor data structure, or just setting a flag if the cursor is before the
 first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
 page where the cursor is positioned must not be empty if the index tree is
 not totally empty! */
-UNIV_INTERN
 void
 btr_pcur_store_position(
 /*====================*/
@@ -122,8 +119,23 @@ btr_pcur_store_position(
 	page = page_align(rec);
 	offs = page_offset(rec);
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
-	      || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_DEBUG
+	if (dict_index_is_spatial(index)) {
+		/* For a spatial index, when we reposition on the parent
+		buffer page, we might not hold page latches, but the
+		tree must be locked to prevent changes to the page. */
+		ut_ad((mtr_memo_contains_flagged(
+				mtr, dict_index_get_lock(index),
+				MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)
+		       || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
+		       || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX))
+		      && (block->page.buf_fix_count > 0));
+	} else {
+		ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
+		      || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)
+		      || dict_table_is_intrinsic(index->table));
+	}
+#endif /* UNIV_DEBUG */
 
 	if (page_is_empty(page)) {
 		/* It must be an empty index tree; NOTE that in this case
@@ -135,7 +147,7 @@ btr_pcur_store_position(
 		ut_ad(page_is_leaf(page));
 		ut_ad(page_get_page_no(page) == index->page);
 
-		cursor->old_stored = BTR_PCUR_OLD_STORED;
+		cursor->old_stored = true;
 
 		if (page_rec_is_supremum_low(offs)) {
 
@@ -162,18 +174,20 @@ btr_pcur_store_position(
 		cursor->rel_pos = BTR_PCUR_ON;
 	}
 
-	cursor->old_stored = BTR_PCUR_OLD_STORED;
+	cursor->old_stored = true;
 	cursor->old_rec = dict_index_copy_rec_order_prefix(
 		index, rec, &cursor->old_n_fields,
 		&cursor->old_rec_buf, &cursor->buf_size);
 
 	cursor->block_when_stored = block;
+
+	/* The function below also tries to check that the block is
+	S- or X-latched. */
 	cursor->modify_clock = buf_block_get_modify_clock(block);
+	cursor->withdraw_clock = buf_withdraw_clock;
 }
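
The new withdraw_clock member pairs with the existing modify_clock: buf_withdraw_clock advances whenever the buffer pool withdraws blocks during an online resize. A hedged sketch of the double validation that the optimistic restore path below relies on; the helper is illustrative only:

	/* Editorial sketch: a stored position may be reused without a
	fresh B-tree descent only if both clocks still match. */
	static bool
	stored_position_still_valid(const btr_pcur_t* pcur)
	{
		if (buf_pool_is_obsolete(pcur->withdraw_clock)) {
			/* the cached block pointer may have been
			freed by a buffer pool resize */
			return(false);
		}

		/* the page was not modified or reorganized since
		the position was stored */
		return(buf_block_get_modify_clock(pcur->block_when_stored)
		       == pcur->modify_clock);
	}
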
 
 /**************************************************************//**
 Copies the stored position of a pcur to another pcur. */
-UNIV_INTERN
 void
 btr_pcur_copy_stored_position(
 /*==========================*/
@@ -182,16 +196,13 @@ btr_pcur_copy_stored_position(
 	btr_pcur_t*	pcur_donate)	/*!< in: pcur from which the info is
 					copied */
 {
-	if (pcur_receive->old_rec_buf) {
-		mem_free(pcur_receive->old_rec_buf);
-	}
-
+	ut_free(pcur_receive->old_rec_buf);
 	ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t));
 
 	if (pcur_donate->old_rec_buf) {
 
 		pcur_receive->old_rec_buf = (byte*)
-			mem_alloc(pcur_donate->buf_size);
+			ut_malloc_nokey(pcur_donate->buf_size);
 
 		ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
 			  pcur_donate->buf_size);
@@ -217,7 +228,6 @@ restores to before first or after the last in the tree.
 @return TRUE if the cursor position was stored when it was on a user
 record and it can be restored on a user record whose ordering fields
 are identical to the ones of the original user record */
-UNIV_INTERN
 ibool
 btr_pcur_restore_position_func(
 /*===========================*/
@@ -229,13 +239,12 @@ btr_pcur_restore_position_func(
 {
 	dict_index_t*	index;
 	dtuple_t*	tuple;
-	ulint		mode;
-	ulint		old_mode;
+	page_cur_mode_t	mode;
+	page_cur_mode_t	old_mode;
 	mem_heap_t*	heap;
 
-	ut_ad(mtr);
-	ut_ad(mtr->state == MTR_ACTIVE);
-	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
+	ut_ad(mtr->is_active());
+	ut_ad(cursor->old_stored);
 	ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
 	      || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 
@@ -253,7 +262,8 @@ btr_pcur_restore_position_func(
 			index, latch_mode,
 			btr_pcur_get_btr_cur(cursor), 0, mtr);
 
-		cursor->latch_mode = latch_mode;
+		cursor->latch_mode =
+			BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode);
 		cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 		cursor->block_when_stored = btr_pcur_get_block(cursor);
 
@@ -263,14 +273,21 @@ btr_pcur_restore_position_func(
 	ut_a(cursor->old_rec);
 	ut_a(cursor->old_n_fields);
 
-	if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
-	    || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
+	/* Optimistic latching involves an S/X latch that is not required
+	for intrinsic tables; for those we prefer a fresh search instead. */
+	if ((latch_mode == BTR_SEARCH_LEAF
+	     || latch_mode == BTR_MODIFY_LEAF
+	     || latch_mode == BTR_SEARCH_PREV
+	     || latch_mode == BTR_MODIFY_PREV)
+            && !dict_table_is_intrinsic(cursor->btr_cur.index->table)) {
 		/* Try optimistic restoration. */
 
-		if (buf_page_optimistic_get(latch_mode,
-					    cursor->block_when_stored,
-					    cursor->modify_clock,
-					    file, line, mtr)) {
+		if (!buf_pool_is_obsolete(cursor->withdraw_clock)
+		    && btr_cur_optimistic_latch_leaves(
+			cursor->block_when_stored, cursor->modify_clock,
+			&latch_mode, btr_pcur_get_btr_cur(cursor),
+			file, line, mtr)) {
+
 			cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 			cursor->latch_mode = latch_mode;
 
@@ -334,7 +351,7 @@ btr_pcur_restore_position_func(
 		break;
 	default:
 		ut_error;
-		mode = 0;
+		mode = PAGE_CUR_UNSUPP;
 	}
 
 	btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
@@ -343,39 +360,28 @@ btr_pcur_restore_position_func(
 	/* Restore the old search mode */
 	cursor->search_mode = old_mode;
 
-	switch (cursor->rel_pos) {
-	case BTR_PCUR_ON:
-		if (btr_pcur_is_on_user_rec(cursor)
-		    && !cmp_dtuple_rec(
-			    tuple, btr_pcur_get_rec(cursor),
-			    rec_get_offsets(btr_pcur_get_rec(cursor),
-					    index, NULL,
-					    ULINT_UNDEFINED, &heap))) {
-
-			/* We have to store the NEW value for
-			the modify clock, since the cursor can
-			now be on a different page! But we can
-			retain the value of old_rec */
-
-			cursor->block_when_stored =
-				btr_pcur_get_block(cursor);
-			cursor->modify_clock =
-				buf_block_get_modify_clock(
-					cursor->block_when_stored);
-			cursor->old_stored = BTR_PCUR_OLD_STORED;
-
-			mem_heap_free(heap);
-
-			return(TRUE);
-		}
-#ifdef UNIV_DEBUG
-		/* fall through */
-	case BTR_PCUR_BEFORE:
-	case BTR_PCUR_AFTER:
-		break;
-	default:
-		ut_error;
-#endif /* UNIV_DEBUG */
+	ut_ad(cursor->rel_pos == BTR_PCUR_ON
+	      || cursor->rel_pos == BTR_PCUR_BEFORE
+	      || cursor->rel_pos == BTR_PCUR_AFTER);
+	if (cursor->rel_pos == BTR_PCUR_ON
+	    && btr_pcur_is_on_user_rec(cursor)
+	    && !cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
+			       rec_get_offsets(btr_pcur_get_rec(cursor),
+			       index, NULL, ULINT_UNDEFINED, &heap))) {
+
+		/* We have to store the NEW value for the modify clock,
+		since the cursor can now be on a different page!
+		But we can retain the value of old_rec */
+
+		cursor->block_when_stored = btr_pcur_get_block(cursor);
+		cursor->modify_clock = buf_block_get_modify_clock(
+						cursor->block_when_stored);
+		cursor->old_stored = true;
+		cursor->withdraw_clock = buf_withdraw_clock;
+
+		mem_heap_free(heap);
+
+		return(TRUE);
 	}
 
 	mem_heap_free(heap);
@@ -394,7 +400,6 @@ Moves the persistent cursor to the first record on the next page. Releases the
 latch on the current page, and buffer-unfixes it. Note that there must not be
 modifications on the current page, as then the x-latch can be released only in
 mtr_commit. */
-UNIV_INTERN
 void
 btr_pcur_move_to_next_page(
 /*=======================*/
@@ -403,42 +408,57 @@ btr_pcur_move_to_next_page(
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint		next_page_no;
-	ulint		space;
-	ulint		zip_size;
 	page_t*		page;
 	buf_block_t*	next_block;
 	page_t*		next_page;
+	ulint		mode;
+	dict_table_t*	table = btr_pcur_get_btr_cur(cursor)->index->table;
 
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 	ut_ad(btr_pcur_is_after_last_on_page(cursor));
 
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 
 	page = btr_pcur_get_page(cursor);
 	next_page_no = btr_page_get_next(page, mtr);
-	space = buf_block_get_space(btr_pcur_get_block(cursor));
-	zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
 
 	ut_ad(next_page_no != FIL_NULL);
 
-	next_block = btr_block_get(space, zip_size, next_page_no,
-				   cursor->latch_mode,
-				   btr_pcur_get_btr_cur(cursor)->index, mtr);
+	mode = cursor->latch_mode;
+	switch (mode) {
+	case BTR_SEARCH_TREE:
+		mode = BTR_SEARCH_LEAF;
+		break;
+	case BTR_MODIFY_TREE:
+		mode = BTR_MODIFY_LEAF;
+	}
+
+	/* For intrinsic tables we avoid taking any latches as the table is
+	accessed by only one thread at any given time. */
+	if (dict_table_is_intrinsic(table)) {
+		mode = BTR_NO_LATCHES;
+	}
+
+	buf_block_t*	block = btr_pcur_get_block(cursor);
+
+	next_block = btr_block_get(
+		page_id_t(block->page.id.space(), next_page_no),
+		block->page.size, mode,
+		btr_pcur_get_btr_cur(cursor)->index, mtr);
+
 	next_page = buf_block_get_frame(next_block);
 #ifdef UNIV_BTR_DEBUG
 	ut_a(page_is_comp(next_page) == page_is_comp(page));
 	ut_a(btr_page_get_prev(next_page, mtr)
-	     == buf_block_get_page_no(btr_pcur_get_block(cursor)));
+	     == btr_pcur_get_block(cursor)->page.id.page_no());
 #endif /* UNIV_BTR_DEBUG */
-	next_block->check_index_page_at_flush = TRUE;
 
-	btr_leaf_page_release(btr_pcur_get_block(cursor),
-			      cursor->latch_mode, mtr);
+	btr_leaf_page_release(btr_pcur_get_block(cursor), mode, mtr);
 
 	page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
 
-	page_check_dir(next_page);
+	ut_d(page_check_dir(next_page));
 }
 
 /*********************************************************//**
@@ -450,7 +470,6 @@ alphabetical position of the cursor is guaranteed to be sensible on
 return, but it may happen that the cursor is not positioned on the last
 record of any page, because the structure of the tree may have changed
 during the time when the cursor had no latches. */
-UNIV_INTERN
 void
 btr_pcur_move_backward_from_page(
 /*=============================*/
@@ -494,37 +513,42 @@ btr_pcur_move_backward_from_page(
 
 	prev_page_no = btr_page_get_prev(page, mtr);
 
-	if (prev_page_no == FIL_NULL) {
-	} else if (btr_pcur_is_before_first_on_page(cursor)) {
+	/* For intrinsic tables we do not do optimistic restore, so there
+	is no pinned left block that needs to be released. */
+	if (!dict_table_is_intrinsic(
+		btr_cur_get_index(btr_pcur_get_btr_cur(cursor))->table)) {
 
-		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
+		if (prev_page_no == FIL_NULL) {
+		} else if (btr_pcur_is_before_first_on_page(cursor)) {
 
-		btr_leaf_page_release(btr_pcur_get_block(cursor),
-				      latch_mode, mtr);
+			prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
 
-		page_cur_set_after_last(prev_block,
+			btr_leaf_page_release(btr_pcur_get_block(cursor),
+					latch_mode, mtr);
+
+			page_cur_set_after_last(prev_block,
 					btr_pcur_get_page_cur(cursor));
-	} else {
+		} else {
 
-		/* The repositioned cursor did not end on an infimum record on
-		a page. Cursor repositioning acquired a latch also on the
-		previous page, but we do not need the latch: release it. */
+			/* The repositioned cursor did not end on an infimum
+			record on a page. Cursor repositioning acquired a latch
+			also on the previous page, but we do not need the latch:
+			release it. */
 
-		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
+			prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
 
-		btr_leaf_page_release(prev_block, latch_mode, mtr);
+			btr_leaf_page_release(prev_block, latch_mode, mtr);
+		}
 	}
 
 	cursor->latch_mode = latch_mode;
-
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 }
 
 /*********************************************************//**
 Moves the persistent cursor to the previous record in the tree. If no records
 are left, the cursor stays 'before first in tree'.
-@return	TRUE if the cursor was not before first in tree */
-UNIV_INTERN
+@return TRUE if the cursor was not before first in tree */
 ibool
 btr_pcur_move_to_prev(
 /*==================*/
@@ -535,7 +559,7 @@ btr_pcur_move_to_prev(
 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
 
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_stored = false;
 
 	if (btr_pcur_is_before_first_on_page(cursor)) {
 
@@ -561,13 +585,12 @@ PAGE_CUR_LE, on the last user record. If no such user record exists, then
 in the first case sets the cursor after last in tree, and in the latter case
 before first in tree. The latching mode must be BTR_SEARCH_LEAF or
 BTR_MODIFY_LEAF. */
-UNIV_INTERN
 void
 btr_pcur_open_on_user_rec_func(
 /*===========================*/
 	dict_index_t*	index,		/*!< in: index */
 	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
-	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
+	page_cur_mode_t	mode,		/*!< in: PAGE_CUR_L, ... */
 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
 					BTR_MODIFY_LEAF */
 	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index df70f8a..ce8cef6 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -33,7 +33,7 @@ Created 2/17/1996 Heikki Tuuri
 #include "btr0sea.h"
 #ifdef UNIV_NONINL
 #include "btr0sea.ic"
-#endif
+#endif /* UNIV_NONINL */
 
 #include "buf0buf.h"
 #include "page0page.h"
@@ -42,47 +42,42 @@ Created 2/17/1996 Heikki Tuuri
 #include "btr0pcur.h"
 #include "btr0btr.h"
 #include "ha0ha.h"
+#include "srv0mon.h"
+#include "sync0sync.h"
 
-/** Flag: has the search system been enabled?
-Protected by btr_search_latch. */
-UNIV_INTERN char		btr_search_enabled	= TRUE;
+/** Is the search system enabled?
+The search system is protected by an array of latches. */
+char		btr_search_enabled	= true;
 
-/** A dummy variable to fool the compiler */
-UNIV_INTERN ulint		btr_search_this_is_zero = 0;
+/** Number of adaptive hash index partitions. */
+ulong		btr_ahi_parts		= 8;
 
 #ifdef UNIV_SEARCH_PERF_STAT
 /** Number of successful adaptive hash index lookups */
-UNIV_INTERN ulint		btr_search_n_succ	= 0;
+ulint		btr_search_n_succ	= 0;
 /** Number of failed adaptive hash index lookups */
-UNIV_INTERN ulint		btr_search_n_hash_fail	= 0;
+ulint		btr_search_n_hash_fail	= 0;
 #endif /* UNIV_SEARCH_PERF_STAT */
 
 /** padding to prevent other memory update
 hotspots from residing on the same memory
-cache line as btr_search_latch */
-UNIV_INTERN byte		btr_sea_pad1[64];
+cache line as btr_search_latches */
+byte		btr_sea_pad1[64];
 
-/** The latch protecting the adaptive search system: this latch protects the
+/** The latches protecting the adaptive search system: these latches protect the
 (1) positions of records on those pages where a hash index has been built.
 NOTE: It does not protect values of non-ordering fields within a record from
 being updated in-place! We can use fact (1) to perform unique searches to
-indexes. */
-
-/* We will allocate the latch from dynamic memory to get it to the
+indexes. We will allocate the latches from dynamic memory to get them to the
 same DRAM page as other hotspot semaphores */
-UNIV_INTERN rw_lock_t*		btr_search_latch_temp;
+rw_lock_t**	btr_search_latches;
 
 /** padding to prevent other memory update hotspots from residing on
 the same memory cache line */
-UNIV_INTERN byte		btr_sea_pad2[64];
+byte		btr_sea_pad2[64];
 
 /** The adaptive hash index */
-UNIV_INTERN btr_search_sys_t*	btr_search_sys;
-
-#ifdef UNIV_PFS_RWLOCK
-/* Key to register btr_search_sys with performance schema */
-UNIV_INTERN mysql_pfs_key_t	btr_search_latch_key;
-#endif /* UNIV_PFS_RWLOCK */
+btr_search_sys_t*	btr_search_sys;
 
 /** If the number of records on the page divided by this parameter
 would have been successfully accessed using a hash index, the index
@@ -93,6 +88,30 @@ is then built on the page, assuming the global limit has been reached */
 before hash index building is started */
 #define BTR_SEARCH_BUILD_LIMIT		100
 
+/** Determine the number of accessed key fields.
+@param[in]	n_fields	number of complete fields
+@param[in]	n_bytes		number of bytes in an incomplete last field
+@return	number of complete or incomplete fields */
+inline __attribute__((warn_unused_result))
+ulint
+btr_search_get_n_fields(
+	ulint	n_fields,
+	ulint	n_bytes)
+{
+	return(n_fields + (n_bytes > 0 ? 1 : 0));
+}
+
+/** Determine the number of accessed key fields.
+@param[in]	cursor		b-tree cursor
+@return	number of complete or incomplete fields */
+inline __attribute__((warn_unused_result))
+ulint
+btr_search_get_n_fields(
+	const btr_cur_t*	cursor)
+{
+	return(btr_search_get_n_fields(cursor->n_fields, cursor->n_bytes));
+}
+
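
Illustration of the counting rule encoded above: a non-empty byte prefix of the next field counts as one more accessed field.

	ut_ad(btr_search_get_n_fields(2, 0) == 2);	/* two whole fields */
	ut_ad(btr_search_get_n_fields(2, 3) == 3);	/* plus 3-byte prefix */
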
 /********************************************************************//**
 Builds a hash index on a page with the given parameters. If the page already
 has a hash index with different parameters, the old hash index is removed.
@@ -110,8 +129,7 @@ btr_search_build_page_hash_index(
 				field */
 	ibool		left_side);/*!< in: hash for searches from left side? */
 
-/*****************************************************************//**
-This function should be called before reserving any btr search mutex, if
+/** This function should be called before reserving any btr search mutex, if
 the intended operation might add nodes to the search system hash table.
 Because of the latching order, once we have reserved the btr search system
 latch, we cannot allocate a free frame from the buffer pool. Checks that
@@ -119,21 +137,19 @@ there is a free buffer frame allocated for hash table heap in the btr search
 system. If not, allocates a free frame for the heap. This check makes it
 probable that, when we have reserved the btr search system latch and need to
 allocate a new node to the hash table, it will succeed. However, the check
-will not guarantee success. */
+will not guarantee success.
+@param[in]	index	index handler */
 static
 void
-btr_search_check_free_space_in_heap(void)
-/*=====================================*/
+btr_search_check_free_space_in_heap(dict_index_t* index)
 {
 	hash_table_t*	table;
 	mem_heap_t*	heap;
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
 
-	table = btr_search_sys->hash_index;
+	table = btr_get_search_table(index);
 
 	heap = table->heap;
 
@@ -144,96 +160,174 @@ btr_search_check_free_space_in_heap(void)
 	if (heap->free_block == NULL) {
 		buf_block_t*	block = buf_block_alloc(NULL);
 
-		rw_lock_x_lock(&btr_search_latch);
+		btr_search_x_lock(index);
 
-		if (heap->free_block == NULL) {
+		if (btr_search_enabled
+		    && heap->free_block == NULL) {
 			heap->free_block = block;
 		} else {
 			buf_block_free(block);
 		}
 
-		rw_lock_x_unlock(&btr_search_latch);
+		btr_search_x_unlock(index);
 	}
 }
 
-/*****************************************************************//**
-Creates and initializes the adaptive search system at a database start. */
-UNIV_INTERN
+/** Creates and initializes the adaptive search system at a database start.
+@param[in]	hash_size	hash table size. */
 void
-btr_search_sys_create(
-/*==================*/
-	ulint	hash_size)	/*!< in: hash index hash table size */
+btr_search_sys_create(ulint hash_size)
 {
-	/* We allocate the search latch from dynamic memory:
-	see above at the global variable definition */
+	/* The search system is divided into n parts.
+	Each part controls access to a distinct set of hash buckets in
+	the hash table through its own latch. */
+
+	/* Step-1: Allocate latches (1 per part). */
+	btr_search_latches = reinterpret_cast<rw_lock_t**>(
+		ut_malloc(sizeof(rw_lock_t*) * btr_ahi_parts, mem_key_ahi));
+
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+		btr_search_latches[i] = reinterpret_cast<rw_lock_t*>(
+			ut_malloc(sizeof(rw_lock_t), mem_key_ahi));
+
+		rw_lock_create(btr_search_latch_key,
+			       btr_search_latches[i], SYNC_SEARCH_SYS);
+	}
 
-	btr_search_latch_temp = (rw_lock_t*) mem_alloc(sizeof(rw_lock_t));
+	/* Step-2: Allocate hash tables. */
+	btr_search_sys = reinterpret_cast<btr_search_sys_t*>(
+		ut_malloc(sizeof(btr_search_sys_t), mem_key_ahi));
 
-	rw_lock_create(btr_search_latch_key, &btr_search_latch,
-		       SYNC_SEARCH_SYS);
+	btr_search_sys->hash_tables = reinterpret_cast<hash_table_t**>(
+		ut_malloc(sizeof(hash_table_t*) * btr_ahi_parts, mem_key_ahi));
 
-	btr_search_sys = (btr_search_sys_t*)
-		mem_alloc(sizeof(btr_search_sys_t));
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+		btr_search_sys->hash_tables[i] =
+			ib_create((hash_size / btr_ahi_parts),
+				  LATCH_ID_HASH_TABLE_MUTEX,
+				  0, MEM_HEAP_FOR_BTR_SEARCH);
 
-	btr_search_sys->hash_index = ha_create(hash_size, 0,
-					MEM_HEAP_FOR_BTR_SEARCH, 0);
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-	btr_search_sys->hash_index->adaptive = TRUE;
+		btr_search_sys->hash_tables[i]->adaptive = TRUE;
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	}
+}
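
Each index is therefore served by exactly one of the btr_ahi_parts partitions. A hedged sketch of the mapping that btr_get_search_latch()/btr_get_search_table() are assumed to implement (fold of the index id modulo the partition count; this is an assumption based on the patch, not quoted from it):

	/* Editorial sketch: all AHI lookups for one index go through
	the latch and hash table of a single, fixed partition. */
	rw_lock_t*
	btr_get_search_latch_sketch(const dict_index_t* index)
	{
		ulint	part = ut_fold_ull(index->id) % btr_ahi_parts;

		return(btr_search_latches[part]);
	}
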
+
+/** Resize hash index hash table.
+@param[in]	hash_size	hash index hash table size */
+void
+btr_search_sys_resize(ulint hash_size)
+{
+	/* Step-1: Lock all search latches in exclusive mode. */
+	btr_search_x_lock_all();
+
+	if (btr_search_enabled) {
+
+		btr_search_x_unlock_all();
 
+		ib::error() << "btr_search_sys_resize failed because"
+			" hash index hash table is not empty.";
+		ut_ad(0);
+		return;
+	}
+
+	/* Step-2: Recreate hash tables with new size. */
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+		mem_heap_free(btr_search_sys->hash_tables[i]->heap);
+		hash_table_free(btr_search_sys->hash_tables[i]);
+
+		btr_search_sys->hash_tables[i] =
+			ib_create((hash_size / btr_ahi_parts),
+				  LATCH_ID_HASH_TABLE_MUTEX,
+				  0, MEM_HEAP_FOR_BTR_SEARCH);
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+		btr_search_sys->hash_tables[i]->adaptive = TRUE;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	}
+
+	/* Step-3: Release the exclusive locks on all search latches. */
+	btr_search_x_unlock_all();
 }
 
-/*****************************************************************//**
-Frees the adaptive search system at a database shutdown. */
-UNIV_INTERN
+/** Frees the adaptive search system at a database shutdown. */
 void
-btr_search_sys_free(void)
-/*=====================*/
+btr_search_sys_free()
 {
-	rw_lock_free(&btr_search_latch);
-	mem_free(btr_search_latch_temp);
-	btr_search_latch_temp = NULL;
-	mem_heap_free(btr_search_sys->hash_index->heap);
-	hash_table_free(btr_search_sys->hash_index);
-	mem_free(btr_search_sys);
+	ut_ad(btr_search_sys != NULL && btr_search_latches != NULL);
+
+	/* Step-1: Release the hash tables. */
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+		mem_heap_free(btr_search_sys->hash_tables[i]->heap);
+		hash_table_free(btr_search_sys->hash_tables[i]);
+
+	}
+
+	ut_free(btr_search_sys->hash_tables);
+	ut_free(btr_search_sys);
 	btr_search_sys = NULL;
+
+	/* Step-2: Release all allocated latches. */
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+
+		rw_lock_free(btr_search_latches[i]);
+		ut_free(btr_search_latches[i]);
+	}
+
+	ut_free(btr_search_latches);
+	btr_search_latches = NULL;
 }
 
-/********************************************************************//**
-Set index->ref_count = 0 on all indexes of a table. */
+/** Set index->ref_count = 0 on all indexes of a table.
+@param[in,out]	table	table handler */
 static
 void
 btr_search_disable_ref_count(
-/*=========================*/
-	dict_table_t*	table)	/*!< in/out: table */
+	dict_table_t*	table)
 {
 	dict_index_t*	index;
 
 	ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
 
-	for (index = dict_table_get_first_index(table); index;
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
 	     index = dict_table_get_next_index(index)) {
 
+		ut_ad(rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+
 		index->search_info->ref_count = 0;
 	}
 }
 
-/********************************************************************//**
-Disable the adaptive hash search system and empty the index. */
-UNIV_INTERN
+/** Disable the adaptive hash search system and empty the index.
+@param[in]	need_mutex	need to acquire dict_sys->mutex */
 void
-btr_search_disable(void)
-/*====================*/
+btr_search_disable(
+	bool	need_mutex)
 {
 	dict_table_t*	table;
 
-	mutex_enter(&dict_sys->mutex);
-	rw_lock_x_lock(&btr_search_latch);
+	if (need_mutex) {
+		mutex_enter(&dict_sys->mutex);
+	}
 
-	btr_search_enabled = FALSE;
+	ut_ad(mutex_own(&dict_sys->mutex));
+	btr_search_x_lock_all();
+
+	if (!btr_search_enabled) {
+		if (need_mutex) {
+			mutex_exit(&dict_sys->mutex);
+		}
+
+		btr_search_x_unlock_all();
+		return;
+	}
+
+	btr_search_enabled = false;
 
 	/* Clear the index->search_info->ref_count of every index in
 	the data dictionary cache. */
@@ -249,51 +343,53 @@ btr_search_disable(void)
 		btr_search_disable_ref_count(table);
 	}
 
-	mutex_exit(&dict_sys->mutex);
+	if (need_mutex) {
+		mutex_exit(&dict_sys->mutex);
+	}
 
 	/* Set all block->index = NULL. */
 	buf_pool_clear_hash_index();
 
 	/* Clear the adaptive hash index. */
-	hash_table_clear(btr_search_sys->hash_index);
-	mem_heap_empty(btr_search_sys->hash_index->heap);
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+		hash_table_clear(btr_search_sys->hash_tables[i]);
+		mem_heap_empty(btr_search_sys->hash_tables[i]->heap);
+	}
 
-	rw_lock_x_unlock(&btr_search_latch);
+	btr_search_x_unlock_all();
 }
 
-/********************************************************************//**
-Enable the adaptive hash search system. */
-UNIV_INTERN
+/** Enable the adaptive hash search system. */
 void
-btr_search_enable(void)
-/*====================*/
+btr_search_enable()
 {
-	rw_lock_x_lock(&btr_search_latch);
-
-	btr_search_enabled = TRUE;
+	buf_pool_mutex_enter_all();
+	if (srv_buf_pool_old_size != srv_buf_pool_size) {
+		buf_pool_mutex_exit_all();
+		return;
+	}
+	buf_pool_mutex_exit_all();
 
-	rw_lock_x_unlock(&btr_search_latch);
+	btr_search_x_lock_all();
+	btr_search_enabled = true;
+	btr_search_x_unlock_all();
 }
 
-/*****************************************************************//**
-Creates and initializes a search info struct.
-@return	own: search info struct */
-UNIV_INTERN
+/** Creates and initializes a search info struct.
+@param[in]	heap		heap where created.
+@return own: search info struct */
 btr_search_t*
-btr_search_info_create(
-/*===================*/
-	mem_heap_t*	heap)	/*!< in: heap where created */
+btr_search_info_create(mem_heap_t* heap)
 {
 	btr_search_t*	info;
 
 	info = (btr_search_t*) mem_heap_alloc(heap, sizeof(btr_search_t));
 
-#ifdef UNIV_DEBUG
-	info->magic_n = BTR_SEARCH_MAGIC_N;
-#endif /* UNIV_DEBUG */
+	ut_d(info->magic_n = BTR_SEARCH_MAGIC_N);
 
 	info->ref_count = 0;
 	info->root_guess = NULL;
+	info->withdraw_clock = 0;
 
 	info->hash_analysis = 0;
 	info->n_hash_potential = 0;
@@ -316,53 +412,50 @@ btr_search_info_create(
 	return(info);
 }
 
-/*****************************************************************//**
-Returns the value of ref_count. The value is protected by
-btr_search_latch.
-@return	ref_count value. */
-UNIV_INTERN
+/** Returns the value of ref_count. The value is protected by the
+search latch of the index.
+@param[in]	info		search info
+@param[in]	index		index identifier
+@return ref_count value. */
 ulint
 btr_search_info_get_ref_count(
-/*==========================*/
-	btr_search_t*   info)	/*!< in: search info. */
+	btr_search_t*	info,
+	dict_index_t*	index)
 {
-	ulint ret;
+	ulint ret = 0;
+
+	if (!btr_search_enabled) {
+		return(ret);
+	}
 
 	ut_ad(info);
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
 
-	rw_lock_s_lock(&btr_search_latch);
+	btr_search_s_lock(index);
 	ret = info->ref_count;
-	rw_lock_s_unlock(&btr_search_latch);
+	btr_search_s_unlock(index);
 
 	return(ret);
 }
 
-/*********************************************************************//**
-Updates the search info of an index about hash successes. NOTE that info
+/** Updates the search info of an index about hash successes. NOTE that info
 is NOT protected by any semaphore, to save CPU time! Do not assume its fields
-are consistent. */
+are consistent.
+@param[in,out]	info	search info
+@param[in]	cursor	cursor which was just positioned */
 static
 void
 btr_search_info_update_hash(
-/*========================*/
-	btr_search_t*	info,	/*!< in/out: search info */
-	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
+	btr_search_t*	info,
+	btr_cur_t*	cursor)
 {
-	dict_index_t*	index;
+	dict_index_t*	index = cursor->index;
 	ulint		n_unique;
 	int		cmp;
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	index = cursor->index;
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
 
 	if (dict_index_is_ibuf(index)) {
 		/* So many deletes are performed on an insert buffer tree
@@ -448,7 +541,6 @@ set_new_recomm:
 
 			info->n_fields = n_unique;
 			info->n_bytes = 0;
-
 		} else if (cursor->low_match > cursor->up_match) {
 
 			info->n_fields = cursor->up_match + 1;
@@ -462,27 +554,24 @@ set_new_recomm:
 	}
 }
 
-/*********************************************************************//**
-Updates the block search info on hash successes. NOTE that info and
-block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
+/** Update the block search info on hash successes. NOTE that info and
+block->n_hash_helps, n_fields, n_bytes, left_side are NOT protected by any
 semaphore, to save CPU time! Do not assume the fields are consistent.
-@return	TRUE if building a (new) hash index on the block is recommended */
+@param[in,out]	info	search info
+@param[in,out]	block	buffer block
+@param[in]	cursor	cursor
+@return TRUE if building a (new) hash index on the block is recommended */
 static
 ibool
 btr_search_update_block_hash_info(
-/*==============================*/
-	btr_search_t*	info,	/*!< in: search info */
-	buf_block_t*	block,	/*!< in: buffer block */
-	btr_cur_t*	cursor __attribute__((unused)))
-				/*!< in: cursor */
+	btr_search_t*		info,
+	buf_block_t*		block,
+	const btr_cur_t*	cursor)
 {
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-	ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
-	      || rw_lock_own(&block->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(cursor);
+	ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_S));
+	ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X));
+	ut_ad(rw_lock_own(&block->lock, RW_LOCK_S)
+	      || rw_lock_own(&block->lock, RW_LOCK_X));
 
 	info->last_hash_succ = FALSE;
 
@@ -540,32 +629,31 @@ btr_search_update_block_hash_info(
 	return(FALSE);
 }
 
-/*********************************************************************//**
-Updates a hash node reference when it has been unsuccessfully used in a
+/** Updates a hash node reference when it has been unsuccessfully used in a
 search which could have succeeded with the used hash parameters. This can
 happen because when building a hash index for a page, we do not check
 what happens at page boundaries, and therefore there can be misleading
 hash nodes. Also, collisions in the fold value can lead to misleading
 references. This function lazily fixes these imperfections in the hash
-index. */
+index.
+@param[in]	info	search info
+@param[in]	block	buffer block where the cursor is positioned
+@param[in]	cursor	cursor */
 static
 void
 btr_search_update_hash_ref(
-/*=======================*/
-	btr_search_t*	info,	/*!< in: search info */
-	buf_block_t*	block,	/*!< in: buffer block where cursor positioned */
-	btr_cur_t*	cursor)	/*!< in: cursor */
+	const btr_search_t*	info,
+	buf_block_t*		block,
+	const btr_cur_t*	cursor)
 {
 	dict_index_t*	index;
 	ulint		fold;
 	const rec_t*	rec;
 
 	ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
-	      || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X));
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S)
+	      || rw_lock_own(&(block->lock), RW_LOCK_X));
 	ut_ad(page_align(btr_cur_get_rec(cursor))
 	      == buf_block_get_frame(block));
 
@@ -576,6 +664,7 @@ btr_search_update_hash_ref(
 		return;
 	}
 
+	ut_ad(block->page.id.space() == index->space);
 	ut_a(index == cursor->index);
 	ut_a(!dict_index_is_ibuf(index));
 
@@ -602,35 +691,28 @@ btr_search_update_hash_ref(
 		if (UNIV_LIKELY_NULL(heap)) {
 			mem_heap_free(heap);
 		}
-#ifdef UNIV_SYNC_DEBUG
-		ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+		ut_ad(rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
 
-		ha_insert_for_fold(btr_search_sys->hash_index, fold,
+		ha_insert_for_fold(btr_get_search_table(index), fold,
 				   block, rec);
 
 		MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED);
 	}
 }
 
-/*********************************************************************//**
-Updates the search info. */
-UNIV_INTERN
+/** Updates the search info.
+@param[in,out]	info	search info
+@param[in]	cursor	cursor which was just positioned */
 void
 btr_search_info_update_slow(
-/*========================*/
-	btr_search_t*	info,	/*!< in/out: search info */
-	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
+	btr_search_t*	info,
+	btr_cur_t*	cursor)
 {
 	buf_block_t*	block;
 	ibool		build_index;
-	ulint*		params;
-	ulint*		params2;
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_S));
+	ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X));
 
 	block = btr_cur_get_block(cursor);
 
@@ -645,7 +727,7 @@ btr_search_info_update_slow(
 
 	if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
 
-		btr_search_check_free_space_in_heap();
+		btr_search_check_free_space_in_heap(cursor->index);
 	}
 
 	if (cursor->flag == BTR_CUR_HASH_FAIL) {
@@ -655,68 +737,51 @@ btr_search_info_update_slow(
 		btr_search_n_hash_fail++;
 #endif /* UNIV_SEARCH_PERF_STAT */
 
-		rw_lock_x_lock(&btr_search_latch);
+		btr_search_x_lock(cursor->index);
 
 		btr_search_update_hash_ref(info, block, cursor);
 
-		rw_lock_x_unlock(&btr_search_latch);
+		btr_search_x_unlock(cursor->index);
 	}
 
 	if (build_index) {
 		/* Note that since we did not protect block->n_fields etc.
 		with any semaphore, the values can be inconsistent. We have
-		to check inside the function call that they make sense. We
-		also malloc an array and store the values there to make sure
-		the compiler does not let the function call parameters change
-		inside the called function. It might be that the compiler
-		would optimize the call just to pass pointers to block. */
-
-		params = (ulint*) mem_alloc(3 * sizeof(ulint));
-		params[0] = block->n_fields;
-		params[1] = block->n_bytes;
-		params[2] = block->left_side;
-
-		/* Make sure the compiler cannot deduce the values and do
-		optimizations */
-
-		params2 = params + btr_search_this_is_zero;
-
-		btr_search_build_page_hash_index(cursor->index,
-						 block,
-						 params2[0],
-						 params2[1],
-						 params2[2]);
-		mem_free(params);
+		to check inside the function call that they make sense. */
+		btr_search_build_page_hash_index(cursor->index, block,
+						 block->n_fields,
+						 block->n_bytes,
+						 block->left_side);
 	}
 }
 
-/******************************************************************//**
-Checks if a guessed position for a tree cursor is right. Note that if
+/** Checks if a guessed position for a tree cursor is right. Note that if
 mode is PAGE_CUR_LE, which is used in inserts, and the function returns
 TRUE, then cursor->up_match and cursor->low_match both have sensible values.
-@return	TRUE if success */
+@param[in,out]	cursor		guess cursor position
+@param[in]	can_only_compare_to_cursor_rec
+				if we do not have a latch on the page of cursor,
+				but only a latch on the corresponding search
+				system part, then ONLY the columns of the record
+				UNDER the cursor are protected, not the next or
+				previous record in the chain: we cannot look at
+				the next or previous record to check our guess!
+@param[in]	tuple		data tuple
+@param[in]	mode		PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, PAGE_CUR_GE
+@param[in]	mtr		mini transaction
+@return TRUE if success */
 static
 ibool
 btr_search_check_guess(
-/*===================*/
-	btr_cur_t*	cursor,	/*!< in: guessed cursor position */
+	btr_cur_t*	cursor,
 	ibool		can_only_compare_to_cursor_rec,
-				/*!< in: if we do not have a latch on the page
-				of cursor, but only a latch on
-				btr_search_latch, then ONLY the columns
-				of the record UNDER the cursor are
-				protected, not the next or previous record
-				in the chain: we cannot look at the next or
-				previous record to check our guess! */
-	const dtuple_t*	tuple,	/*!< in: data tuple */
-	ulint		mode,	/*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
-				or PAGE_CUR_GE */
-	mtr_t*		mtr)	/*!< in: mtr */
+	const dtuple_t*	tuple,
+	ulint		mode,
+	mtr_t*		mtr)
 {
 	rec_t*		rec;
 	ulint		n_unique;
 	ulint		match;
-	ulint		bytes;
 	int		cmp;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
@@ -731,15 +796,13 @@ btr_search_check_guess(
 	ut_ad(page_rec_is_user_rec(rec));
 
 	match = 0;
-	bytes = 0;
 
 	offsets = rec_get_offsets(rec, cursor->index, offsets,
 				  n_unique, &heap);
-	cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
-					     offsets, &match, &bytes);
+	cmp = cmp_dtuple_rec_with_match(tuple, rec, offsets, &match);
 
 	if (mode == PAGE_CUR_GE) {
-		if (cmp == 1) {
+		if (cmp > 0) {
 			goto exit_func;
 		}
 
@@ -750,18 +813,18 @@ btr_search_check_guess(
 			goto exit_func;
 		}
 	} else if (mode == PAGE_CUR_LE) {
-		if (cmp == -1) {
+		if (cmp < 0) {
 			goto exit_func;
 		}
 
 		cursor->low_match = match;
 
 	} else if (mode == PAGE_CUR_G) {
-		if (cmp != -1) {
+		if (cmp >= 0) {
 			goto exit_func;
 		}
 	} else if (mode == PAGE_CUR_L) {
-		if (cmp != 1) {
+		if (cmp <= 0) {
 			goto exit_func;
 		}
 	}
@@ -773,7 +836,6 @@ btr_search_check_guess(
 	}
 
 	match = 0;
-	bytes = 0;
 
 	if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
 		rec_t*	prev_rec;
@@ -791,12 +853,12 @@ btr_search_check_guess(
 
 		offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
 					  n_unique, &heap);
-		cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
-						     offsets, &match, &bytes);
+		cmp = cmp_dtuple_rec_with_match(
+			tuple, prev_rec, offsets, &match);
 		if (mode == PAGE_CUR_GE) {
-			success = cmp == 1;
+			success = cmp > 0;
 		} else {
-			success = cmp != -1;
+			success = cmp >= 0;
 		}
 
 		goto exit_func;
@@ -820,13 +882,13 @@ btr_search_check_guess(
 
 		offsets = rec_get_offsets(next_rec, cursor->index, offsets,
 					  n_unique, &heap);
-		cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
-						     offsets, &match, &bytes);
+		cmp = cmp_dtuple_rec_with_match(
+			tuple, next_rec, offsets, &match);
 		if (mode == PAGE_CUR_LE) {
-			success = cmp == -1;
+			success = cmp < 0;
 			cursor->up_match = match;
 		} else {
-			success = cmp != 1;
+			success = cmp <= 0;
 		}
 	}
 exit_func:
@@ -836,34 +898,53 @@ exit_func:
 	return(success);
 }
 
-/******************************************************************//**
-Tries to guess the right search position based on the hash search info
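+/** Fail a hash search: flag the cursor so that a normal B-tree search
+follows, and update the search statistics.
+@param[in,out]	info	search info
+@param[in,out]	cursor	cursor that was being positioned */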
+static
+void
+btr_search_failure(btr_search_t* info, btr_cur_t* cursor)
+{
+	cursor->flag = BTR_CUR_HASH_FAIL;
+
+#ifdef UNIV_SEARCH_PERF_STAT
+	++info->n_hash_fail;
+
+	if (info->n_hash_succ > 0) {
+		--info->n_hash_succ;
+	}
+#endif /* UNIV_SEARCH_PERF_STAT */
+
+	info->last_hash_succ = FALSE;
+}
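
The exact -1/0/1 tests of the old page_cmp_dtuple_rec_with_match() calls in btr_search_check_guess() become sign tests against cmp_dtuple_rec_with_match(), which only promises the sign of the comparison. Summarized as an illustrative helper (cmp < 0 means tuple sorts before rec; hypothetical function, covering only the record under the cursor):

	static bool
	guess_ok_on_cursor_rec(ulint mode, int cmp)
	{
		switch (mode) {
		case PAGE_CUR_GE: return(cmp <= 0);	/* rec >= tuple */
		case PAGE_CUR_LE: return(cmp >= 0);	/* rec <= tuple */
		case PAGE_CUR_G:  return(cmp < 0);	/* rec >  tuple */
		case PAGE_CUR_L:  return(cmp > 0);	/* rec <  tuple */
		}

		return(false);
	}
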
+
+/** Tries to guess the right search position based on the hash search info
 of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
 and the function returns TRUE, then cursor->up_match and cursor->low_match
 both have sensible values.
-@return	TRUE if succeeded */
-UNIV_INTERN
+@param[in,out]	index		index
+@param[in,out]	info		index search info
+@param[in]	tuple		logical record
+@param[in]	mode		PAGE_CUR_L, ....
+@param[in]	latch_mode	BTR_SEARCH_LEAF, ...;
+				NOTE that only if has_search_latch is 0, we will
+				have a latch set on the cursor page, otherwise
+				we assume the caller uses his search latch
+				to protect the record!
+@param[out]	cursor		tree cursor
+@param[in]	has_search_latch
+				latch mode the caller currently has on
+				search system: RW_S/X_LATCH or 0
+@param[in]	mtr		mini transaction
+@return TRUE if succeeded */
 ibool
 btr_search_guess_on_hash(
-/*=====================*/
-	dict_index_t*	index,		/*!< in: index */
-	btr_search_t*	info,		/*!< in: index search info */
-	const dtuple_t*	tuple,		/*!< in: logical record */
-	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
-	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ...;
-					NOTE that only if has_search_latch
-					is 0, we will have a latch set on
-					the cursor page, otherwise we assume
-					the caller uses his search latch
-					to protect the record! */
-	btr_cur_t*	cursor,		/*!< out: tree cursor */
-	ulint		has_search_latch,/*!< in: latch mode the caller
-					currently has on btr_search_latch:
-					RW_S_LATCH, RW_X_LATCH, or 0 */
-	mtr_t*		mtr)		/*!< in: mtr */
+	dict_index_t*	index,
+	btr_search_t*	info,
+	const dtuple_t*	tuple,
+	ulint		mode,
+	ulint		latch_mode,
+	btr_cur_t*	cursor,
+	ulint		has_search_latch,
+	mtr_t*		mtr)
 {
-	buf_pool_t*	buf_pool;
-	buf_block_t*	block;
 	const rec_t*	rec;
 	ulint		fold;
 	index_id_t	index_id;
@@ -871,15 +952,23 @@ btr_search_guess_on_hash(
 	btr_cur_t	cursor2;
 	btr_pcur_t	pcur;
 #endif
+
+	if (!btr_search_enabled) {
+		return(FALSE);
+	}
+
 	ut_ad(index && info && tuple && cursor && mtr);
 	ut_ad(!dict_index_is_ibuf(index));
 	ut_ad((latch_mode == BTR_SEARCH_LEAF)
 	      || (latch_mode == BTR_MODIFY_LEAF));
 
+	/* Not supported for spatial index */
+	ut_ad(!dict_index_is_spatial(index));
+
 	/* Note that, for efficiency, the struct info may not be protected by
 	any latch here! */
 
-	if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {
+	if (info->n_hash_potential == 0) {
 
 		return(FALSE);
 	}
@@ -887,8 +976,7 @@ btr_search_guess_on_hash(
 	cursor->n_fields = info->n_fields;
 	cursor->n_bytes = info->n_bytes;
 
-	if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple)
-			  < cursor->n_fields + (cursor->n_bytes > 0))) {
+	if (dtuple_get_n_fields(tuple) < btr_search_get_n_fields(cursor)) {
 
 		return(FALSE);
 	}
@@ -903,49 +991,69 @@ btr_search_guess_on_hash(
 	cursor->fold = fold;
 	cursor->flag = BTR_CUR_HASH;
 
-	if (UNIV_LIKELY(!has_search_latch)) {
-		rw_lock_s_lock(&btr_search_latch);
+	if (!has_search_latch) {
+		btr_search_s_lock(index);
 
-		if (UNIV_UNLIKELY(!btr_search_enabled)) {
-			goto failure_unlock;
+		if (!btr_search_enabled) {
+			btr_search_s_unlock(index);
+
+			btr_search_failure(info, cursor);
+
+			return(FALSE);
 		}
 	}
 
-	ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
-	ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
+	ut_ad(rw_lock_get_writer(btr_get_search_latch(index)) != RW_LOCK_X);
+	ut_ad(rw_lock_get_reader_count(btr_get_search_latch(index)) > 0);
+
+	rec = (rec_t*) ha_search_and_get_data(
+			btr_get_search_table(index), fold);
+
+	if (rec == NULL) {
 
-	rec = (rec_t*) ha_search_and_get_data(btr_search_sys->hash_index, fold);
+		if (!has_search_latch) {
+			btr_search_s_unlock(index);
+		}
+
+		btr_search_failure(info, cursor);
 
-	if (UNIV_UNLIKELY(!rec)) {
-		goto failure_unlock;
+		return(FALSE);
 	}
 
-	block = buf_block_align(rec);
+	buf_block_t*	block = buf_block_align(rec);
+
+	if (!has_search_latch) {
 
-	if (UNIV_LIKELY(!has_search_latch)) {
+		if (!buf_page_get_known_nowait(
+			latch_mode, block, BUF_MAKE_YOUNG,
+			__FILE__, __LINE__, mtr)) {
 
-		if (UNIV_UNLIKELY(
-			    !buf_page_get_known_nowait(latch_mode, block,
-						       BUF_MAKE_YOUNG,
-						       __FILE__, __LINE__,
-						       mtr))) {
-			goto failure_unlock;
+			if (!has_search_latch) {
+				btr_search_s_unlock(index);
+			}
+
+			btr_search_failure(info, cursor);
+
+			return(FALSE);
 		}
 
-		rw_lock_s_unlock(&btr_search_latch);
+		btr_search_s_unlock(index);
 
 		buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
 	}
 
-	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
+	if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+
 		ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
 
-		if (UNIV_LIKELY(!has_search_latch)) {
+		if (!has_search_latch) {
 
 			btr_leaf_page_release(block, latch_mode, mtr);
 		}
 
-		goto failure;
+		btr_search_failure(info, cursor);
+
+		return(FALSE);
 	}
 
 	ut_ad(page_rec_is_user_rec(rec));
@@ -954,23 +1062,26 @@ btr_search_guess_on_hash(
 
 	/* Check the validity of the guess within the page */
 
-	/* If we only have the latch on btr_search_latch, not on the
+	/* If we only have the latch on the search system, not on the
 	page, it only protects the columns of the record the cursor
 	is positioned on. We cannot look at the next of the previous
 	record to determine if our guess for the cursor position is
 	right. */
-	if (UNIV_UNLIKELY(index_id != btr_page_get_index_id(block->frame))
+	if (index_id != btr_page_get_index_id(block->frame)
 	    || !btr_search_check_guess(cursor,
 				       has_search_latch,
 				       tuple, mode, mtr)) {
-		if (UNIV_LIKELY(!has_search_latch)) {
+
+		if (!has_search_latch) {
 			btr_leaf_page_release(block, latch_mode, mtr);
 		}
 
-		goto failure;
+		btr_search_failure(info, cursor);
+
+		return(FALSE);
 	}
 
-	if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {
+	if (info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5) {
 
 		info->n_hash_potential++;
 	}
@@ -986,8 +1097,9 @@ btr_search_guess_on_hash(
 
 	btr_leaf_page_release(block, latch_mode, mtr);
 
-	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
-				    &cursor2, 0, mtr);
+	btr_cur_search_to_nth_level(
+		index, 0, tuple, mode, latch_mode, &cursor2, 0, mtr);
+
 	if (mode == PAGE_CUR_GE
 	    && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {
 
@@ -997,8 +1109,9 @@ btr_search_guess_on_hash(
 
 		info->last_hash_succ = FALSE;
 
-		btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
-					  &pcur, mtr);
+		btr_pcur_open_on_user_rec(
+			index, tuple, mode, latch_mode, &pcur, mtr);
+
 		ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
 	} else {
 		ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
@@ -1013,62 +1126,38 @@ btr_search_guess_on_hash(
 #ifdef UNIV_SEARCH_PERF_STAT
 	btr_search_n_succ++;
 #endif
-	if (UNIV_LIKELY(!has_search_latch)
-	    && buf_page_peek_if_too_old(&block->page)) {
+	if (!has_search_latch && buf_page_peek_if_too_old(&block->page)) {
 
 		buf_page_make_young(&block->page);
 	}
 
 	/* Increment the page get statistics though we did not really
 	fix the page: for user info only */
-	buf_pool = buf_pool_from_bpage(&block->page);
-	buf_pool->stat.n_page_gets++;
 
-	return(TRUE);
+	{
+		buf_pool_t*	buf_pool = buf_pool_from_bpage(&block->page);
 
-	/*-------------------------------------------*/
-failure_unlock:
-	if (UNIV_LIKELY(!has_search_latch)) {
-		rw_lock_s_unlock(&btr_search_latch);
+		++buf_pool->stat.n_page_gets;
 	}
-failure:
-	cursor->flag = BTR_CUR_HASH_FAIL;
-
-#ifdef UNIV_SEARCH_PERF_STAT
-	info->n_hash_fail++;
-
-	if (info->n_hash_succ > 0) {
-		info->n_hash_succ--;
-	}
-#endif
-	info->last_hash_succ = FALSE;
 
-	return(FALSE);
+	return(TRUE);
 }
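
The goto-based failure labels that this hunk removes ("failure_unlock" and
"failure") are folded into a btr_search_failure() helper whose body is not
shown here. Reconstructed from the removed code, the helper is presumably
equivalent to:

	static void
	btr_search_failure(btr_search_t* info, btr_cur_t* cursor)
	{
		cursor->flag = BTR_CUR_HASH_FAIL;

	#ifdef UNIV_SEARCH_PERF_STAT
		++info->n_hash_fail;

		if (info->n_hash_succ > 0) {
			--info->n_hash_succ;
		}
	#endif /* UNIV_SEARCH_PERF_STAT */

		info->last_hash_succ = FALSE;
	}

Each failure exit above now releases the AHI S-latch first (when this
function took it) and only then records the failure before returning FALSE.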
 
-/********************************************************************//**
-Drops a page hash index. */
-UNIV_INTERN
+/** Drop any adaptive hash index entries that point to an index page.
+@param[in,out]	block	block containing index page, s- or x-latched, or an
+			index page for which we know that
+			block->buf_fix_count == 0 or it is an index page which
+			has already been removed from the buf_pool->page_hash
+			i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
 void
-btr_search_drop_page_hash_index(
-/*============================*/
-	buf_block_t*	block)	/*!< in: block containing index page,
-				s- or x-latched, or an index page
-				for which we know that
-				block->buf_fix_count == 0 or it is an
-				index page which has already been
-				removed from the buf_pool->page_hash
-				i.e.: it is in state
-				BUF_BLOCK_REMOVE_HASH */
+btr_search_drop_page_hash_index(buf_block_t* block)
 {
-	hash_table_t*		table;
 	ulint			n_fields;
 	ulint			n_bytes;
 	const page_t*		page;
 	const rec_t*		rec;
 	ulint			fold;
 	ulint			prev_fold;
-	index_id_t		index_id;
 	ulint			n_cached;
 	ulint			n_recs;
 	ulint*			folds;
@@ -1076,31 +1165,59 @@ btr_search_drop_page_hash_index(
 	mem_heap_t*		heap;
 	const dict_index_t*	index;
 	ulint*			offsets;
+	rw_lock_t*		latch;
 	btr_search_t*		info;
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* Do a dirty check on block->index, return if the block is
-	not in the adaptive hash index. This is to avoid acquiring
-	shared btr_search_latch for performance consideration. */
-	if (!block->index) {
+	if (!btr_search_enabled) {
 		return;
 	}
 
 retry:
-	rw_lock_s_lock(&btr_search_latch);
+	/* Do a dirty check on block->index, return if the block is
+	not in the adaptive hash index. */
 	index = block->index;
 
-	if (UNIV_LIKELY(!index)) {
+	if (index == NULL) {
+		return;
+	}
+
+	ut_ad(block->page.buf_fix_count == 0
+	      || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH
+	      || rw_lock_own(&block->lock, RW_LOCK_S)
+	      || rw_lock_own(&block->lock, RW_LOCK_X));
 
-		rw_lock_s_unlock(&btr_search_latch);
+	/* We must not dereference index here, because it could be freed
+	if (index->table->n_ref_count == 0 && !mutex_own(&dict_sys->mutex)).
+	Determine the ahi_slot based on the block contents. */
 
+	const index_id_t	index_id
+		= btr_page_get_index_id(block->frame);
+	const ulint		ahi_slot
+		= ut_fold_ulint_pair(index_id, block->page.id.space())
+		% btr_ahi_parts;
+	latch = btr_search_latches[ahi_slot];
+
+	ut_ad(!btr_search_own_any(RW_LOCK_S));
+	ut_ad(!btr_search_own_any(RW_LOCK_X));
+
+	rw_lock_s_lock(latch);
+
+	if (block->index == NULL) {
+		rw_lock_s_unlock(latch);
 		return;
 	}
 
+	/* The index associated with a block must remain the
+	same, because we are holding block->lock or the block is
+	not accessible by other threads (BUF_BLOCK_REMOVE_HASH),
+	or the index is not accessible to other threads
+	(buf_fix_count == 0 when DROP TABLE or similar is executing
+	buf_LRU_drop_page_hash_for_tablespace()). */
+	ut_a(index == block->index);
+	ut_ad(!index->disable_ahi);
+
+	ut_ad(block->page.id.space() == index->space);
+	ut_a(index_id == index->id);
 	ut_a(!dict_index_is_ibuf(index));
 #ifdef UNIV_DEBUG
 	switch (dict_index_get_online_status(index)) {
@@ -1123,25 +1240,15 @@ retry:
 	}
 #endif /* UNIV_DEBUG */
 
-	table = btr_search_sys->hash_index;
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
-	      || rw_lock_own(&(block->lock), RW_LOCK_EX)
-	      || block->page.buf_fix_count == 0
-	      || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
-#endif /* UNIV_SYNC_DEBUG */
-
 	n_fields = block->curr_n_fields;
 	n_bytes = block->curr_n_bytes;
 
-	/* NOTE: The fields of block must not be accessed after
-	releasing btr_search_latch, as the index page might only
-	be s-latched! */
+	/* NOTE: The AHI fields of block must not be accessed after
+	releasing search latch, as the index page might only be s-latched! */
 
-	rw_lock_s_unlock(&btr_search_latch);
+	rw_lock_s_unlock(latch);
 
-	ut_a(n_fields + n_bytes > 0);
+	ut_a(n_fields > 0 || n_bytes > 0);
 
 	page = block->frame;
 	n_recs = page_get_n_recs(page);
@@ -1149,26 +1256,23 @@ retry:
 	/* Calculate and cache fold values into an array for fast deletion
 	from the hash index */
 
-	folds = (ulint*) mem_alloc(n_recs * sizeof(ulint));
+	folds = (ulint*) ut_malloc_nokey(n_recs * sizeof(ulint));
 
 	n_cached = 0;
 
 	rec = page_get_infimum_rec(page);
 	rec = page_rec_get_next_low(rec, page_is_comp(page));
 
-	index_id = btr_page_get_index_id(page);
-
-	ut_a(index_id == index->id);
-
 	prev_fold = 0;
 
 	heap = NULL;
 	offsets = NULL;
 
 	while (!page_rec_is_supremum(rec)) {
-		offsets = rec_get_offsets(rec, index, offsets,
-					  n_fields + (n_bytes > 0), &heap);
-		ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
+		offsets = rec_get_offsets(
+			rec, index, offsets,
+			btr_search_get_n_fields(n_fields, n_bytes),
+			&heap);
 		fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
 
 		if (fold == prev_fold && prev_fold != 0) {
@@ -1190,7 +1294,7 @@ next_rec:
 		mem_heap_free(heap);
 	}
 
-	rw_lock_x_lock(&btr_search_latch);
+	rw_lock_x_lock(latch);
 
 	if (UNIV_UNLIKELY(!block->index)) {
 		/* Someone else has meanwhile dropped the hash index */
@@ -1200,21 +1304,23 @@ next_rec:
 
 	ut_a(block->index == index);
 
-	if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
-	    || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {
+	if (block->curr_n_fields != n_fields
+	    || block->curr_n_bytes != n_bytes) {
 
 		/* Someone else has meanwhile built a new hash index on the
 		page, with different parameters */
 
-		rw_lock_x_unlock(&btr_search_latch);
+		rw_lock_x_unlock(latch);
 
-		mem_free(folds);
+		ut_free(folds);
 		goto retry;
 	}
 
 	for (i = 0; i < n_cached; i++) {
 
-		ha_remove_all_nodes_to_page(table, folds[i], page);
+		ha_remove_all_nodes_to_page(
+			btr_search_sys->hash_tables[ahi_slot],
+			folds[i], page);
 	}
 
 	info = btr_search_get_info(block->index);
@@ -1230,41 +1336,39 @@ cleanup:
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 	if (UNIV_UNLIKELY(block->n_pointers)) {
 		/* Corruption */
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: Corruption of adaptive hash index."
-			" After dropping\n"
-			"InnoDB: the hash index to a page of %s,"
-			" still %lu hash nodes remain.\n",
-			index->name, (ulong) block->n_pointers);
-		rw_lock_x_unlock(&btr_search_latch);
+		ib::error() << "Corruption of adaptive hash index."
+			<< " After dropping, the hash index to a page of "
+			<< index->name
+			<< ", still " << block->n_pointers
+			<< " hash nodes remain.";
+		rw_lock_x_unlock(latch);
 
 		ut_ad(btr_search_validate());
 	} else {
-		rw_lock_x_unlock(&btr_search_latch);
+		rw_lock_x_unlock(latch);
 	}
 #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-	rw_lock_x_unlock(&btr_search_latch);
+	rw_lock_x_unlock(latch);
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 
-	mem_free(folds);
+	ut_free(folds);
 }
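
The slot computation in this function deserves a note: with the adaptive
hash index now partitioned into btr_ahi_parts tables, each guarded by its
own latch, the partition for a page is derived from on-page data alone, so
the possibly-freed dict_index_t never needs to be dereferenced:

	const index_id_t	index_id
		= btr_page_get_index_id(block->frame);
	const ulint		ahi_slot
		= ut_fold_ulint_pair(index_id, block->page.id.space())
		% btr_ahi_parts;
	rw_lock_t*		latch = btr_search_latches[ahi_slot];

btr_get_search_latch() and btr_get_search_table(), used elsewhere in this
patch, presumably apply the same fold to index->id and index->space, so
both paths agree on the partition.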
 
-/********************************************************************//**
-Drops a possible page hash index when a page is evicted from the buffer pool
-or freed in a file segment. */
-UNIV_INTERN
+/** Drop any adaptive hash index entries that may point to an index
+page that may be in the buffer pool, when a page is evicted from the
+buffer pool or freed in a file segment.
+@param[in]	page_id		page id
+@param[in]	page_size	page size */
 void
 btr_search_drop_page_hash_when_freed(
-/*=================================*/
-	ulint	space,		/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no)	/*!< in: page number */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size)
 {
 	buf_block_t*	block;
 	mtr_t		mtr;
 
+	ut_d(export_vars.innodb_ahi_drop_lookups++);
+
 	mtr_start(&mtr);
 
 	/* If the caller has a latch on the page, then the caller must
@@ -1273,35 +1377,49 @@ btr_search_drop_page_hash_when_freed(
 	are possibly holding, we cannot s-latch the page, but must
 	(recursively) x-latch it, even though we are only reading. */
 
-	block = buf_page_get_gen(space, zip_size, page_no, RW_X_LATCH, NULL,
+	block = buf_page_get_gen(page_id, page_size, RW_X_LATCH, NULL,
 				 BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
 				 &mtr);
 
-	if (block && block->index) {
+	if (block) {
+
+		/* If the AHI is still valid, the page cannot be in
+		the freed state, because the AHI is dropped when a
+		page is freed. */
+		ut_ad(!block->page.file_page_was_freed);
 
 		buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
 
-		btr_search_drop_page_hash_index(block);
+		dict_index_t*	index = block->index;
+		if (index != NULL) {
+			/* In all our callers, the table handle should
+			be open, or we should be in the process of
+			dropping the table (preventing eviction). */
+			ut_ad(index->table->n_ref_count > 0
+			      || mutex_own(&dict_sys->mutex));
+			btr_search_drop_page_hash_index(block);
+		}
 	}
 
 	mtr_commit(&mtr);
 }
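
A hypothetical caller sketch (the local names here are illustrative, not
from this patch): a routine freeing the pages of a file segment would
invoke the function once per page. BUF_PEEK_IF_IN_POOL makes
buf_page_get_gen() return NULL instead of reading the page from disk, so
only pages already resident in the buffer pool are processed:

	const page_id_t		page_id(space_id, page_no);
	const page_size_t	page_size(space_flags);

	btr_search_drop_page_hash_when_freed(page_id, page_size);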
 
-/********************************************************************//**
-Builds a hash index on a page with the given parameters. If the page already
+/** Build a hash index on a page with the given parameters. If the page already
 has a hash index with different parameters, the old hash index is removed.
 If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not. */
+sensible, and does not build a hash index if not.
+@param[in,out]	index		index for which to build.
+@param[in,out]	block		index page, s-/x- latched.
+@param[in]	n_fields	hash this many full fields
+@param[in]	n_bytes		hash this many bytes of the next field
+@param[in]	left_side	hash for searches from left side */
 static
 void
 btr_search_build_page_hash_index(
-/*=============================*/
-	dict_index_t*	index,	/*!< in: index for which to build */
-	buf_block_t*	block,	/*!< in: index page, s- or x-latched */
-	ulint		n_fields,/*!< in: hash this many full fields */
-	ulint		n_bytes,/*!< in: hash this many bytes from the next
-				field */
-	ibool		left_side)/*!< in: hash for searches from left side? */
+	dict_index_t*	index,
+	buf_block_t*	block,
+	ulint		n_fields,
+	ulint		n_bytes,
+	ibool		left_side)
 {
 	hash_table_t*	table;
 	page_t*		page;
@@ -1317,63 +1435,60 @@ btr_search_build_page_hash_index(
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
-	rec_offs_init(offsets_);
 
+	if (index->disable_ahi || !btr_search_enabled) {
+		return;
+	}
+
+	rec_offs_init(offsets_);
 	ut_ad(index);
+	ut_ad(block->page.id.space() == index->space);
 	ut_a(!dict_index_is_ibuf(index));
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
-	      || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_S)
+	      || rw_lock_own(&(block->lock), RW_LOCK_X));
 
-	rw_lock_s_lock(&btr_search_latch);
+	btr_search_s_lock(index);
 
-	if (!btr_search_enabled) {
-		rw_lock_s_unlock(&btr_search_latch);
-		return;
-	}
-
-	table = btr_search_sys->hash_index;
+	table = btr_get_search_table(index);
 	page = buf_block_get_frame(block);
 
 	if (block->index && ((block->curr_n_fields != n_fields)
 			     || (block->curr_n_bytes != n_bytes)
 			     || (block->curr_left_side != left_side))) {
 
-		rw_lock_s_unlock(&btr_search_latch);
+		btr_search_s_unlock(index);
 
 		btr_search_drop_page_hash_index(block);
 	} else {
-		rw_lock_s_unlock(&btr_search_latch);
+		btr_search_s_unlock(index);
 	}
 
-	n_recs = page_get_n_recs(page);
+	/* Check that the values for hash index build are sensible */
 
-	if (n_recs == 0) {
+	if (n_fields == 0 && n_bytes == 0) {
 
 		return;
 	}
 
-	/* Check that the values for hash index build are sensible */
-
-	if (n_fields + n_bytes == 0) {
-
+	if (dict_index_get_n_unique_in_tree(index)
+	    < btr_search_get_n_fields(n_fields, n_bytes)) {
 		return;
 	}
 
-	if (dict_index_get_n_unique_in_tree(index) < n_fields
-	    || (dict_index_get_n_unique_in_tree(index) == n_fields
-		&& n_bytes > 0)) {
+	n_recs = page_get_n_recs(page);
+
+	if (n_recs == 0) {
+
 		return;
 	}
 
 	/* Calculate and cache fold values and corresponding records into
 	an array for fast insertion to the hash index */
 
-	folds = (ulint*) mem_alloc(n_recs * sizeof(ulint));
-	recs = (rec_t**) mem_alloc(n_recs * sizeof(rec_t*));
+	folds = (ulint*) ut_malloc_nokey(n_recs * sizeof(ulint));
+	recs = (rec_t**) ut_malloc_nokey(n_recs * sizeof(rec_t*));
 
 	n_cached = 0;
 
@@ -1381,16 +1496,12 @@ btr_search_build_page_hash_index(
 
 	rec = page_rec_get_next(page_get_infimum_rec(page));
 
-	offsets = rec_get_offsets(rec, index, offsets,
-				  n_fields + (n_bytes > 0), &heap);
-
-	if (!page_rec_is_supremum(rec)) {
-		ut_a(n_fields <= rec_offs_n_fields(offsets));
-
-		if (n_bytes > 0) {
-			ut_a(n_fields < rec_offs_n_fields(offsets));
-		}
-	}
+	offsets = rec_get_offsets(
+		rec, index, offsets,
+		btr_search_get_n_fields(n_fields, n_bytes),
+		&heap);
+	ut_ad(page_rec_is_supremum(rec)
+	      || n_fields + (n_bytes > 0) == rec_offs_n_fields(offsets));
 
 	fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);
 
@@ -1416,8 +1527,9 @@ btr_search_build_page_hash_index(
 			break;
 		}
 
-		offsets = rec_get_offsets(next_rec, index, offsets,
-					  n_fields + (n_bytes > 0), &heap);
+		offsets = rec_get_offsets(
+			next_rec, index, offsets,
+			btr_search_get_n_fields(n_fields, n_bytes), &heap);
 		next_fold = rec_fold(next_rec, offsets, n_fields,
 				     n_bytes, index->id);
 
@@ -1440,11 +1552,11 @@ btr_search_build_page_hash_index(
 		fold = next_fold;
 	}
 
-	btr_search_check_free_space_in_heap();
+	btr_search_check_free_space_in_heap(index);
 
-	rw_lock_x_lock(&btr_search_latch);
+	btr_search_x_lock(index);
 
-	if (UNIV_UNLIKELY(!btr_search_enabled)) {
+	if (!btr_search_enabled) {
 		goto exit_func;
 	}
 
@@ -1478,42 +1590,42 @@ btr_search_build_page_hash_index(
 	MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED);
 	MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached);
 exit_func:
-	rw_lock_x_unlock(&btr_search_latch);
+	btr_search_x_unlock(index);
 
-	mem_free(folds);
-	mem_free(recs);
+	ut_free(folds);
+	ut_free(recs);
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
 }
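
Judging by the assertion on rec_offs_n_fields() above and the removed
n_fields + (n_bytes > 0) expressions, the new btr_search_get_n_fields()
helper presumably reduces to the old inline arithmetic:

	inline ulint
	btr_search_get_n_fields(ulint n_fields, ulint n_bytes)
	{
		/* A prefix of n_bytes of the next field counts as one
		more field when computing record offsets. */
		return(n_fields + (n_bytes > 0 ? 1 : 0));
	}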
 
-/********************************************************************//**
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-UNIV_INTERN
+/** Moves or deletes hash entries for moved records. If new_page is already
+hashed, then the hash index for page, if any, is dropped. If new_page is not
+hashed, and page is hashed, then a new hash index is built to new_page with the
+same parameters as page (this often happens when a page is split).
+@param[in,out]	new_block	records are copied to this page.
+@param[in,out]	block		index page from which records are copied, and the
+				copied records will be deleted from this page.
+@param[in,out]	index		record descriptor */
 void
 btr_search_move_or_delete_hash_entries(
-/*===================================*/
-	buf_block_t*	new_block,	/*!< in: records are copied
-					to this page */
-	buf_block_t*	block,		/*!< in: index page from which
-					records were copied, and the
-					copied records will be deleted
-					from this page */
-	dict_index_t*	index)		/*!< in: record descriptor */
+	buf_block_t*	new_block,
+	buf_block_t*	block,
+	dict_index_t*	index)
 {
-	ulint	n_fields;
-	ulint	n_bytes;
-	ibool	left_side;
+	/* AHI is disabled for intrinsic tables because it depends on the
+	index id, which is assigned dynamically for intrinsic table indexes
+	rather than through a centralized index id generator. */
+	if (index->disable_ahi || !btr_search_enabled) {
+		return;
+	}
+
+	ut_ad(!dict_table_is_intrinsic(index->table));
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-	ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
+	ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_X));
 
-	rw_lock_s_lock(&btr_search_latch);
+	btr_search_s_lock(index);
 
 	ut_a(!new_block->index || new_block->index == index);
 	ut_a(!block->index || block->index == index);
@@ -1522,7 +1634,7 @@ btr_search_move_or_delete_hash_entries(
 
 	if (new_block->index) {
 
-		rw_lock_s_unlock(&btr_search_latch);
+		btr_search_s_unlock(index);
 
 		btr_search_drop_page_hash_index(block);
 
@@ -1530,39 +1642,34 @@ btr_search_move_or_delete_hash_entries(
 	}
 
 	if (block->index) {
-
-		n_fields = block->curr_n_fields;
-		n_bytes = block->curr_n_bytes;
-		left_side = block->curr_left_side;
+		ulint	n_fields = block->curr_n_fields;
+		ulint	n_bytes = block->curr_n_bytes;
+		ibool	left_side = block->curr_left_side;
 
 		new_block->n_fields = block->curr_n_fields;
 		new_block->n_bytes = block->curr_n_bytes;
 		new_block->left_side = left_side;
 
-		rw_lock_s_unlock(&btr_search_latch);
+		btr_search_s_unlock(index);
 
-		ut_a(n_fields + n_bytes > 0);
+		ut_a(n_fields > 0 || n_bytes > 0);
 
-		btr_search_build_page_hash_index(index, new_block, n_fields,
-						 n_bytes, left_side);
+		btr_search_build_page_hash_index(
+			index, new_block, n_fields, n_bytes, left_side);
 		ut_ad(n_fields == block->curr_n_fields);
 		ut_ad(n_bytes == block->curr_n_bytes);
 		ut_ad(left_side == block->curr_left_side);
 		return;
 	}
 
-	rw_lock_s_unlock(&btr_search_latch);
+	btr_search_s_unlock(index);
 }
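
A usage sketch for the function above (hypothetical caller, typical of a
page split): after the records have been copied from block to new_block,
the caller lets the adaptive hash index follow in a single call:

	btr_search_move_or_delete_hash_entries(new_block, block, index);

If new_block already carries hash entries, the hash index on block is
simply dropped; otherwise a fresh hash index is built on new_block with
block's current (n_fields, n_bytes, left_side) parameters.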
 
-/********************************************************************//**
-Updates the page hash index when a single record is deleted from a page. */
-UNIV_INTERN
+/** Updates the page hash index when a single record is deleted from a page.
+@param[in]	cursor	cursor which was positioned on the record to delete
+			using btr_cur_search_, the record is not yet deleted. */
 void
-btr_search_update_hash_on_delete(
-/*=============================*/
-	btr_cur_t*	cursor)	/*!< in: cursor which was positioned on the
-				record to delete using btr_cur_search_...,
-				the record is not yet deleted */
+btr_search_update_hash_on_delete(btr_cur_t* cursor)
 {
 	hash_table_t*	table;
 	buf_block_t*	block;
@@ -1573,11 +1680,13 @@ btr_search_update_hash_on_delete(
 	mem_heap_t*	heap		= NULL;
 	rec_offs_init(offsets_);
 
+	if (cursor->index->disable_ahi || !btr_search_enabled) {
+		return;
+	}
+
 	block = btr_cur_get_block(cursor);
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
 
 	index = block->index;
 
@@ -1586,11 +1695,12 @@ btr_search_update_hash_on_delete(
 		return;
 	}
 
+	ut_ad(block->page.id.space() == index->space);
 	ut_a(index == cursor->index);
-	ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
+	ut_a(block->curr_n_fields > 0 || block->curr_n_bytes > 0);
 	ut_a(!dict_index_is_ibuf(index));
 
-	table = btr_search_sys->hash_index;
+	table = btr_get_search_table(index);
 
 	rec = btr_cur_get_rec(cursor);
 
@@ -1601,7 +1711,7 @@ btr_search_update_hash_on_delete(
 		mem_heap_free(heap);
 	}
 
-	rw_lock_x_lock(&btr_search_latch);
+	btr_search_x_lock(index);
 
 	if (block->index) {
 		ut_a(block->index == index);
@@ -1614,32 +1724,30 @@ btr_search_update_hash_on_delete(
 		}
 	}
 
-	rw_lock_x_unlock(&btr_search_latch);
+	btr_search_x_unlock(index);
 }
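
For context, the elided middle of this function computes the fold of the
record about to be deleted under the block's current hashing parameters; a
sketch consistent with the surrounding code (the exact lines are not shown
in this hunk):

	offsets = rec_get_offsets(
		rec, index, offsets_,
		btr_search_get_n_fields(block->curr_n_fields,
					block->curr_n_bytes), &heap);
	fold = rec_fold(rec, offsets, block->curr_n_fields,
			block->curr_n_bytes, index->id);

The x-latched section then presumably removes exactly that node with
ha_search_and_delete_if_found(table, fold, rec) while block->index is
still set.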
 
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
+/** Updates the page hash index when a single record is inserted on a page.
+@param[in]	cursor	cursor which was positioned to the place to insert
+			using btr_cur_search_, and the new record has been
+			inserted next to the cursor. */
 void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
-	btr_cur_t*	cursor)	/*!< in: cursor which was positioned to the
-				place to insert using btr_cur_search_...,
-				and the new record has been inserted next
-				to the cursor */
+btr_search_update_hash_node_on_insert(btr_cur_t* cursor)
 {
 	hash_table_t*	table;
 	buf_block_t*	block;
 	dict_index_t*	index;
 	rec_t*		rec;
 
+	if (cursor->index->disable_ahi || !btr_search_enabled) {
+		return;
+	}
+
 	rec = btr_cur_get_rec(cursor);
 
 	block = btr_cur_get_block(cursor);
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
 
 	index = block->index;
 
@@ -1651,7 +1759,7 @@ btr_search_update_hash_node_on_insert(
 	ut_a(cursor->index == index);
 	ut_a(!dict_index_is_ibuf(index));
 
-	rw_lock_x_lock(&btr_search_latch);
+	btr_search_x_lock(index);
 
 	if (!block->index) {
 
@@ -1665,7 +1773,7 @@ btr_search_update_hash_node_on_insert(
 	    && (cursor->n_bytes == block->curr_n_bytes)
 	    && !block->curr_left_side) {
 
-		table = btr_search_sys->hash_index;
+		table = btr_get_search_table(index);
 
 		if (ha_search_and_update_if_found(
 			table, cursor->fold, rec, block,
@@ -1674,24 +1782,21 @@ btr_search_update_hash_node_on_insert(
 		}
 
 func_exit:
-		rw_lock_x_unlock(&btr_search_latch);
+		btr_search_x_unlock(index);
 	} else {
-		rw_lock_x_unlock(&btr_search_latch);
+		btr_search_x_unlock(index);
 
 		btr_search_update_hash_on_insert(cursor);
 	}
 }
 
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_insert(
-/*=============================*/
-	btr_cur_t*	cursor)	/*!< in: cursor which was positioned to the
+/** Updates the page hash index when a single record is inserted on a page.
+@param[in,out]	cursor		cursor which was positioned to the
 				place to insert using btr_cur_search_...,
 				and the new record has been inserted next
 				to the cursor */
+void
+btr_search_update_hash_on_insert(btr_cur_t* cursor)
 {
 	hash_table_t*	table;
 	buf_block_t*	block;
@@ -1711,11 +1816,13 @@ btr_search_update_hash_on_insert(
 	ulint*		offsets		= offsets_;
 	rec_offs_init(offsets_);
 
+	if (cursor->index->disable_ahi || !btr_search_enabled) {
+		return;
+	}
+
 	block = btr_cur_get_block(cursor);
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
 
 	index = block->index;
 
@@ -1724,12 +1831,14 @@ btr_search_update_hash_on_insert(
 		return;
 	}
 
-	btr_search_check_free_space_in_heap();
+	ut_ad(block->page.id.space() == index->space);
+	btr_search_check_free_space_in_heap(index);
 
-	table = btr_search_sys->hash_index;
+	table = btr_get_search_table(index);
 
 	rec = btr_cur_get_rec(cursor);
 
+	ut_a(!index->disable_ahi);
 	ut_a(index == cursor->index);
 	ut_a(!dict_index_is_ibuf(index));
 
@@ -1745,20 +1854,22 @@ btr_search_update_hash_on_insert(
 	ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index->id);
 
 	if (!page_rec_is_supremum(next_rec)) {
-		offsets = rec_get_offsets(next_rec, index, offsets,
-					  n_fields + (n_bytes > 0), &heap);
+		offsets = rec_get_offsets(
+			next_rec, index, offsets,
+			btr_search_get_n_fields(n_fields, n_bytes), &heap);
 		next_fold = rec_fold(next_rec, offsets, n_fields,
 				     n_bytes, index->id);
 	}
 
 	if (!page_rec_is_infimum(rec)) {
-		offsets = rec_get_offsets(rec, index, offsets,
-					  n_fields + (n_bytes > 0), &heap);
+		offsets = rec_get_offsets(
+			rec, index, offsets,
+			btr_search_get_n_fields(n_fields, n_bytes), &heap);
 		fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);
 	} else {
 		if (left_side) {
 
-			rw_lock_x_lock(&btr_search_latch);
+			btr_search_x_lock(index);
 
 			locked = TRUE;
 
@@ -1776,7 +1887,7 @@ btr_search_update_hash_on_insert(
 
 		if (!locked) {
 
-			rw_lock_x_lock(&btr_search_latch);
+			btr_search_x_lock(index);
 
 			locked = TRUE;
 
@@ -1798,7 +1909,7 @@ check_next_rec:
 		if (!left_side) {
 
 			if (!locked) {
-				rw_lock_x_lock(&btr_search_latch);
+				btr_search_x_lock(index);
 
 				locked = TRUE;
 
@@ -1817,7 +1928,7 @@ check_next_rec:
 
 		if (!locked) {
 
-			rw_lock_x_lock(&btr_search_latch);
+			btr_search_x_lock(index);
 
 			locked = TRUE;
 
@@ -1827,13 +1938,7 @@ check_next_rec:
 		}
 
 		if (!left_side) {
-
 			ha_insert_for_fold(table, ins_fold, block, ins_rec);
-			/*
-			fputs("Hash insert for ", stderr);
-			dict_index_name_print(stderr, index);
-			fprintf(stderr, " fold %lu\n", ins_fold);
-			*/
 		} else {
 			ha_insert_for_fold(table, next_fold, block, next_rec);
 		}
@@ -1844,21 +1949,20 @@ function_exit:
 		mem_heap_free(heap);
 	}
 	if (locked) {
-		rw_lock_x_unlock(&btr_search_latch);
+		btr_search_x_unlock(index);
 	}
 }
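
The control flow above follows a lazy-latching pattern: the AHI x-latch is
taken only once some hash node actually needs to change, and at most once.
Stripped to a skeleton (assuming the surrounding names):

	ibool	locked = FALSE;

	/* fold values are computed without any AHI latch; whenever
	one of them requires a hash modification: */

	if (!locked) {
		btr_search_x_lock(index);
		locked = TRUE;

		if (!btr_search_enabled) {
			goto function_exit;
		}
	}

	ha_insert_for_fold(table, ins_fold, block, ins_rec);

function_exit:
	if (locked) {
		btr_search_x_unlock(index);
	}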
 
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/********************************************************************//**
-Validates the search system.
-@return	TRUE if ok */
-UNIV_INTERN
+
+/** Validates the search system for the given hash table.
+@param[in]	hash_table_id	hash table to validate
+@return TRUE if ok */
+static
 ibool
-btr_search_validate(void)
-/*=====================*/
+btr_search_hash_table_validate(ulint hash_table_id)
 {
 	ha_node_t*	node;
-	ulint		n_page_dumps	= 0;
 	ibool		ok		= TRUE;
 	ulint		i;
 	ulint		cell_count;
@@ -1866,30 +1970,50 @@ btr_search_validate(void)
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
 
+	if (!btr_search_enabled) {
+		return(TRUE);
+	}
+
 	/* How many cells to check before temporarily releasing
-	btr_search_latch. */
+	search latches. */
 	ulint		chunk_size = 10000;
 
 	rec_offs_init(offsets_);
 
-	rw_lock_x_lock(&btr_search_latch);
+	btr_search_x_lock_all();
 	buf_pool_mutex_enter_all();
 
-	cell_count = hash_get_n_cells(btr_search_sys->hash_index);
+	cell_count = hash_get_n_cells(
+			btr_search_sys->hash_tables[hash_table_id]);
 
 	for (i = 0; i < cell_count; i++) {
-		/* We release btr_search_latch every once in a while to
+		/* We release search latches every once in a while to
 		give other queries a chance to run. */
 		if ((i != 0) && ((i % chunk_size) == 0)) {
+
 			buf_pool_mutex_exit_all();
-			rw_lock_x_unlock(&btr_search_latch);
+			btr_search_x_unlock_all();
+
 			os_thread_yield();
-			rw_lock_x_lock(&btr_search_latch);
+
+			btr_search_x_lock_all();
 			buf_pool_mutex_enter_all();
+
+			ulint	curr_cell_count = hash_get_n_cells(
+				btr_search_sys->hash_tables[hash_table_id]);
+
+			if (cell_count != curr_cell_count) {
+
+				cell_count = curr_cell_count;
+
+				if (i >= cell_count) {
+					break;
+				}
+			}
 		}
 
-		node = (ha_node_t*)
-			hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+		node = (ha_node_t*) hash_get_nth_cell(
+			btr_search_sys->hash_tables[hash_table_id], i)->node;
 
 		for (; node != NULL; node = node->next) {
 			const buf_block_t*	block
@@ -1910,8 +2034,7 @@ btr_search_validate(void)
 				assertion and the comment below) */
 				hash_block = buf_block_hash_get(
 					buf_pool,
-					buf_block_get_space(block),
-					buf_block_get_page_no(block));
+					block->page.id);
 			} else {
 				hash_block = NULL;
 			}
@@ -1927,94 +2050,115 @@ btr_search_validate(void)
 				After that, it invokes
 				btr_search_drop_page_hash_index() to
 				remove the block from
-				btr_search_sys->hash_index. */
+				btr_search_sys->hash_tables[i]. */
 
 				ut_a(buf_block_get_state(block)
 				     == BUF_BLOCK_REMOVE_HASH);
 			}
 
 			ut_a(!dict_index_is_ibuf(block->index));
+			ut_ad(block->page.id.space() == block->index->space);
 
 			page_index_id = btr_page_get_index_id(block->frame);
 
-			offsets = rec_get_offsets(node->data,
-						  block->index, offsets,
-						  block->curr_n_fields
-						  + (block->curr_n_bytes > 0),
-						  &heap);
-
-			if (!block->index || node->fold
-			    != rec_fold(node->data,
-					offsets,
-					block->curr_n_fields,
-					block->curr_n_bytes,
-					page_index_id)) {
+			offsets = rec_get_offsets(
+				node->data, block->index, offsets,
+				btr_search_get_n_fields(block->curr_n_fields,
+							block->curr_n_bytes),
+				&heap);
+
+			const ulint	fold = rec_fold(
+				node->data, offsets,
+				block->curr_n_fields,
+				block->curr_n_bytes,
+				page_index_id);
+
+			if (node->fold != fold) {
 				const page_t*	page = block->frame;
 
 				ok = FALSE;
-				ut_print_timestamp(stderr);
-
-				fprintf(stderr,
-					"  InnoDB: Error in an adaptive hash"
-					" index pointer to page %lu\n"
-					"InnoDB: ptr mem address %p"
-					" index id %llu,"
-					" node fold %lu, rec fold %lu\n",
-					(ulong) page_get_page_no(page),
-					node->data,
-					(ullint) page_index_id,
-					(ulong) node->fold,
-					(ulong) rec_fold(node->data,
-							 offsets,
-							 block->curr_n_fields,
-							 block->curr_n_bytes,
-							 page_index_id));
+
+				ib::error() << "Error in an adaptive hash"
+					<< " index pointer to page "
+					<< page_id_t(page_get_space_id(page),
+						     page_get_page_no(page))
+					<< ", ptr mem address "
+					<< reinterpret_cast<const void*>(
+						node->data)
+					<< ", index id " << page_index_id
+					<< ", node fold " << node->fold
+					<< ", rec fold " << fold;
 
 				fputs("InnoDB: Record ", stderr);
 				rec_print_new(stderr, node->data, offsets);
 				fprintf(stderr, "\nInnoDB: on that page."
 					" Page mem address %p, is hashed %p,"
-					" n fields %lu, n bytes %lu\n"
+					" n fields %lu\n"
 					"InnoDB: side %lu\n",
 					(void*) page, (void*) block->index,
 					(ulong) block->curr_n_fields,
-					(ulong) block->curr_n_bytes,
 					(ulong) block->curr_left_side);
-
-				if (n_page_dumps < 20) {
-					buf_page_print(
-						page, 0,
-						BUF_PAGE_PRINT_NO_CRASH);
-					n_page_dumps++;
-				}
+				ut_ad(0);
 			}
 		}
 	}
 
 	for (i = 0; i < cell_count; i += chunk_size) {
-		ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);
-
-		/* We release btr_search_latch every once in a while to
+		/* We release search latches every once in a while to
 		give other queries a chance to run. */
 		if (i != 0) {
+
 			buf_pool_mutex_exit_all();
-			rw_lock_x_unlock(&btr_search_latch);
+			btr_search_x_unlock_all();
+
 			os_thread_yield();
-			rw_lock_x_lock(&btr_search_latch);
+
+			btr_search_x_lock_all();
 			buf_pool_mutex_enter_all();
+
+			ulint	curr_cell_count = hash_get_n_cells(
+				btr_search_sys->hash_tables[hash_table_id]);
+
+			if (cell_count != curr_cell_count) {
+
+				cell_count = curr_cell_count;
+
+				if (i >= cell_count) {
+					break;
+				}
+			}
 		}
 
-		if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
+		ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);
+
+		if (!ha_validate(btr_search_sys->hash_tables[hash_table_id],
+				 i, end_index)) {
 			ok = FALSE;
 		}
 	}
 
 	buf_pool_mutex_exit_all();
-	rw_lock_x_unlock(&btr_search_latch);
+	btr_search_x_unlock_all();
+
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
 
 	return(ok);
 }
+
+/** Validate the search system.
+@return true if ok. */
+bool
+btr_search_validate()
+{
+	for (ulint i = 0; i < btr_ahi_parts; ++i) {
+		if (!btr_search_hash_table_validate(i)) {
+			return(false);
+		}
+	}
+
+	return(true);
+}
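
Debug builds exercise this wrapper through assertions elsewhere in the
file, e.g.

	ut_ad(btr_search_validate());

which now walks each of the btr_ahi_parts hash tables in turn, re-reading
the cell count after every latch release in case a table was resized in
the meantime.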
+
 #endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index 958b3b5..a4cae99 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,12 +23,10 @@ Binary buddy allocator for compressed pages
 Created December 2006 by Marko Makela
 *******************************************************/
 
-#define THIS_MODULE
 #include "buf0buddy.h"
 #ifdef UNIV_NONINL
 # include "buf0buddy.ic"
 #endif
-#undef THIS_MODULE
 #include "buf0buf.h"
 #include "buf0lru.h"
 #include "buf0flu.h"
@@ -71,11 +69,11 @@ are written.*/
 
 /** Value that we stamp on all buffers that are currently on the zip_free
 list. This value is stamped at BUF_BUDDY_STAMP_OFFSET offset */
-#define BUF_BUDDY_STAMP_FREE	(SRV_LOG_SPACE_FIRST_ID)
+#define BUF_BUDDY_STAMP_FREE	 SRV_LOG_SPACE_FIRST_ID
 
 /** Stamp value for non-free buffers. Will be overwritten by a non-zero
 value by the consumer of the block */
-#define BUF_BUDDY_STAMP_NONFREE	(0XFFFFFFFF)
+#define BUF_BUDDY_STAMP_NONFREE	0XFFFFFFFFUL
 
 #if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE
 # error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE"
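
For orientation, the stamping scheme itself is unchanged by this hunk: a
free buddy block is recognizable by a stamp stored inside the block, in
effect

	mach_write_to_4(buf + BUF_BUDDY_STAMP_OFFSET, BUF_BUDDY_STAMP_FREE);

and the consumer of a block later overwrites that field, which is why
BUF_BUDDY_STAMP_FREE must compare different from BUF_BUDDY_STAMP_NONFREE,
as the #error above enforces.
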
@@ -111,7 +109,7 @@ buf_buddy_mem_invalid(
 
 /**********************************************************************//**
 Check if a buddy is stamped free.
-@return	whether the buddy is free */
+@return whether the buddy is free */
 UNIV_INLINE __attribute__((warn_unused_result))
 bool
 buf_buddy_stamp_is_free(
@@ -140,7 +138,7 @@ buf_buddy_stamp_free(
 
 /**********************************************************************//**
 Stamps a buddy nonfree.
-@param[in/out]	buf	block to stamp
+@param[in,out]	buf	block to stamp
 @param[in]	i	block size */
 #define buf_buddy_stamp_nonfree(buf, i) do {				\
 	buf_buddy_mem_invalid(buf, i);					\
@@ -152,7 +150,7 @@ Stamps a buddy nonfree.
 
 /**********************************************************************//**
 Get the offset of the buddy of a compressed page frame.
-@return	the buddy relative of page */
+@return the buddy relative of page */
 UNIV_INLINE
 void*
 buf_buddy_get(
@@ -174,23 +172,33 @@ buf_buddy_get(
 	}
 }
 
+#ifdef UNIV_DEBUG
 /** Validate a given zip_free list. */
 struct	CheckZipFree {
-	ulint	i;
-	CheckZipFree(ulint i) : i (i) {}
+	CheckZipFree(ulint i) : m_i(i) {}
 
 	void	operator()(const buf_buddy_free_t* elem) const
 	{
 		ut_a(buf_buddy_stamp_is_free(elem));
-		ut_a(elem->stamp.size <= i);
+		ut_a(elem->stamp.size <= m_i);
 	}
+
+	ulint		m_i;
 };
 
-#define BUF_BUDDY_LIST_VALIDATE(bp, i)				\
-	UT_LIST_VALIDATE(list, buf_buddy_free_t,		\
-			 bp->zip_free[i], CheckZipFree(i))
+/** Validate a buddy list.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	i		buddy size to validate */
+static
+void
+buf_buddy_list_validate(
+	const buf_pool_t*	buf_pool,
+	ulint			i)
+{
+	CheckZipFree	check(i);
+	ut_list_validate(buf_pool->zip_free[i], check);
+}
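
Call sites below wrap the new function in ut_d() so the list walk happens
only in debug builds, e.g.

	ut_d(buf_buddy_list_validate(buf_pool, i));

Replacing the old UT_LIST_VALIDATE macro with a plain function also lets
CheckZipFree move entirely under UNIV_DEBUG.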
 
-#ifdef UNIV_DEBUG
 /**********************************************************************//**
 Debug function to validate that a buffer is indeed free i.e.: in the
 zip_free[].
@@ -282,8 +290,8 @@ buf_buddy_add_to_free(
 	ut_ad(buf_pool->zip_free[i].start != buf);
 
 	buf_buddy_stamp_free(buf, i);
-	UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buf);
-	ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+	UT_LIST_ADD_FIRST(buf_pool->zip_free[i], buf);
+	ut_d(buf_buddy_list_validate(buf_pool, i));
 }
 
 /**********************************************************************//**
@@ -293,20 +301,21 @@ void
 buf_buddy_remove_from_free(
 /*=======================*/
 	buf_pool_t*		buf_pool,	/*!< in: buffer pool instance */
-	buf_buddy_free_t*	buf,		/*!< in,own: block to be freed */
+	buf_buddy_free_t*	buf,		/*!< in,own: block to be
+						freed */
 	ulint			i)		/*!< in: index of
 						buf_pool->zip_free[] */
 {
 	ut_ad(buf_pool_mutex_own(buf_pool));
 	ut_ad(buf_buddy_check_free(buf_pool, buf, i));
 
-	UT_LIST_REMOVE(list, buf_pool->zip_free[i], buf);
+	UT_LIST_REMOVE(buf_pool->zip_free[i], buf);
 	buf_buddy_stamp_nonfree(buf, i);
 }
 
 /**********************************************************************//**
 Try to allocate a block from buf_pool->zip_free[].
-@return	allocated block, or NULL if buf_pool->zip_free[] was empty */
+@return allocated block, or NULL if buf_pool->zip_free[] was empty */
 static
 buf_buddy_free_t*
 buf_buddy_alloc_zip(
@@ -320,10 +329,22 @@ buf_buddy_alloc_zip(
 	ut_a(i < BUF_BUDDY_SIZES);
 	ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 
-	ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+	ut_d(buf_buddy_list_validate(buf_pool, i));
 
 	buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
 
+	if (buf_pool->curr_size < buf_pool->old_size
+	    && UT_LIST_GET_LEN(buf_pool->withdraw)
+		< buf_pool->withdraw_target) {
+
+		while (buf != NULL
+		       && buf_frame_will_withdrawn(
+			       buf_pool, reinterpret_cast<byte*>(buf))) {
+			/* This should be withdrawn, not to be allocated */
+			buf = UT_LIST_GET_NEXT(list, buf);
+		}
+	}
+
 	if (buf) {
 		buf_buddy_remove_from_free(buf_pool, buf, i);
 	} else if (i + 1 < BUF_BUDDY_SIZES) {
@@ -388,9 +409,9 @@ buf_buddy_block_free(
 	UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
 
 	block = (buf_block_t*) bpage;
-	mutex_enter(&block->mutex);
+	buf_page_mutex_enter(block);
 	buf_LRU_block_free_non_file_page(block);
-	mutex_exit(&block->mutex);
+	buf_page_mutex_exit(block);
 
 	ut_ad(buf_pool->buddy_n_frames > 0);
 	ut_d(buf_pool->buddy_n_frames--);
@@ -425,7 +446,7 @@ buf_buddy_block_register(
 
 /**********************************************************************//**
 Allocate a block from a bigger object.
-@return	allocated block */
+@return allocated block */
 static
 void*
 buf_buddy_alloc_from(
@@ -463,8 +484,7 @@ buf_buddy_alloc_from(
 Allocate a block.  The thread calling this function must hold
 buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
 The buf_pool_mutex may be released and reacquired.
-@return	allocated block, never NULL */
-UNIV_INTERN
+@return allocated block, never NULL */
 void*
 buf_buddy_alloc_low(
 /*================*/
@@ -520,7 +540,7 @@ func_exit:
 
 /**********************************************************************//**
 Try to relocate a block.
-@return	true if relocated */
+@return true if relocated */
 static
 bool
 buf_buddy_relocate(
@@ -528,11 +548,13 @@ buf_buddy_relocate(
 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
 	void*		src,		/*!< in: block to relocate */
 	void*		dst,		/*!< in: free block to relocate to */
-	ulint		i)		/*!< in: index of
+	ulint		i,		/*!< in: index of
 					buf_pool->zip_free[] */
+	bool		force)		/*!< in: true if we must relocate
+					always */
 {
 	buf_page_t*	bpage;
-	const ulint	size	= BUF_BUDDY_LOW << i;
+	const ulint	size = BUF_BUDDY_LOW << i;
 	ulint		space;
 	ulint		offset;
 
@@ -555,12 +577,19 @@ buf_buddy_relocate(
 
 	ut_ad(space != BUF_BUDDY_STAMP_FREE);
 
-	ulint		fold = buf_page_address_fold(space, offset);
-	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+	const page_id_t	page_id(space, offset);
+
+	/* If space,offset is bogus, then we know that the
+	buf_page_hash_get_low() call below will return NULL. */
+	if (!force && buf_pool != buf_pool_get(page_id)) {
+		return(false);
+	}
+
+	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
 
 	rw_lock_x_lock(hash_lock);
 
-	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	bpage = buf_page_hash_get_low(buf_pool, page_id);
 
 	if (!bpage || bpage->zip.data != src) {
 		/* The block has probably been freshly
@@ -570,7 +599,27 @@ buf_buddy_relocate(
 
 		rw_lock_x_unlock(hash_lock);
 
-		return(false);
+		if (!force || space != 0 || offset != 0) {
+			return(false);
+		}
+
+		/* It might be just an uninitialized page.
+		We should search the LRU list as well. */
+
+		bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+		while (bpage != NULL) {
+			if (bpage->zip.data == src) {
+				hash_lock = buf_page_hash_lock_get(
+					buf_pool, bpage->id);
+				rw_lock_x_lock(hash_lock);
+				break;
+			}
+			bpage = UT_LIST_GET_NEXT(LRU, bpage);
+		}
+
+		if (bpage == NULL) {
+			return(false);
+		}
 	}
 
 	if (page_zip_get_size(&bpage->zip) != size) {
@@ -588,20 +637,17 @@ buf_buddy_relocate(
 	contain uninitialized data. */
 	UNIV_MEM_ASSERT_W(src, size);
 
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+	BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
 
 	mutex_enter(block_mutex);
 
 	if (buf_page_can_relocate(bpage)) {
 		/* Relocate the compressed page. */
-		ullint	usec = ut_time_us(NULL);
+		uintmax_t	usec = ut_time_us(NULL);
 
 		ut_a(bpage->zip.data == src);
 
-		/* Note: This is potentially expensive, we need a better
-		solution here. We go with correctness for now. */
-		::memcpy(dst, src, size);
-
+		memcpy(dst, src, size);
 		bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
 
 		rw_lock_x_unlock(hash_lock);
@@ -612,24 +658,19 @@ buf_buddy_relocate(
 			reinterpret_cast<buf_buddy_free_t*>(src), i);
 
 		buf_buddy_stat_t*	buddy_stat = &buf_pool->buddy_stat[i];
-
-		++buddy_stat->relocated;
-
+		buddy_stat->relocated++;
 		buddy_stat->relocated_usec += ut_time_us(NULL) - usec;
-
 		return(true);
 	}
 
 	rw_lock_x_unlock(hash_lock);
 
 	mutex_exit(block_mutex);
-
 	return(false);
 }
 
 /**********************************************************************//**
 Deallocate a block. */
-UNIV_INTERN
 void
 buf_buddy_free_low(
 /*===============*/
@@ -663,7 +704,8 @@ recombine:
 	/* Do not recombine blocks if there are few free blocks.
 	We may waste up to 15360*max_len bytes to free blocks
 	(1024 + 2048 + 4096 + 8192 = 15360) */
-	if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
+	if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16
+	    && buf_pool->curr_size >= buf_pool->old_size) {
 		goto func_exit;
 	}
 
@@ -684,7 +726,7 @@ buddy_is_free:
 		goto recombine;
 
 	case BUF_BUDDY_STATE_USED:
-		ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+		ut_d(buf_buddy_list_validate(buf_pool, i));
 
 		/* The buddy is not free. Is there a free block of
 		this size? */
@@ -698,7 +740,8 @@ buddy_is_free:
 
 			/* Try to relocate the buddy of buf to the free
 			block. */
-			if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i)) {
+			if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i,
+					       false)) {
 
 				goto buddy_is_free;
 			}
@@ -719,3 +762,119 @@ func_exit:
 			      reinterpret_cast<buf_buddy_free_t*>(buf),
 			      i);
 }
+
+/** Reallocate a block.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	buf		block to be reallocated, must be pointed
+to by the buffer pool
+@param[in]	size		block size, up to UNIV_PAGE_SIZE
+@retval false	if failed because of no free blocks. */
+bool
+buf_buddy_realloc(
+	buf_pool_t*	buf_pool,
+	void*		buf,
+	ulint		size)
+{
+	buf_block_t*	block = NULL;
+	ulint		i = buf_buddy_get_slot(size);
+
+	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(!mutex_own(&buf_pool->zip_mutex));
+	ut_ad(i <= BUF_BUDDY_SIZES);
+	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+
+	if (i < BUF_BUDDY_SIZES) {
+		/* Try to allocate from the buddy system. */
+		block = reinterpret_cast<buf_block_t*>(
+			buf_buddy_alloc_zip(buf_pool, i));
+	}
+
+	if (block == NULL) {
+		/* Try allocating from the buf_pool->free list. */
+		block = buf_LRU_get_free_only(buf_pool);
+
+		if (block == NULL) {
+			return(false); /* free_list was not enough */
+		}
+
+		buf_buddy_block_register(block);
+
+		block = reinterpret_cast<buf_block_t*>(
+			buf_buddy_alloc_from(
+				buf_pool, block->frame, i, BUF_BUDDY_SIZES));
+	}
+
+	buf_pool->buddy_stat[i].used++;
+
+	/* Try to relocate the buddy of buf to the free block. */
+	if (buf_buddy_relocate(buf_pool, buf, block, i, true)) {
+		/* succeeded */
+		buf_buddy_free_low(buf_pool, buf, i);
+	} else {
+		/* failed */
+		buf_buddy_free_low(buf_pool, block, i);
+	}
+
+	return(true); /* free_list was enough */
+}
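
A hypothetical caller sketch (names illustrative): the buffer pool resize
path presumably uses this to move a compressed page out of a frame that is
about to be withdrawn:

	if (!buf_buddy_realloc(buf_pool, bpage->zip.data,
			       page_zip_get_size(&bpage->zip))) {
		/* No free block was available; flush or free some
		pages and retry. */
	}

Note that the relocation is attempted with force=true, so a block that is
reachable only through the LRU list (such as a still-uninitialized page
with space 0, page 0) can also be moved.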
+
+/** Combine all pairs of free buddies.
+@param[in]	buf_pool	buffer pool instance */
+void
+buf_buddy_condense_free(
+	buf_pool_t*	buf_pool)
+{
+	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(buf_pool->curr_size < buf_pool->old_size);
+
+	for (ulint i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) {
+		buf_buddy_free_t* buf =
+			UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+
+		/* seek to withdraw target */
+		while (buf != NULL
+		       && !buf_frame_will_withdrawn(
+				buf_pool, reinterpret_cast<byte*>(buf))) {
+			buf = UT_LIST_GET_NEXT(list, buf);
+		}
+
+		while (buf != NULL) {
+			buf_buddy_free_t* next =
+				UT_LIST_GET_NEXT(list, buf);
+
+			buf_buddy_free_t* buddy =
+				reinterpret_cast<buf_buddy_free_t*>(
+					buf_buddy_get(
+						reinterpret_cast<byte*>(buf),
+						BUF_BUDDY_LOW << i));
+
+			/* seek to the next withdraw target */
+			while (true) {
+				while (next != NULL
+				       && !buf_frame_will_withdrawn(
+						buf_pool,
+						reinterpret_cast<byte*>(next))) {
+					 next = UT_LIST_GET_NEXT(list, next);
+				}
+
+				if (buddy != next) {
+					break;
+				}
+
+				next = UT_LIST_GET_NEXT(list, next);
+			}
+
+			if (buf_buddy_is_free(buddy, i)
+			    == BUF_BUDDY_STATE_FREE) {
+				/* Both buf and buddy are free.
+				Try to combine them. */
+				buf_buddy_remove_from_free(buf_pool, buf, i);
+				buf_pool->buddy_stat[i].used++;
+
+				buf_buddy_free_low(buf_pool, buf, i);
+			}
+
+			buf = next;
+		}
+	}
+}
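
The pairing in the loop above relies on the classic buddy-address
identity; buf_buddy_get() computes, in effect,

	/* for a block of size = BUF_BUDDY_LOW << i at byte offset
	off within its frame, the buddy sits at the offset with the
	size bit flipped: */
	buddy = frame_base + (off ^ size);

so recombination only ever pairs two naturally aligned blocks of the same
size.
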
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 85e4429..ab0f086 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -30,29 +30,50 @@ The database buffer buf_pool
 Created 11/5/1995 Heikki Tuuri
 *******************************************************/
 
+#include "ha_prototypes.h"
+
+#include "page0size.h"
 #include "buf0buf.h"
 
 #ifdef UNIV_NONINL
 #include "buf0buf.ic"
 #endif
-
+#ifdef UNIV_INNOCHECKSUM
+#include "string.h"
+#include "mach0data.h"
+#endif /* UNIV_INNOCHECKSUM */
+#ifndef UNIV_INNOCHECKSUM
 #include "mem0mem.h"
 #include "btr0btr.h"
 #include "fil0fil.h"
+#include "fsp0sysspace.h"
 #ifndef UNIV_HOTBACKUP
 #include "buf0buddy.h"
 #include "lock0lock.h"
+#include "sync0rw.h"
 #include "btr0sea.h"
 #include "ibuf0ibuf.h"
 #include "trx0undo.h"
+#include "trx0purge.h"
 #include "log0log.h"
+#include "dict0stats_bg.h"
 #endif /* !UNIV_HOTBACKUP */
 #include "srv0srv.h"
+#include "srv0start.h"
 #include "dict0dict.h"
 #include "log0recv.h"
-#include "page0zip.h"
 #include "srv0mon.h"
+#include "fsp0sysspace.h"
+#endif /* !UNIV_INNOCHECKSUM */
+#include "page0zip.h"
 #include "buf0checksum.h"
+#include "sync0sync.h"
+#include "buf0dump.h"
+#include "ut0new.h"
+
+#include <new>
+#include <map>
+#include <sstream>
 #ifdef HAVE_LIBNUMA
 #include <numa.h>
 #include <numaif.h>
@@ -246,41 +267,62 @@ that the whole area may be needed in the near future, and issue
 the read requests for the whole area.
 */
 
-#ifndef UNIV_HOTBACKUP
+#if (!(defined(UNIV_HOTBACKUP) || defined(UNIV_INNOCHECKSUM)))
 /** Value in microseconds */
 static const int WAIT_FOR_READ	= 100;
-/** Number of attemtps made to read in a page in the buffer pool */
-static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
+static const int WAIT_FOR_WRITE = 100;
+/** Number of attempts made to read in a page in the buffer pool */
+static const ulint	BUF_PAGE_READ_MAX_RETRIES = 100;
+/** Number of pages to read ahead */
+static const ulint	BUF_READ_AHEAD_PAGES = 64;
+/** The maximum portion of the buffer pool that can be used for the
+read-ahead buffer.  (Divide buf_pool size by this amount) */
+static const ulint	BUF_READ_AHEAD_PORTION = 32;
 
 /** The buffer pools of the database */
-UNIV_INTERN buf_pool_t*	buf_pool_ptr;
+buf_pool_t*	buf_pool_ptr;
+
+/** true when resizing buffer pool is in the critical path. */
+volatile bool	buf_pool_resizing;
+
+/** true when withdrawing buffer pool pages might cause page relocation */
+volatile bool	buf_pool_withdrawing;
+
+/** the clock is incremented every time a pointer to a page may become obsolete;
+if the withdraw clock has not changed, the pointer is still valid in the buffer
+pool; if it has changed, the pointer might not be in the buffer pool any more. */
+volatile ulint	buf_withdraw_clock;
+
+/** Map of buffer pool chunks, keyed by each chunk's first frame address.
+It is rebuilt on buffer pool initialization and by buf_resize_thread.
+Currently, updates need no mutex protection. */
+typedef std::map<
+	const byte*,
+	buf_chunk_t*,
+	std::less<const byte*>,
+	ut_allocator<std::pair<const byte*, buf_chunk_t*> > >
+	buf_pool_chunk_map_t;
+
+static buf_pool_chunk_map_t*			buf_chunk_map_reg;
+
+/** Chunk map to be used for lookups.
+The map pointed to by this should not be updated */
+static buf_pool_chunk_map_t*	buf_chunk_map_ref = NULL;
 
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-static ulint	buf_dbg_counter	= 0; /*!< This is used to insert validation
-					operations in execution in the
-					debug version */
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 #ifdef UNIV_DEBUG
-/** If this is set TRUE, the program prints info whenever
-read-ahead or flush occurs */
-UNIV_INTERN ibool		buf_debug_prints = FALSE;
+/** Protects the buf_chunk_map_ref reference from the map being deleted,
+because references can be held by debug assertion code. */
+static rw_lock_t*	buf_chunk_map_latch;
+
+/** Disable resizing buffer pool to make assertion code not expensive. */
+my_bool			buf_disable_resize_buffer_pool_debug = TRUE;
 #endif /* UNIV_DEBUG */
 
-#ifdef UNIV_PFS_RWLOCK
-/* Keys to register buffer block related rwlocks and mutexes with
-performance schema */
-UNIV_INTERN mysql_pfs_key_t	buf_block_lock_key;
-# ifdef UNIV_SYNC_DEBUG
-UNIV_INTERN mysql_pfs_key_t	buf_block_debug_latch_key;
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t	buffer_block_mutex_key;
-UNIV_INTERN mysql_pfs_key_t	buf_pool_mutex_key;
-UNIV_INTERN mysql_pfs_key_t	buf_pool_zip_mutex_key;
-UNIV_INTERN mysql_pfs_key_t	flush_list_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/** This is used to insert validation operations in execution
+in the debug version */
+static ulint	buf_dbg_counter	= 0;
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
@@ -307,17 +349,25 @@ on the io_type */
 	 ? (counter##_READ)				\
 	 : (counter##_WRITTEN))
 
+/** Registers a chunk to buf_pool_chunk_map
+ at param[in]	chunk	chunk of buffers */
+static
+void
+buf_pool_register_chunk(
+	buf_chunk_t*	chunk)
+{
+	buf_chunk_map_reg->insert(buf_pool_chunk_map_t::value_type(
+		chunk->blocks->frame, chunk));
+}
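
Because the map is ordered by first-frame address (note the std::less
comparator in the typedef above), the chunk containing an arbitrary frame
pointer can be found with an ordered lookup. A sketch of the presumable
lookup counterpart to buf_pool_register_chunk() (the helper name here is
hypothetical):

	static
	buf_chunk_t*
	buf_pool_chunk_for_frame(const byte* ptr)
	{
		buf_pool_chunk_map_t::iterator	it
			= buf_chunk_map_ref->upper_bound(ptr);

		ut_a(it != buf_chunk_map_ref->begin());

		--it;	/* last chunk whose first frame is <= ptr */

		return(it->second);
	}

This is also why an ordered std::map is used here rather than a hash map.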
+
 /********************************************************************//**
 Gets the smallest oldest_modification lsn for any page in the pool. Returns
 zero if all modified pages have been flushed to disk.
 @return oldest modification in pool, zero if none */
-UNIV_INTERN
 lsn_t
 buf_pool_get_oldest_modification(void)
 /*==================================*/
 {
-	ulint		i;
-	buf_page_t*	bpage;
 	lsn_t		lsn = 0;
 	lsn_t		oldest_lsn = 0;
 
@@ -325,14 +375,24 @@ buf_pool_get_oldest_modification(void)
 	thread to add a dirty page to any flush list. */
 	log_flush_order_mutex_enter();
 
-	for (i = 0; i < srv_buf_pool_instances; i++) {
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
 		buf_pool_t*	buf_pool;
 
 		buf_pool = buf_pool_from_array(i);
 
 		buf_flush_list_mutex_enter(buf_pool);
 
-		bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+		buf_page_t*	bpage;
+
+		/* We do not let a log checkpoint stall just because
+		pages of the system temporary tablespace are not yet
+		flushed to disk; objects residing in the system
+		temporary tablespace do not generate redo log anyway. */
+		for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+		     bpage != NULL
+			&& fsp_is_system_temporary(bpage->id.space());
+		     bpage = UT_LIST_GET_PREV(list, bpage)) {
+			/* Do nothing. */
+		}
 
 		if (bpage != NULL) {
 			ut_ad(bpage->in_flush_list);
@@ -356,7 +416,6 @@ buf_pool_get_oldest_modification(void)
 
 /********************************************************************//**
 Get total buffer pool statistics. */
-UNIV_INTERN
 void
 buf_get_total_list_len(
 /*===================*/
@@ -383,7 +442,6 @@ buf_get_total_list_len(
 
 /********************************************************************//**
 Get total list size in bytes from all buffer pools. */
-UNIV_INTERN
 void
 buf_get_total_list_size_in_bytes(
 /*=============================*/
@@ -409,7 +467,6 @@ buf_get_total_list_size_in_bytes(
 
 /********************************************************************//**
 Get total buffer pool statistics. */
-UNIV_INTERN
 void
 buf_get_total_stat(
 /*===============*/
@@ -443,7 +500,6 @@ buf_get_total_stat(
 /********************************************************************//**
 Allocates a buffer block.
 @return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INTERN
 buf_block_t*
 buf_block_alloc(
 /*============*/
@@ -468,21 +524,18 @@ buf_block_alloc(
 
 	return(block);
 }
-#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM */
 
-/********************************************************************//**
-Checks if a page is all zeroes.
- at return	TRUE if the page is all zeroes */
+/** Checks if a page contains only zeroes.
+@param[in]	read_buf	database page
+@param[in]	page_size	page size
+@return true if page is filled with zeroes */
 bool
 buf_page_is_zeroes(
-/*===============*/
-	const byte*	read_buf,	/*!< in: a database page */
-	const ulint	zip_size)	/*!< in: size of compressed page;
-					0 for uncompressed pages */
+	const byte*		read_buf,
+	const page_size_t&	page_size)
 {
-	const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
-
-	for (ulint i = 0; i < page_size; i++) {
+	for (ulint i = 0; i < page_size.logical(); i++) {
 		if (read_buf[i] != 0) {
 			return(false);
 		}
@@ -494,30 +547,77 @@ buf_page_is_zeroes(
 @param[in]	read_buf	database page
 @param[in]	checksum_field1	new checksum field
 @param[in]	checksum_field2	old checksum field
-@return true if the page is in crc32 checksum format */
+@param[in]	page_no		page number of given read_buf
+@param[in]	is_log_enabled	true if log option is enabled
+@param[in]	log_file	file pointer to log_file
+@param[in]	curr_algo	current checksum algorithm
+@return true if the page is in crc32 checksum format. */
 UNIV_INLINE
 bool
 buf_page_is_checksum_valid_crc32(
-	const byte*	read_buf,
-	ulint		checksum_field1,
-	ulint		checksum_field2)
+	const byte*			read_buf,
+	ulint				checksum_field1,
+	ulint				checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+	,uintmax_t			page_no,
+	bool				is_log_enabled,
+	FILE*				log_file,
+	const srv_checksum_algorithm_t	curr_algo
+#endif /* UNIV_INNOCHECKSUM */
+	)
 {
-	ib_uint32_t	crc32 = buf_calc_page_crc32(read_buf);
+	const uint32_t	crc32 = buf_calc_page_crc32(read_buf);
+
+#ifdef UNIV_INNOCHECKSUM
+	if (is_log_enabled
+	    && curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
+		fprintf(log_file, "page::%" PRIuMAX ";"
+			" crc32 calculated = %u;"
+			" recorded checksum field1 = %lu recorded"
+			" checksum field2 =%lu\n", page_no,
+			crc32, checksum_field1, checksum_field2);
+	}
+#endif /* UNIV_INNOCHECKSUM */
+
+	if (checksum_field1 != checksum_field2) {
+		return(false);
+	}
+
+	if (checksum_field1 == crc32) {
+		return(true);
+	}
+
+	const uint32_t	crc32_legacy = buf_calc_page_crc32(read_buf, true);
+
+	if (checksum_field1 == crc32_legacy) {
+		return(true);
+	}
 
-	return(checksum_field1 == crc32 && checksum_field2 == crc32);
+	return(false);
 }
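The acceptance rule above: the two stored checksum fields must agree, and the shared value must match either the current crc32 of the page or the legacy big-endian variant computed by buf_calc_page_crc32(read_buf, true). Condensed into a sketch that assumes the checksums have already been computed by the caller:

	#include <cstdint>

	/* Sketch of the crc32 acceptance rule; all checksum values
	are assumed to be precomputed. */
	static bool crc32_fields_valid(uint32_t field1, uint32_t field2,
				       uint32_t crc32, uint32_t crc32_legacy)
	{
		return(field1 == field2
		       && (field1 == crc32 || field1 == crc32_legacy));
	}
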
 
 /** Checks if the page is in innodb checksum format.
 @param[in]	read_buf	database page
 @param[in]	checksum_field1	new checksum field
 @param[in]	checksum_field2	old checksum field
-@return true if the page is in innodb checksum format */
+@param[in]	page_no		page number of given read_buf
+@param[in]	is_log_enabled	true if log option is enabled
+@param[in]	log_file	file pointer to log_file
+@param[in]	curr_algo	current checksum algorithm
+@return true if the page is in innodb checksum format. */
 UNIV_INLINE
 bool
 buf_page_is_checksum_valid_innodb(
-	const byte*	read_buf,
-	ulint		checksum_field1,
-	ulint		checksum_field2)
+	const byte*			read_buf,
+	ulint				checksum_field1,
+	ulint				checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+	,uintmax_t			page_no,
+	bool				is_log_enabled,
+	FILE*				log_file,
+	const srv_checksum_algorithm_t	curr_algo
+#endif /* UNIV_INNOCHECKSUM */
+	)
 {
 	/* There are 2 valid formulas for
 	checksum_field2 (old checksum field) which algo=innodb could have
@@ -529,8 +629,41 @@ buf_page_is_checksum_valid_innodb(
 	2. Newer InnoDB versions store the old formula checksum
 	(buf_calc_page_old_checksum()). */
 
+	ulint	old_checksum = buf_calc_page_old_checksum(read_buf);
+	ulint	new_checksum = buf_calc_page_new_checksum(read_buf);
+
+#ifdef UNIV_INNOCHECKSUM
+	if (is_log_enabled
+	    && curr_algo == SRV_CHECKSUM_ALGORITHM_INNODB) {
+		fprintf(log_file, "page::%" PRIuMAX ";"
+			" old style: calculated ="
+			" %lu; recorded = %lu\n",
+			page_no, old_checksum,
+			checksum_field2);
+		fprintf(log_file, "page::%" PRIuMAX ";"
+			" new style: calculated ="
+			" %lu; crc32 = %u; recorded = %lu\n",
+			page_no, new_checksum,
+			buf_calc_page_crc32(read_buf), checksum_field1);
+	}
+
+	if (is_log_enabled
+	    && curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
+		fprintf(log_file, "page::%" PRIuMAX ";"
+			" old style: calculated ="
+			" %lu; recorded checksum = %lu\n",
+			page_no, old_checksum,
+			checksum_field2);
+		fprintf(log_file, "page::%" PRIuMAX ";"
+			" new style: calculated ="
+			" %lu; recorded checksum  = %lu\n",
+			page_no, new_checksum,
+			checksum_field1);
+	}
+#endif /* UNIV_INNOCHECKSUM */
+
 	if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN)
-	    && checksum_field2 != buf_calc_page_old_checksum(read_buf)) {
+	    && checksum_field2 != old_checksum) {
 		return(false);
 	}
 
@@ -539,8 +672,7 @@ buf_page_is_checksum_valid_innodb(
 	/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
 	(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
 
-	if (checksum_field1 != 0
-	    && checksum_field1 != buf_calc_page_new_checksum(read_buf)) {
+	if (checksum_field1 != 0 && checksum_field1 != new_checksum) {
 		return(false);
 	}
 
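In condensed form, the innodb-format rule accepts a page when the old field (checksum_field2) matches either the old-formula checksum or the low 32 bits of FIL_PAGE_LSN, and the new field (checksum_field1) is either 0 (versions < 4.0.14 / < 4.1.1 stored the space id, always 0, there) or the new-formula checksum. A sketch with precomputed inputs:

	#include <cstdint>

	/* Sketch of the innodb acceptance rule; the checksums and the
	low word of the page LSN are precomputed by the caller. */
	static bool innodb_fields_valid(uint32_t field1, uint32_t field2,
					uint32_t old_checksum,
					uint32_t new_checksum,
					uint32_t lsn_low32)
	{
		if (field2 != lsn_low32 && field2 != old_checksum) {
			return(false);
		}
		return(field1 == 0 || field1 == new_checksum);
	}
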
@@ -551,37 +683,73 @@ buf_page_is_checksum_valid_innodb(
 @param[in]	read_buf	database page
 @param[in]	checksum_field1	new checksum field
 @param[in]	checksum_field2	old checksum field
-@return true if the page is in none checksum format */
+@param[in]	page_no		page number of given read_buf
+@param[in]	is_log_enabled	true if log option is enabled
+@param[in]	log_file	file pointer to log_file
+@param[in]	curr_algo	current checksum algorithm
+@return true if the page is in none checksum format. */
 UNIV_INLINE
 bool
 buf_page_is_checksum_valid_none(
-	const byte*	read_buf,
-	ulint		checksum_field1,
-	ulint		checksum_field2)
+	const byte*			read_buf,
+	ulint				checksum_field1,
+	ulint				checksum_field2
+#ifdef	UNIV_INNOCHECKSUM
+	,uintmax_t			page_no,
+	bool				is_log_enabled,
+	FILE*				log_file,
+	const srv_checksum_algorithm_t	curr_algo
+#endif	/* UNIV_INNOCHECKSUM */
+	)
 {
+
+#ifdef UNIV_INNOCHECKSUM
+	if (is_log_enabled
+	    && curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) {
+		fprintf(log_file,
+			"page::%" PRIuMAX "; none checksum: calculated"
+			" = %lu; recorded checksum_field1 = %lu"
+			" recorded checksum_field2 = %lu\n",
+			page_no, BUF_NO_CHECKSUM_MAGIC,
+			checksum_field1, checksum_field2);
+	}
+#endif /* UNIV_INNOCHECKSUM */
+
 	return(checksum_field1 == checksum_field2
 	       && checksum_field1 == BUF_NO_CHECKSUM_MAGIC);
 }
 
-/********************************************************************//**
-Checks if a page is corrupt.
-@return	TRUE if corrupted */
-UNIV_INTERN
+/** Checks if a page is corrupt.
+@param[in]	check_lsn	true if we need to check and complain about
+the LSN
+@param[in]	read_buf	database page
+@param[in]	page_size	page size
+@param[in]	skip_checksum	if true, skip checksum
+@param[in]	page_no		page number of given read_buf
+@param[in]	strict_check	true if strict-check option is enabled
+@param[in]	is_log_enabled	true if log option is enabled
+@param[in]	log_file	file pointer to log_file
+@return TRUE if corrupted */
 ibool
 buf_page_is_corrupted(
-/*==================*/
-	bool		check_lsn,	/*!< in: true if we need to check
-					and complain about the LSN */
-	const byte*	read_buf,	/*!< in: a database page */
-	ulint		zip_size)	/*!< in: size of compressed page;
-					0 for uncompressed pages */
+	bool			check_lsn,
+	const byte*		read_buf,
+	const page_size_t&	page_size,
+	bool			skip_checksum
+#ifdef UNIV_INNOCHECKSUM
+	,uintmax_t		page_no,
+	bool			strict_check,
+	bool			is_log_enabled,
+	FILE*			log_file
+#endif /* UNIV_INNOCHECKSUM */
+)
 {
 	ulint		checksum_field1;
 	ulint		checksum_field2;
 
-	if (!zip_size
+	if (!page_size.is_compressed()
 	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
-		      read_buf + UNIV_PAGE_SIZE
+		      read_buf + page_size.logical()
 		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
 
 		/* Stored log sequence numbers at the start and the end
@@ -590,77 +758,112 @@ buf_page_is_corrupted(
 		return(TRUE);
 	}
 
-#ifndef UNIV_HOTBACKUP
+#if !defined(UNIV_HOTBACKUP) && !defined(UNIV_INNOCHECKSUM)
 	if (check_lsn && recv_lsn_checks_on) {
-		lsn_t	current_lsn;
+		lsn_t		current_lsn;
+		const lsn_t	page_lsn
+			= mach_read_from_8(read_buf + FIL_PAGE_LSN);
 
 		/* Since we are going to reset the page LSN during the import
 		phase it makes no sense to spam the log with error messages. */
 
-		if (log_peek_lsn(&current_lsn)
-		    && current_lsn
-		    < mach_read_from_8(read_buf + FIL_PAGE_LSN)) {
-			ut_print_timestamp(stderr);
-
-			fprintf(stderr,
-				" InnoDB: Error: page %lu log sequence number"
-				" " LSN_PF "\n"
-				"InnoDB: is in the future! Current system "
-				"log sequence number " LSN_PF ".\n"
-				"InnoDB: Your database may be corrupt or "
-				"you may have copied the InnoDB\n"
-				"InnoDB: tablespace but not the InnoDB "
-				"log files. See\n"
-				"InnoDB: " REFMAN
-				"forcing-innodb-recovery.html\n"
-				"InnoDB: for more information.\n",
-				(ulong) mach_read_from_4(
-					read_buf + FIL_PAGE_OFFSET),
-				(lsn_t) mach_read_from_8(
-					read_buf + FIL_PAGE_LSN),
-				current_lsn);
+		if (log_peek_lsn(&current_lsn) && current_lsn < page_lsn) {
+
+			const ulint	space_id = mach_read_from_4(
+				read_buf + FIL_PAGE_SPACE_ID);
+			const ulint	page_no = mach_read_from_4(
+				read_buf + FIL_PAGE_OFFSET);
+
+			ib::error() << "Page " << page_id_t(space_id, page_no)
+				<< " log sequence number " << page_lsn
+				<< " is in the future! Current system"
+				<< " log sequence number "
+				<< current_lsn << ".";
+
+			ib::error() << "Your database may be corrupt or"
+				" you may have copied the InnoDB"
+				" tablespace but not the InnoDB"
+				" log files. "
+				<< FORCE_RECOVERY_MSG;
+
 		}
 	}
-#endif
+#endif /* !UNIV_HOTBACKUP && !UNIV_INNOCHECKSUM */
 
 	/* Check whether the checksum fields have correct values */
 
-	if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
+	if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE
+	    || skip_checksum) {
 		return(FALSE);
 	}
 
-	if (zip_size) {
-		return(!page_zip_verify_checksum(read_buf, zip_size));
+	if (page_size.is_compressed()) {
+#ifdef UNIV_INNOCHECKSUM
+		return(!page_zip_verify_checksum(read_buf,
+						 page_size.physical(),
+						 page_no, strict_check,
+						 is_log_enabled, log_file));
+#else
+		return(!page_zip_verify_checksum(read_buf,
+						 page_size.physical()));
+#endif /* UNIV_INNOCHECKSUM */
 	}
 
 	checksum_field1 = mach_read_from_4(
 		read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
 
 	checksum_field2 = mach_read_from_4(
-		read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
+		read_buf + page_size.logical() - FIL_PAGE_END_LSN_OLD_CHKSUM);
 
 #if FIL_PAGE_LSN % 8
 #error "FIL_PAGE_LSN must be 64 bit aligned"
 #endif
 
 	/* declare empty pages non-corrupted */
-	if (checksum_field1 == 0 && checksum_field2 == 0
-	    && *reinterpret_cast<const ib_uint64_t*>(read_buf +
-						     FIL_PAGE_LSN) == 0) {
+	if (checksum_field1 == 0
+	    && checksum_field2 == 0
+	    && *reinterpret_cast<const ib_uint64_t*>(
+		    read_buf + FIL_PAGE_LSN) == 0) {
+
 		/* make sure that the page is really empty */
-		for (ulint i = 0; i < UNIV_PAGE_SIZE; i++) {
-			if (read_buf[i] != 0) {
-				return(TRUE);
+
+		ulint	i;
+
+		for (i = 0; i < page_size.logical(); ++i) {
+
+			/* The FIL_PAGE_FILE_FLUSH_LSN field (the
+			bytes up to FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)
+			has been repurposed for page compression, so
+			it can be set on uncompressed empty pages. */
+
+			if ((i < FIL_PAGE_FILE_FLUSH_LSN
+			     || i >= FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)
+			    && read_buf[i] != 0) {
+
+				break;
 			}
 		}
-
-		return(FALSE);
+#ifdef UNIV_INNOCHECKSUM
+		if (i >= page_size.logical()) {
+			if (is_log_enabled) {
+				fprintf(log_file, "Page::%" PRIuMAX
+					" is empty and uncorrupted\n",
+					page_no);
+			}
+			return(FALSE);
+		}
+#else
+		return(i < page_size.logical());
+#endif /* UNIV_INNOCHECKSUM */
 	}
 
-	DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); );
+#ifndef UNIV_INNOCHECKSUM
+	const page_id_t	page_id(mach_read_from_4(
+					read_buf + FIL_PAGE_SPACE_ID),
+				mach_read_from_4(
+					read_buf + FIL_PAGE_OFFSET));
+#endif /* UNIV_INNOCHECKSUM */
 
-	ulint	page_no = mach_read_from_4(read_buf + FIL_PAGE_OFFSET);
-	ulint	space_id = mach_read_from_4(read_buf + FIL_PAGE_SPACE_ID);
+	DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(TRUE); );
 	const srv_checksum_algorithm_t	curr_algo =
 		static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
 
@@ -669,99 +872,194 @@ buf_page_is_corrupted(
 	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
 
 		if (buf_page_is_checksum_valid_crc32(read_buf,
-			checksum_field1, checksum_field2)) {
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo
+#endif /* UNIV_INNOCHECKSUM */
+			)) {
 			return(FALSE);
 		}
 
 		if (buf_page_is_checksum_valid_none(read_buf,
-			checksum_field1, checksum_field2)) {
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo)) {
+#else /* UNIV_INNOCHECKSUM */
+		)) {
 			if (curr_algo
 			    == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
 				page_warn_strict_checksum(
 					curr_algo,
 					SRV_CHECKSUM_ALGORITHM_NONE,
-					space_id, page_no);
+					page_id);
 			}
-
+#endif /* UNIV_INNOCHECKSUM */
+
+#ifdef UNIV_INNOCHECKSUM
+			if (is_log_enabled) {
+
+				fprintf(log_file, "page::%" PRIuMAX ";"
+					" old style: calculated = %lu;"
+					" recorded = %lu\n", page_no,
+					buf_calc_page_old_checksum(read_buf),
+					checksum_field2);
+				fprintf(log_file, "page::%" PRIuMAX ";"
+					" new style: calculated = %lu;"
+					" crc32 = %u; recorded = %lu\n",
+					page_no,
+					buf_calc_page_new_checksum(read_buf),
+					buf_calc_page_crc32(read_buf),
+					checksum_field1);
+			}
+#endif /* UNIV_INNOCHECKSUM */
 			return(FALSE);
 		}
 
 		if (buf_page_is_checksum_valid_innodb(read_buf,
-			checksum_field1, checksum_field2)) {
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo)) {
+#else /* UNIV_INNOCHECKSUM */
+		)) {
 			if (curr_algo
 			    == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
 				page_warn_strict_checksum(
 					curr_algo,
 					SRV_CHECKSUM_ALGORITHM_INNODB,
-					space_id, page_no);
+					page_id);
 			}
-
+#endif /* UNIV_INNOCHECKSUM */
 			return(FALSE);
 		}
 
+#ifdef UNIV_INNOCHECKSUM
+		if (is_log_enabled) {
+			fprintf(log_file, "Fail; page %" PRIuMAX
+				" invalid (fails crc32 checksum)\n",
+				page_no);
+		}
+#endif /* UNIV_INNOCHECKSUM */
 		return(TRUE);
 
 	case SRV_CHECKSUM_ALGORITHM_INNODB:
 	case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
 
 		if (buf_page_is_checksum_valid_innodb(read_buf,
-			checksum_field1, checksum_field2)) {
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo
+#endif /* UNIV_INNOCHECKSUM */
+		)) {
 			return(FALSE);
 		}
 
 		if (buf_page_is_checksum_valid_none(read_buf,
-			checksum_field1, checksum_field2)) {
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo)) {
+#else	/* UNIV_INNOCHECKSUM */
+		)) {
 			if (curr_algo
 			    == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
 				page_warn_strict_checksum(
 					curr_algo,
 					SRV_CHECKSUM_ALGORITHM_NONE,
-					space_id, page_no);
+					page_id);
 			}
-
+#endif /* UNIV_INNOCHECKSUM */
+
+#ifdef UNIV_INNOCHECKSUM
+			if (is_log_enabled) {
+				fprintf(log_file, "page::%" PRIuMAX ";"
+					" old style: calculated = %lu;"
+					" recorded = %lu\n", page_no,
+					buf_calc_page_old_checksum(read_buf),
+					checksum_field2);
+				fprintf(log_file, "page::%" PRIuMAX ";"
+					" new style: calculated = %lu;"
+					" crc32 = %u; recorded = %lu\n",
+					page_no,
+					buf_calc_page_new_checksum(read_buf),
+					buf_calc_page_crc32(read_buf),
+					checksum_field1);
+			}
+#endif /* UNIV_INNOCHECKSUM */
 			return(FALSE);
 		}
 
 		if (buf_page_is_checksum_valid_crc32(read_buf,
-			checksum_field1, checksum_field2)) {
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo)) {
+#else /* UNIV_INNOCHECKSUM */
+		)) {
 			if (curr_algo
 			    == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
 				page_warn_strict_checksum(
 					curr_algo,
 					SRV_CHECKSUM_ALGORITHM_CRC32,
-					space_id, page_no);
+					page_id);
 			}
+#endif /* UNIV_INNOCHECKSUM */
 
 			return(FALSE);
 		}
 
+#ifdef UNIV_INNOCHECKSUM
+		if (is_log_enabled) {
+			fprintf(log_file, "Fail; page %" PRIuMAX
+				" invalid (fails innodb checksum)\n",
+				page_no);
+		}
+#endif /* UNIV_INNOCHECKSUM */
 		return(TRUE);
 
 	case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
 
 		if (buf_page_is_checksum_valid_none(read_buf,
-			checksum_field1, checksum_field2)) {
-			return(FALSE);
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo
+#endif /* UNIV_INNOCHECKSUM */
+		)) {
+			return(false);
 		}
 
 		if (buf_page_is_checksum_valid_crc32(read_buf,
-			checksum_field1, checksum_field2)) {
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo)) {
+#else /* UNIV_INNOCHECKSUM */
+		)) {
 			page_warn_strict_checksum(
 				curr_algo,
 				SRV_CHECKSUM_ALGORITHM_CRC32,
-				space_id, page_no);
+				page_id);
+#endif /* UNIV_INNOCHECKSUM */
 			return(FALSE);
 		}
 
 		if (buf_page_is_checksum_valid_innodb(read_buf,
-			checksum_field1, checksum_field2)) {
+			checksum_field1, checksum_field2
+#ifdef UNIV_INNOCHECKSUM
+			, page_no, is_log_enabled, log_file, curr_algo)) {
+#else /* UNIV_INNOCHECKSUM */
+		)) {
 			page_warn_strict_checksum(
 				curr_algo,
 				SRV_CHECKSUM_ALGORITHM_INNODB,
-				space_id, page_no);
+				page_id);
+#endif /* UNIV_INNOCHECKSUM */
 			return(FALSE);
 		}
 
+#ifdef UNIV_INNOCHECKSUM
+		if (is_log_enabled) {
+			fprintf(log_file, "Fail; page %" PRIuMAX
+				" invalid (fails none checksum)\n",
+				page_no);
+		}
+#endif /* UNIV_INNOCHECKSUM */
 		return(TRUE);
 
 	case SRV_CHECKSUM_ALGORITHM_NONE:
@@ -775,113 +1073,122 @@ buf_page_is_corrupted(
 	return(FALSE);
 }
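Behaviorally, the big switch above reduces to: under each algorithm family a page is declared corrupt only when none of the three validators accepts it; the strict variants differ only in calling page_warn_strict_checksum() when a foreign-but-valid format is found, and plain "none" already returned FALSE before the switch. A sketch of the acceptance logic only (warnings and logging elided; the three booleans stand in for the buf_page_is_checksum_valid_*() predicates):

	enum class Algo { CRC32, INNODB, NONE };

	static bool page_is_corrupted(Algo algo, bool valid_crc32,
				      bool valid_innodb, bool valid_none)
	{
		switch (algo) {
		case Algo::CRC32:
			return(!(valid_crc32 || valid_none || valid_innodb));
		case Algo::INNODB:
			return(!(valid_innodb || valid_none || valid_crc32));
		case Algo::NONE:
			return(!(valid_none || valid_crc32 || valid_innodb));
		}
		return(false);
	}
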
 
-/********************************************************************//**
-Prints a page to stderr. */
-UNIV_INTERN
+#ifndef UNIV_INNOCHECKSUM
+
+/** Prints a page to stderr.
+@param[in]	read_buf	a database page
+@param[in]	page_size	page size
+@param[in]	flags		0 or BUF_PAGE_PRINT_NO_CRASH or
+BUF_PAGE_PRINT_NO_FULL */
 void
 buf_page_print(
-/*===========*/
-	const byte*	read_buf,	/*!< in: a database page */
-	ulint		zip_size,	/*!< in: compressed page size, or
-					0 for uncompressed pages */
-	ulint		flags)		/*!< in: 0 or
-					BUF_PAGE_PRINT_NO_CRASH or
-					BUF_PAGE_PRINT_NO_FULL */
-
+	const byte*		read_buf,
+	const page_size_t&	page_size,
+	ulint			flags)
 {
 #ifndef UNIV_HOTBACKUP
 	dict_index_t*	index;
 #endif /* !UNIV_HOTBACKUP */
-	ulint		size = zip_size;
-
-	if (!size) {
-		size = UNIV_PAGE_SIZE;
-	}
 
 	if (!(flags & BUF_PAGE_PRINT_NO_FULL)) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Page dump in ascii and hex (%lu bytes):\n",
-			(ulong) size);
-		ut_print_buf(stderr, read_buf, size);
+
+		ib::info() << "Page dump in ascii and hex ("
+			<< page_size.physical() << " bytes):";
+
+		ut_print_buf(stderr, read_buf, page_size.physical());
 		fputs("\nInnoDB: End of page dump\n", stderr);
 	}
 
-	if (zip_size) {
+	if (page_size.is_compressed()) {
 		/* Print compressed page. */
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Compressed page type (" ULINTPF "); "
-			"stored checksum in field1 " ULINTPF "; "
-			"calculated checksums for field1: "
-			"%s " ULINTPF ", "
-			"%s " ULINTPF ", "
-			"%s " ULINTPF "; "
-			"page LSN " LSN_PF "; "
-			"page number (if stored to page already) " ULINTPF "; "
-			"space id (if stored to page already) " ULINTPF "\n",
-			fil_page_get_type(read_buf),
-			mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
-			buf_checksum_algorithm_name(
-				SRV_CHECKSUM_ALGORITHM_CRC32),
-			page_zip_calc_checksum(read_buf, zip_size,
-				SRV_CHECKSUM_ALGORITHM_CRC32),
-			buf_checksum_algorithm_name(
-				SRV_CHECKSUM_ALGORITHM_INNODB),
-			page_zip_calc_checksum(read_buf, zip_size,
-				SRV_CHECKSUM_ALGORITHM_INNODB),
-			buf_checksum_algorithm_name(
-				SRV_CHECKSUM_ALGORITHM_NONE),
-			page_zip_calc_checksum(read_buf, zip_size,
-				SRV_CHECKSUM_ALGORITHM_NONE),
-			mach_read_from_8(read_buf + FIL_PAGE_LSN),
-			mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
-			mach_read_from_4(read_buf
-					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+		ib::info() << "Compressed page type ("
+			<< fil_page_get_type(read_buf)
+			<< "); stored checksum in field1 "
+			<< mach_read_from_4(
+				read_buf + FIL_PAGE_SPACE_OR_CHKSUM)
+			<< "; calculated checksums for field1: "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_CRC32)
+			<< " "
+			<< page_zip_calc_checksum(
+				read_buf, page_size.physical(),
+				SRV_CHECKSUM_ALGORITHM_CRC32)
+			<< "/"
+			<< page_zip_calc_checksum(
+				read_buf, page_size.physical(),
+				SRV_CHECKSUM_ALGORITHM_CRC32, true)
+			<< ", "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_INNODB)
+			<< " "
+			<< page_zip_calc_checksum(
+				read_buf, page_size.physical(),
+				SRV_CHECKSUM_ALGORITHM_INNODB)
+			<< ", "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_NONE)
+			<< " "
+			<< page_zip_calc_checksum(
+				read_buf, page_size.physical(),
+				SRV_CHECKSUM_ALGORITHM_NONE)
+			<< "; page LSN "
+			<< mach_read_from_8(read_buf + FIL_PAGE_LSN)
+			<< "; page number (if stored to page"
+			<< " already) "
+			<< mach_read_from_4(read_buf + FIL_PAGE_OFFSET)
+			<< "; space id (if stored to page already) "
+			<< mach_read_from_4(
+				read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
 	} else {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: uncompressed page, "
-			"stored checksum in field1 " ULINTPF ", "
-			"calculated checksums for field1: "
-			"%s " UINT32PF ", "
-			"%s " ULINTPF ", "
-			"%s " ULINTPF ", "
-
-			"stored checksum in field2 " ULINTPF ", "
-			"calculated checksums for field2: "
-			"%s " UINT32PF ", "
-			"%s " ULINTPF ", "
-			"%s " ULINTPF ", "
-
-			"page LSN " ULINTPF " " ULINTPF ", "
-			"low 4 bytes of LSN at page end " ULINTPF ", "
-			"page number (if stored to page already) " ULINTPF ", "
-			"space id (if created with >= MySQL-4.1.1 "
-			"and stored already) %lu\n",
-			mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32),
-			buf_calc_page_crc32(read_buf),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB),
-			buf_calc_page_new_checksum(read_buf),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE),
-			BUF_NO_CHECKSUM_MAGIC,
-
-			mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32),
-			buf_calc_page_crc32(read_buf),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB),
-			buf_calc_page_old_checksum(read_buf),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE),
-			BUF_NO_CHECKSUM_MAGIC,
-
-			mach_read_from_4(read_buf + FIL_PAGE_LSN),
-			mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
-			mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
-			mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
-			mach_read_from_4(read_buf
-					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+		const uint32_t	crc32 = buf_calc_page_crc32(read_buf);
+
+		const uint32_t	crc32_legacy = buf_calc_page_crc32(read_buf,
+								   true);
+
+		ib::info() << "Uncompressed page, stored checksum in field1 "
+			<< mach_read_from_4(
+				read_buf + FIL_PAGE_SPACE_OR_CHKSUM)
+			<< ", calculated checksums for field1: "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_CRC32) << " "
+			<< crc32 << "/" << crc32_legacy
+			<< ", "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_INNODB) << " "
+			<< buf_calc_page_new_checksum(read_buf)
+			<< ", "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_NONE) << " "
+			<< BUF_NO_CHECKSUM_MAGIC
+			<< ", stored checksum in field2 "
+			<< mach_read_from_4(read_buf + page_size.logical()
+					    - FIL_PAGE_END_LSN_OLD_CHKSUM)
+			<< ", calculated checksums for field2: "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_CRC32) << " "
+			<< crc32 << "/" << crc32_legacy
+			<< ", "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_INNODB) << " "
+			<< buf_calc_page_old_checksum(read_buf)
+			<< ", "
+			<< buf_checksum_algorithm_name(
+				SRV_CHECKSUM_ALGORITHM_NONE) << " "
+			<< BUF_NO_CHECKSUM_MAGIC
+			<< ",  page LSN "
+			<< mach_read_from_4(read_buf + FIL_PAGE_LSN)
+			<< " "
+			<< mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
+			<< ", low 4 bytes of LSN at page end "
+			<< mach_read_from_4(read_buf + page_size.logical()
+					    - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)
+			<< ", page number (if stored to page already) "
+			<< mach_read_from_4(read_buf + FIL_PAGE_OFFSET)
+			<< ", space id (if created with >= MySQL-4.1.1"
+			   " and stored already) "
+			<< mach_read_from_4(
+				read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 	}
 
 #ifndef UNIV_HOTBACKUP
@@ -900,17 +1207,19 @@ buf_page_print(
 	switch (fil_page_get_type(read_buf)) {
 		index_id_t	index_id;
 	case FIL_PAGE_INDEX:
+	case FIL_PAGE_RTREE:
 		index_id = btr_page_get_index_id(read_buf);
 		fprintf(stderr,
 			"InnoDB: Page may be an index page where"
-			" index id is %llu\n",
-			(ullint) index_id);
+			" index id is " IB_ID_FMT "\n",
+			index_id);
 #ifndef UNIV_HOTBACKUP
 		index = dict_index_find_on_id_low(index_id);
 		if (index) {
-			fputs("InnoDB: (", stderr);
-			dict_index_name_print(stderr, NULL, index);
-			fputs(")\n", stderr);
+			ib::info()
+				<< "Index " << index_id
+				<< " is " << index->name
+				<< " in table " << index->table->name;
 		}
 #endif /* !UNIV_HOTBACKUP */
 		break;
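All the field accesses in this function go through mach_read_from_4()/mach_read_from_8(), which read big-endian integers from the page frame; for reference, the 4-byte variant is equivalent in effect to:

	#include <cstdint>

	/* Big-endian 32-bit read, as mach_read_from_4() does. */
	static uint32_t read_be32(const unsigned char* b)
	{
		return((uint32_t(b[0]) << 24) | (uint32_t(b[1]) << 16)
		       | (uint32_t(b[2]) << 8) | uint32_t(b[3]));
	}
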
@@ -962,6 +1271,8 @@ buf_page_print(
 #ifndef UNIV_HOTBACKUP
 
 # ifdef PFS_GROUP_BUFFER_SYNC
+extern mysql_pfs_key_t	buffer_block_mutex_key;
+
 /********************************************************************//**
 This function registers mutexes and rwlocks in buffer blocks with
 performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
@@ -974,27 +1285,24 @@ pfs_register_buffer_block(
 /*======================*/
 	buf_chunk_t*	chunk)		/*!< in/out: chunk of buffers */
 {
-	ulint		i;
-	ulint		num_to_register;
 	buf_block_t*    block;
+	ulint		num_to_register;
 
 	block = chunk->blocks;
 
-	num_to_register = ut_min(chunk->size,
-				 PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
-
-	for (i = 0; i < num_to_register; i++) {
-		ib_mutex_t*	mutex;
-		rw_lock_t*	rwlock;
+	num_to_register = ut_min(
+		chunk->size, PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
 
+	for (ulint i = 0; i < num_to_register; i++) {
 #  ifdef UNIV_PFS_MUTEX
+		BPageMutex*	mutex;
+
 		mutex = &block->mutex;
-		ut_a(!mutex->pfs_psi);
-		mutex->pfs_psi = (PSI_server)
-			? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
-			: NULL;
+		mutex->pfs_add(buffer_block_mutex_key);
 #  endif /* UNIV_PFS_MUTEX */
 
+		rw_lock_t*	rwlock;
+
 #  ifdef UNIV_PFS_RWLOCK
 		rwlock = &block->lock;
 		ut_a(!rwlock->pfs_psi);
@@ -1002,14 +1310,14 @@ pfs_register_buffer_block(
 			? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
 			: NULL;
 
-#   ifdef UNIV_SYNC_DEBUG
+#   ifdef UNIV_DEBUG
 		rwlock = &block->debug_latch;
 		ut_a(!rwlock->pfs_psi);
 		rwlock->pfs_psi = (PSI_server)
 			? PSI_server->init_rwlock(buf_block_debug_latch_key,
 						  rwlock)
 			: NULL;
-#   endif /* UNIV_SYNC_DEBUG */
+#   endif /* UNIV_DEBUG */
 
 #  endif /* UNIV_PFS_RWLOCK */
 		block++;
@@ -1035,60 +1343,63 @@ buf_block_init(
 	block->page.state = BUF_BLOCK_NOT_USED;
 	block->page.buf_fix_count = 0;
 	block->page.io_fix = BUF_IO_NONE;
+	block->page.flush_observer = NULL;
 
 	block->modify_clock = 0;
 
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	block->page.file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+	ut_d(block->page.file_page_was_freed = FALSE);
 
-	block->check_index_page_at_flush = FALSE;
 	block->index = NULL;
+	block->made_dirty_with_no_latch = false;
+	block->skip_flush_check = false;
+
+	ut_d(block->page.in_page_hash = FALSE);
+	ut_d(block->page.in_zip_hash = FALSE);
+	ut_d(block->page.in_flush_list = FALSE);
+	ut_d(block->page.in_free_list = FALSE);
+	ut_d(block->page.in_LRU_list = FALSE);
+	ut_d(block->in_unzip_LRU_list = FALSE);
+	ut_d(block->in_withdraw_list = FALSE);
 
-#ifdef UNIV_DEBUG
-	block->page.in_page_hash = FALSE;
-	block->page.in_zip_hash = FALSE;
-	block->page.in_flush_list = FALSE;
-	block->page.in_free_list = FALSE;
-	block->page.in_LRU_list = FALSE;
-	block->in_unzip_LRU_list = FALSE;
-#endif /* UNIV_DEBUG */
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 	block->n_pointers = 0;
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
 	page_zip_des_init(&block->page.zip);
 
+	mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);
+
 #if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
 	/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
-	of buffer block mutex/rwlock with performance schema. If
-	PFS_GROUP_BUFFER_SYNC is defined, skip the registration
-	since buffer block mutex/rwlock will be registered later in
-	pfs_register_buffer_block() */
+	of buffer block rwlock with performance schema.
+
+	If PFS_GROUP_BUFFER_SYNC is defined, skip the registration
+	since buffer block rwlock will be registered later in
+	pfs_register_buffer_block(). */
 
-	mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
 	rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
 
-# ifdef UNIV_SYNC_DEBUG
-	rw_lock_create(PFS_NOT_INSTRUMENTED,
-		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
-# endif /* UNIV_SYNC_DEBUG */
+	ut_d(rw_lock_create(
+			PFS_NOT_INSTRUMENTED,
+			&block->debug_latch, SYNC_NO_ORDER_CHECK));
 
 #else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
-	mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
+
 	rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
 
-# ifdef UNIV_SYNC_DEBUG
-	rw_lock_create(buf_block_debug_latch_key,
-		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
-# endif /* UNIV_SYNC_DEBUG */
+	ut_d(rw_lock_create(
+			buf_block_debug_latch_key,
+			&block->debug_latch, SYNC_NO_ORDER_CHECK));
+
 #endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
 
+	block->lock.is_block_lock = 1;
+
 	ut_ad(rw_lock_validate(&(block->lock)));
 }
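The ut_d() wrapper that replaces the explicit #ifdef UNIV_DEBUG blocks here evaluates its argument only in debug builds; conceptually (see ut0dbg.h for the real definition):

	#ifdef UNIV_DEBUG
	# define ut_d(EXPR)	EXPR	/* debug build: keep the code */
	#else
	# define ut_d(EXPR)		/* release build: compile out */
	#endif

so ut_d(block->page.in_page_hash = FALSE); expands to the assignment under UNIV_DEBUG and to nothing otherwise.
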
 
 /********************************************************************//**
 Allocates a chunk of buffer frames.
-@return	chunk, or NULL on failure */
+@return chunk, or NULL on failure */
 static
 buf_chunk_t*
 buf_chunk_init(
@@ -1108,8 +1419,10 @@ buf_chunk_init(
 	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
 				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
 
-	chunk->mem_size = mem_size;
-	chunk->mem = os_mem_alloc_large(&chunk->mem_size);
+	DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return(NULL););
+
+	chunk->mem = buf_pool->allocator.allocate_large(mem_size,
+							&chunk->mem_pfx);
 
 	if (UNIV_UNLIKELY(chunk->mem == NULL)) {
 
@@ -1118,16 +1431,15 @@ buf_chunk_init(
 
 #ifdef HAVE_LIBNUMA
 	if (srv_numa_interleave) {
-		int	st = mbind(chunk->mem, chunk->mem_size,
+		int	st = mbind(chunk->mem, mem_size,
 				   MPOL_INTERLEAVE,
 				   numa_all_nodes_ptr->maskp,
 				   numa_all_nodes_ptr->size,
 				   MPOL_MF_MOVE);
 		if (st != 0) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Failed to set NUMA memory policy of buffer"
-				" pool page frames to MPOL_INTERLEAVE"
-				" (error: %s).", strerror(errno));
+			ib::warn() << "Failed to set NUMA memory policy of"
+				" buffer pool page frames to MPOL_INTERLEAVE"
+				" (error: " << strerror(errno) << ").";
 		}
 	}
 #endif // HAVE_LIBNUMA
@@ -1142,7 +1454,7 @@ buf_chunk_init(
 	it is bigger, we may allocate more blocks than requested. */
 
 	frame = (byte*) ut_align(chunk->mem, UNIV_PAGE_SIZE);
-	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
+	chunk->size = chunk->mem_pfx.m_size / UNIV_PAGE_SIZE
 		- (frame != chunk->mem);
 
 	/* Subtract the space needed for block descriptors. */
@@ -1169,7 +1481,7 @@ buf_chunk_init(
 		UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
 
 		/* Add the block to the free list */
-		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
+		UT_LIST_ADD_LAST(buf_pool->free, &block->page);
 
 		ut_d(block->page.in_free_list = TRUE);
 		ut_ad(buf_pool_from_block(block) == buf_pool);
@@ -1178,9 +1490,11 @@ buf_chunk_init(
 		frame += UNIV_PAGE_SIZE;
 	}
 
+	buf_pool_register_chunk(chunk);
+
 #ifdef PFS_GROUP_BUFFER_SYNC
 	pfs_register_buffer_block(chunk);
-#endif
+#endif /* PFS_GROUP_BUFFER_SYNC */
 	return(chunk);
 }
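The frame arithmetic in buf_chunk_init() aligns the start of the allocation up to a page boundary and gives up one frame whenever the alignment moved the pointer. The same computation in isolation (16384 stands in for the default UNIV_PAGE_SIZE):

	#include <cstddef>
	#include <cstdint>

	static const size_t PAGE = 16384;

	/* Usable page frames in a raw allocation; mirrors
	frame = ut_align(mem, PAGE);
	size = mem_size / PAGE - (frame != mem); */
	static size_t usable_frames(void* mem, size_t mem_size)
	{
		uintptr_t	p = reinterpret_cast<uintptr_t>(mem);
		uintptr_t	aligned = (p + PAGE - 1)
			& ~(uintptr_t(PAGE) - 1);

		return(mem_size / PAGE - (aligned != p));
	}
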
 
@@ -1188,7 +1502,7 @@ buf_chunk_init(
 /*********************************************************************//**
 Finds a block in the given buffer chunk that points to a
 given compressed page.
-@return	buffer block pointing to the compressed page, or NULL */
+@return buffer block pointing to the compressed page, or NULL */
 static
 buf_block_t*
 buf_chunk_contains_zip(
@@ -1214,8 +1528,7 @@ buf_chunk_contains_zip(
 /*********************************************************************//**
 Finds a block in the buffer pool that points to a
 given compressed page.
-@return	buffer block pointing to the compressed page, or NULL */
-UNIV_INTERN
+@return buffer block pointing to the compressed page, or NULL */
 buf_block_t*
 buf_pool_contains_zip(
 /*==================*/
@@ -1242,7 +1555,7 @@ buf_pool_contains_zip(
 
 /*********************************************************************//**
 Checks that all file pages in the buffer chunk are in a replaceable state.
-@return	address of a non-free block, or NULL if all freed */
+@return address of a non-free block, or NULL if all freed */
 static
 const buf_block_t*
 buf_chunk_not_freed(
@@ -1273,9 +1586,9 @@ buf_chunk_not_freed(
 			file pages. */
 			break;
 		case BUF_BLOCK_FILE_PAGE:
-			mutex_enter(&block->mutex);
+			buf_page_mutex_enter(block);
 			ready = buf_flush_ready_for_replace(&block->page);
-			mutex_exit(&block->mutex);
+			buf_page_mutex_exit(block);
 
 			if (!ready) {
 
@@ -1310,6 +1623,7 @@ buf_pool_set_sizes(void)
 
 	srv_buf_pool_curr_size = curr_size;
 	srv_buf_pool_old_size = srv_buf_pool_size;
+	srv_buf_pool_base_size = srv_buf_pool_size;
 
 	buf_pool_mutex_exit_all();
 }
@@ -1317,7 +1631,6 @@ buf_pool_set_sizes(void)
 /********************************************************************//**
 Initialize a buffer pool instance.
 @return DB_SUCCESS if all goes well. */
-UNIV_INTERN
 ulint
 buf_pool_init_instance(
 /*===================*/
@@ -1326,50 +1639,99 @@ buf_pool_init_instance(
 	ulint		instance_no)	/*!< in: id of the instance */
 {
 	ulint		i;
+	ulint		chunk_size;
 	buf_chunk_t*	chunk;
 
+	ut_ad(buf_pool_size % srv_buf_pool_chunk_unit == 0);
+
 	/* 1. Initialize general fields
 	------------------------------- */
-	mutex_create(buf_pool_mutex_key,
-		     &buf_pool->mutex, SYNC_BUF_POOL);
-	mutex_create(buf_pool_zip_mutex_key,
-		     &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
+	mutex_create(LATCH_ID_BUF_POOL, &buf_pool->mutex);
+
+	mutex_create(LATCH_ID_BUF_POOL_ZIP, &buf_pool->zip_mutex);
+
+	new(&buf_pool->allocator)
+		ut_allocator<unsigned char>(mem_key_buf_buf_pool);
 
 	buf_pool_mutex_enter(buf_pool);
 
 	if (buf_pool_size > 0) {
-		buf_pool->n_chunks = 1;
+		buf_pool->n_chunks
+			= buf_pool_size / srv_buf_pool_chunk_unit;
+		chunk_size = srv_buf_pool_chunk_unit;
+
+		buf_pool->chunks =
+			reinterpret_cast<buf_chunk_t*>(ut_zalloc_nokey(
+				buf_pool->n_chunks * sizeof(*chunk)));
+		buf_pool->chunks_old = NULL;
+
+		UT_LIST_INIT(buf_pool->LRU, &buf_page_t::LRU);
+		UT_LIST_INIT(buf_pool->free, &buf_page_t::list);
+		UT_LIST_INIT(buf_pool->withdraw, &buf_page_t::list);
+		buf_pool->withdraw_target = 0;
+		UT_LIST_INIT(buf_pool->flush_list, &buf_page_t::list);
+		UT_LIST_INIT(buf_pool->unzip_LRU, &buf_block_t::unzip_LRU);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+		UT_LIST_INIT(buf_pool->zip_clean, &buf_page_t::list);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-		buf_pool->chunks = chunk =
-			(buf_chunk_t*) mem_zalloc(sizeof *chunk);
+		for (i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) {
+			UT_LIST_INIT(
+				buf_pool->zip_free[i], &buf_buddy_free_t::list);
+		}
 
-		UT_LIST_INIT(buf_pool->free);
+		buf_pool->curr_size = 0;
+		chunk = buf_pool->chunks;
 
-		if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
-			mem_free(chunk);
-			mem_free(buf_pool);
+		do {
+			if (!buf_chunk_init(buf_pool, chunk, chunk_size)) {
+				while (--chunk >= buf_pool->chunks) {
+					buf_block_t*	block = chunk->blocks;
 
-			buf_pool_mutex_exit(buf_pool);
+					for (i = chunk->size; i--; block++) {
+						mutex_free(&block->mutex);
+						rw_lock_free(&block->lock);
 
-			return(DB_ERROR);
-		}
+						ut_d(rw_lock_free(
+							&block->debug_latch));
+					}
+
+					buf_pool->allocator.deallocate_large(
+						chunk->mem, &chunk->mem_pfx);
+				}
+				ut_free(buf_pool->chunks);
+				buf_pool_mutex_exit(buf_pool);
+
+				return(DB_ERROR);
+			}
+
+			buf_pool->curr_size += chunk->size;
+		} while (++chunk < buf_pool->chunks + buf_pool->n_chunks);
 
 		buf_pool->instance_no = instance_no;
-		buf_pool->old_pool_size = buf_pool_size;
-		buf_pool->curr_size = chunk->size;
+		buf_pool->read_ahead_area =
+			ut_min(BUF_READ_AHEAD_PAGES,
+			       ut_2_power_up(buf_pool->curr_size /
+					     BUF_READ_AHEAD_PORTION));
 		buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
 
+		buf_pool->old_size = buf_pool->curr_size;
+		buf_pool->n_chunks_new = buf_pool->n_chunks;
+
 		/* Number of locks protecting page_hash must be a
 		power of two */
 		srv_n_page_hash_locks = static_cast<ulong>(
-				 ut_2_power_up(srv_n_page_hash_locks));
+			 ut_2_power_up(srv_n_page_hash_locks));
 		ut_a(srv_n_page_hash_locks != 0);
 		ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS);
 
-		buf_pool->page_hash = ha_create(2 * buf_pool->curr_size,
-						srv_n_page_hash_locks,
-						MEM_HEAP_FOR_PAGE_HASH,
-						SYNC_BUF_PAGE_HASH);
+		buf_pool->page_hash = ib_create(
+			2 * buf_pool->curr_size,
+			LATCH_ID_HASH_TABLE_RW_LOCK,
+			srv_n_page_hash_locks, MEM_HEAP_FOR_PAGE_HASH);
+
+		buf_pool->page_hash_old = NULL;
 
 		buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
 
@@ -1378,20 +1740,35 @@ buf_pool_init_instance(
 	/* 2. Initialize flushing fields
 	-------------------------------- */
 
-	mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
-		     SYNC_BUF_FLUSH_LIST);
+	mutex_create(LATCH_ID_FLUSH_LIST, &buf_pool->flush_list_mutex);
 
 	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
-		buf_pool->no_flush[i] = os_event_create();
+		buf_pool->no_flush[i] = os_event_create(0);
 	}
 
-	buf_pool->watch = (buf_page_t*) mem_zalloc(
+	buf_pool->watch = (buf_page_t*) ut_zalloc_nokey(
 		sizeof(*buf_pool->watch) * BUF_POOL_WATCH_SIZE);
+	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
+		buf_pool->watch[i].buf_pool_index = buf_pool->instance_no;
+	}
 
-	/* All fields are initialized by mem_zalloc(). */
+	/* All fields are initialized by ut_zalloc_nokey(). */
 
 	buf_pool->try_LRU_scan = TRUE;
 
+	/* Initialize the hazard pointer for flush_list batches */
+	new(&buf_pool->flush_hp)
+		FlushHp(buf_pool, &buf_pool->flush_list_mutex);
+
+	/* Initialize the hazard pointer for LRU batches */
+	new(&buf_pool->lru_hp) LRUHp(buf_pool, &buf_pool->mutex);
+
+	/* Initialize the iterator for LRU scan search */
+	new(&buf_pool->lru_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);
+
+	/* Initialize the iterator for single page scan search */
+	new(&buf_pool->single_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);
+
 	buf_pool_mutex_exit(buf_pool);
 
 	return(DB_SUCCESS);
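The new(&buf_pool->flush_hp) FlushHp(...) lines above are placement new: the buf_pool_t itself comes out of zeroed raw storage (ut_zalloc_nokey()), so members with constructors must be built in place. A minimal self-contained illustration of the idiom:

	#include <cstdlib>
	#include <new>

	struct Hp {
		explicit Hp(int id) : m_id(id) {}
		int	m_id;
	};

	int main()
	{
		/* zeroed raw storage, as ut_zalloc_nokey() returns */
		void*	raw = calloc(1, sizeof(Hp));

		Hp*	hp = new(raw) Hp(42);	/* construct in place */

		hp->~Hp();			/* destroy explicitly */
		free(raw);
	}
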
@@ -1409,11 +1786,18 @@ buf_pool_free_instance(
 	buf_chunk_t*	chunk;
 	buf_chunk_t*	chunks;
 	buf_page_t*	bpage;
+	buf_page_t*	prev_bpage = 0;
 
-	bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-	while (bpage != NULL) {
-		buf_page_t*	prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-		enum buf_page_state	state = buf_page_get_state(bpage);
+	mutex_free(&buf_pool->mutex);
+	mutex_free(&buf_pool->zip_mutex);
+	mutex_free(&buf_pool->flush_list_mutex);
+
+	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+	     bpage != NULL;
+	     bpage = prev_bpage) {
+
+		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
+		buf_page_state	state = buf_page_get_state(bpage);
 
 		ut_ad(buf_page_in_file(bpage));
 		ut_ad(bpage->in_LRU_list);
@@ -1425,30 +1809,43 @@ buf_pool_free_instance(
 			      || srv_fast_shutdown == 2);
 			buf_page_free_descriptor(bpage);
 		}
-
-		bpage = prev_bpage;
 	}
 
-	mem_free(buf_pool->watch);
+	ut_free(buf_pool->watch);
 	buf_pool->watch = NULL;
 
 	chunks = buf_pool->chunks;
 	chunk = chunks + buf_pool->n_chunks;
 
 	while (--chunk >= chunks) {
-		os_mem_free_large(chunk->mem, chunk->mem_size);
+		buf_block_t*	block = chunk->blocks;
+
+		for (ulint i = chunk->size; i--; block++) {
+			mutex_free(&block->mutex);
+			rw_lock_free(&block->lock);
+
+			ut_d(rw_lock_free(&block->debug_latch));
+		}
+
+		buf_pool->allocator.deallocate_large(
+			chunk->mem, &chunk->mem_pfx);
 	}
 
-	mem_free(buf_pool->chunks);
+	for (ulint i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; ++i) {
+		os_event_destroy(buf_pool->no_flush[i]);
+	}
+
+	ut_free(buf_pool->chunks);
 	ha_clear(buf_pool->page_hash);
 	hash_table_free(buf_pool->page_hash);
 	hash_table_free(buf_pool->zip_hash);
+
+	buf_pool->allocator.~ut_allocator();
 }
 
 /********************************************************************//**
 Creates the buffer pool.
-@return	DB_SUCCESS if success, DB_ERROR if not enough memory or error */
-UNIV_INTERN
+@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
 dberr_t
 buf_pool_init(
 /*==========*/
@@ -1462,24 +1859,33 @@ buf_pool_init(
 	ut_ad(n_instances <= MAX_BUFFER_POOLS);
 	ut_ad(n_instances == srv_buf_pool_instances);
 
+	buf_pool_resizing = false;
+	buf_pool_withdrawing = false;
+	buf_withdraw_clock = 0;
+
 #ifdef HAVE_LIBNUMA
 	if (srv_numa_interleave) {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Setting NUMA memory policy to MPOL_INTERLEAVE");
+		ib::info() << "Setting NUMA memory policy to MPOL_INTERLEAVE";
 		if (set_mempolicy(MPOL_INTERLEAVE,
 				  numa_all_nodes_ptr->maskp,
 				  numa_all_nodes_ptr->size) != 0) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Failed to set NUMA memory policy to"
-				" MPOL_INTERLEAVE (error: %s).",
-				strerror(errno));
+			ib::warn() << "Failed to set NUMA memory policy to"
+				" MPOL_INTERLEAVE: " << strerror(errno);
 		}
 	}
 #endif // HAVE_LIBNUMA
 
-	buf_pool_ptr = (buf_pool_t*) mem_zalloc(
+	buf_pool_ptr = (buf_pool_t*) ut_zalloc_nokey(
 		n_instances * sizeof *buf_pool_ptr);
 
+	buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t());
+
+	ut_d(buf_chunk_map_latch = static_cast<rw_lock_t*>(
+			ut_zalloc_nokey(sizeof(*buf_chunk_map_latch))));
+
+	ut_d(rw_lock_create(
+		buf_chunk_map_latch_key, buf_chunk_map_latch, SYNC_ANY_LATCH));
+
 	for (i = 0; i < n_instances; i++) {
 		buf_pool_t*	ptr	= &buf_pool_ptr[i];
 
@@ -1492,6 +1898,8 @@ buf_pool_init(
 		}
 	}
 
+	buf_chunk_map_ref = buf_chunk_map_reg;
+
 	buf_pool_set_sizes();
 	buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
 
@@ -1499,12 +1907,10 @@ buf_pool_init(
 
 #ifdef HAVE_LIBNUMA
 	if (srv_numa_interleave) {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Setting NUMA memory policy to MPOL_DEFAULT");
+		ib::info() << "Setting NUMA memory policy to MPOL_DEFAULT";
 		if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Failed to set NUMA memory policy to"
-				" MPOL_DEFAULT (error: %s).", strerror(errno));
+			ib::warn() << "Failed to set NUMA memory policy to"
+				" MPOL_DEFAULT: " << strerror(errno);
 		}
 	}
 #endif // HAVE_LIBNUMA
@@ -1515,127 +1921,1176 @@ buf_pool_init(
 /********************************************************************//**
 Frees the buffer pool at shutdown.  This must not be invoked before
 freeing all mutexes. */
-UNIV_INTERN
 void
 buf_pool_free(
 /*==========*/
 	ulint	n_instances)	/*!< in: number of instances to free */
 {
-	ulint	i;
-
-	for (i = 0; i < n_instances; i++) {
+	for (ulint i = 0; i < n_instances; i++) {
 		buf_pool_free_instance(buf_pool_from_array(i));
 	}
 
-	mem_free(buf_pool_ptr);
+	ut_d(rw_lock_free(buf_chunk_map_latch));
+	ut_d(ut_free(buf_chunk_map_latch));
+	ut_d(buf_chunk_map_latch = NULL);
+
+	UT_DELETE(buf_chunk_map_reg);
+	buf_chunk_map_reg = buf_chunk_map_ref = NULL;
+
+	ut_free(buf_pool_ptr);
 	buf_pool_ptr = NULL;
 }
 
-/********************************************************************//**
-Clears the adaptive hash index on all pages in the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_clear_hash_index(void)
-/*===========================*/
+/** Reallocate a control block.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	block		pointer to control block
+@retval false	if failed because of no free blocks. */
+static
+bool
+buf_page_realloc(
+	buf_pool_t*	buf_pool,
+	buf_block_t*	block)
 {
-	ulint	p;
+	buf_block_t*	new_block;
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(!btr_search_enabled);
+	ut_ad(buf_pool_withdrawing);
+	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
-	for (p = 0; p < srv_buf_pool_instances; p++) {
-		buf_pool_t*	buf_pool = buf_pool_from_array(p);
-		buf_chunk_t*	chunks	= buf_pool->chunks;
-		buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;
+	new_block = buf_LRU_get_free_only(buf_pool);
 
-		while (--chunk >= chunks) {
-			buf_block_t*	block	= chunk->blocks;
-			ulint		i	= chunk->size;
+	if (new_block == NULL) {
+		return(false); /* free_list was not enough */
+	}
 
-			for (; i--; block++) {
-				dict_index_t*	index	= block->index;
+	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, block->page.id);
 
-				/* We can set block->index = NULL
-				when we have an x-latch on btr_search_latch;
-				see the comment in buf0buf.h */
+	rw_lock_x_lock(hash_lock);
+	mutex_enter(&block->mutex);
 
-				if (!index) {
-					/* Not hashed */
-					continue;
-				}
+	if (buf_page_can_relocate(&block->page)) {
+		mutex_enter(&new_block->mutex);
 
-				block->index = NULL;
-# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-				block->n_pointers = 0;
-# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+		memcpy(new_block->frame, block->frame, UNIV_PAGE_SIZE);
+		memcpy(&new_block->page, &block->page, sizeof block->page);
+
+		/* relocate LRU list */
+		ut_ad(block->page.in_LRU_list);
+		ut_ad(!block->page.in_zip_hash);
+		ut_d(block->page.in_LRU_list = FALSE);
+
+		buf_LRU_adjust_hp(buf_pool, &block->page);
+
+		buf_page_t*	prev_b = UT_LIST_GET_PREV(LRU, &block->page);
+		UT_LIST_REMOVE(buf_pool->LRU, &block->page);
+
+		if (prev_b != NULL) {
+			UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, &new_block->page);
+		} else {
+			UT_LIST_ADD_FIRST(buf_pool->LRU, &new_block->page);
+		}
+
+		if (buf_pool->LRU_old == &block->page) {
+			buf_pool->LRU_old = &new_block->page;
+		}
+
+		ut_ad(new_block->page.in_LRU_list);
+
+		/* relocate unzip_LRU list */
+		if (block->page.zip.data != NULL) {
+			ut_ad(block->in_unzip_LRU_list);
+			ut_d(new_block->in_unzip_LRU_list = TRUE);
+			UNIV_MEM_DESC(&new_block->page.zip.data,
+				      page_zip_get_size(&new_block->page.zip));
+
+			buf_block_t*	prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
+			UT_LIST_REMOVE(buf_pool->unzip_LRU, block);
+
+			ut_d(block->in_unzip_LRU_list = FALSE);
+			block->page.zip.data = NULL;
+			page_zip_set_size(&block->page.zip, 0);
+
+			if (prev_block != NULL) {
+				UT_LIST_INSERT_AFTER(buf_pool->unzip_LRU, prev_block, new_block);
+			} else {
+				UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, new_block);
 			}
+		} else {
+			ut_ad(!block->in_unzip_LRU_list);
+			ut_d(new_block->in_unzip_LRU_list = FALSE);
 		}
-	}
-}
 
-/********************************************************************//**
-Relocate a buffer control block.  Relocates the block on the LRU list
-and in buf_pool->page_hash.  Does not relocate bpage->list.
-The caller must take care of relocating bpage->list. */
-UNIV_INTERN
-void
-buf_relocate(
-/*=========*/
-	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
-				buf_page_get_state(bpage) must be
-				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
-	buf_page_t*	dpage)	/*!< in/out: destination control block */
-{
-	buf_page_t*	b;
-	ulint		fold;
-	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+		/* relocate buf_pool->page_hash */
+		ut_ad(block->page.in_page_hash);
+		ut_ad(&block->page == buf_page_hash_get_low(buf_pool,
+							    block->page.id));
+		ut_d(block->page.in_page_hash = FALSE);
+		ulint	fold = block->page.id.fold();
+		ut_ad(fold == new_block->page.id.fold());
+		HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, (&block->page));
+		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, (&new_block->page));
+
+		ut_ad(new_block->page.in_page_hash);
+
+		buf_block_modify_clock_inc(block);
+		memset(block->frame + FIL_PAGE_OFFSET, 0xff, 4);
+		memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
+		UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
+		buf_block_set_state(block, BUF_BLOCK_REMOVE_HASH);
+		block->page.id.reset(ULINT32_UNDEFINED, ULINT32_UNDEFINED);
 
-	fold = buf_page_address_fold(bpage->space, bpage->offset);
+		/* Relocate buf_pool->flush_list. */
+		if (block->page.oldest_modification) {
+			buf_flush_relocate_on_flush_list(
+				&block->page, &new_block->page);
+		}
 
-	ut_ad(buf_pool_mutex_own(buf_pool));
-	ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage));
-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
-	ut_a(bpage->buf_fix_count == 0);
-	ut_ad(bpage->in_LRU_list);
-	ut_ad(!bpage->in_zip_hash);
-	ut_ad(bpage->in_page_hash);
-	ut_ad(bpage == buf_page_hash_get_low(buf_pool,
-					     bpage->space,
-					     bpage->offset,
-					     fold));
+		/* set other flags of buf_block_t */
 
-	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
-#ifdef UNIV_DEBUG
-	switch (buf_page_get_state(bpage)) {
-	case BUF_BLOCK_POOL_WATCH:
-	case BUF_BLOCK_NOT_USED:
-	case BUF_BLOCK_READY_FOR_USE:
-	case BUF_BLOCK_FILE_PAGE:
-	case BUF_BLOCK_MEMORY:
-	case BUF_BLOCK_REMOVE_HASH:
-		ut_error;
-	case BUF_BLOCK_ZIP_DIRTY:
-	case BUF_BLOCK_ZIP_PAGE:
-		break;
-	}
-#endif /* UNIV_DEBUG */
+		ut_ad(!block->index);
+		new_block->index	= NULL;
+		new_block->n_hash_helps	= 0;
+		new_block->n_fields	= 1;
+		new_block->left_side	= TRUE;
 
-	memcpy(dpage, bpage, sizeof *dpage);
+		new_block->lock_hash_val = block->lock_hash_val;
+		ut_ad(new_block->lock_hash_val == lock_rec_hash(
+			new_block->page.id.space(),
+			new_block->page.id.page_no()));
 
-	ut_d(bpage->in_LRU_list = FALSE);
-	ut_d(bpage->in_page_hash = FALSE);
+		rw_lock_x_unlock(hash_lock);
+		mutex_exit(&new_block->mutex);
 
-	/* relocate buf_pool->LRU */
-	b = UT_LIST_GET_PREV(LRU, bpage);
-	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+		/* free block */
+		buf_block_set_state(block, BUF_BLOCK_MEMORY);
+		buf_LRU_block_free_non_file_page(block);
 
-	if (b) {
-		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
+		mutex_exit(&block->mutex);
 	} else {
-		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
+		rw_lock_x_unlock(hash_lock);
+		mutex_exit(&block->mutex);
+
+		/* free new_block */
+		mutex_enter(&new_block->mutex);
+		buf_LRU_block_free_non_file_page(new_block);
+		mutex_exit(&new_block->mutex);
+	}
+
+	return(true); /* free_list was enough */
+}
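The LRU manipulation in buf_page_realloc() is the classic relocate-in-place pattern for an intrusive list: remember the predecessor, unlink the old node, then insert the replacement after the predecessor (or at the head). The same idea with std::list standing in for UT_LIST:

	#include <list>

	/* Replace the element at pos while keeping its list position,
	analogous to the UT_LIST_REMOVE / UT_LIST_INSERT_AFTER pair
	used on buf_pool->LRU. */
	template <typename T>
	void relocate(std::list<T>& lru,
		      typename std::list<T>::iterator pos, const T& repl)
	{
		auto	next = lru.erase(pos);	/* unlink old node */
		lru.insert(next, repl);		/* new node, same spot */
	}
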
+
+/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status
+to the specified string. The format and the following parameters are the
+same as the ones used for printf(3).
+@param[in]	fmt	format
+@param[in]	...	extra parameters according to fmt */
+static
+void
+buf_resize_status(
+	const char*	fmt,
+	...)
+{
+	va_list	ap;
+
+	va_start(ap, fmt);
+
+	ut_vsnprintf(
+		export_vars.innodb_buffer_pool_resize_status,
+		sizeof(export_vars.innodb_buffer_pool_resize_status),
+		fmt, ap);
+
+	va_end(ap);
+
+	ib::info() << export_vars.innodb_buffer_pool_resize_status;
+}
+
+/** Determines if a block is intended to be withdrawn.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	block		pointer to control block
+@retval true	if will be withdrawn */
+bool
+buf_block_will_withdrawn(
+	buf_pool_t*		buf_pool,
+	const buf_block_t*	block)
+{
+	ut_ad(buf_pool->curr_size < buf_pool->old_size);
+	ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool));
+
+	const buf_chunk_t*	chunk
+		= buf_pool->chunks + buf_pool->n_chunks_new;
+	const buf_chunk_t*	echunk
+		= buf_pool->chunks + buf_pool->n_chunks;
+
+	while (chunk < echunk) {
+		if (block >= chunk->blocks
+		    && block < chunk->blocks + chunk->size) {
+			return(true);
+		}
+		++chunk;
+	}
+
+	return(false);
+}
+
+/** Determines if a frame is intended to be withdrawn.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	ptr		pointer to a frame
+@retval true	if will be withdrawn */
+bool
+buf_frame_will_withdrawn(
+	buf_pool_t*	buf_pool,
+	const byte*	ptr)
+{
+	ut_ad(buf_pool->curr_size < buf_pool->old_size);
+	ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool));
+
+	const buf_chunk_t*	chunk
+		= buf_pool->chunks + buf_pool->n_chunks_new;
+	const buf_chunk_t*	echunk
+		= buf_pool->chunks + buf_pool->n_chunks;
+
+	while (chunk < echunk) {
+		if (ptr >= chunk->blocks->frame
+		    && ptr < (chunk->blocks + chunk->size - 1)->frame
+			     + UNIV_PAGE_SIZE) {
+			return(true);
+		}
+		++chunk;
+	}
+
+	return(false);
+}
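Both predicates above are plain half-open range tests over the chunks that are about to disappear (chunk indexes n_chunks_new .. n_chunks - 1). Reduced to its essentials:

	#include <cstddef>

	struct Chunk {
		const unsigned char*	base;
		size_t			bytes;
	};

	/* True if ptr falls inside any chunk in [chunk, end). */
	static bool in_withdrawn_area(const unsigned char* ptr,
				      const Chunk* chunk, const Chunk* end)
	{
		for (; chunk < end; ++chunk) {
			if (ptr >= chunk->base
			    && ptr < chunk->base + chunk->bytes) {
				return(true);
			}
		}
		return(false);
	}
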
+
+/** Withdraw the buffer pool blocks from the end of the buffer pool
+instance until the length of the withdraw list reaches
+buf_pool->withdraw_target.
+@param[in]	buf_pool	buffer pool instance
+@retval true	if retry is needed */
+static
+bool
+buf_pool_withdraw_blocks(
+	buf_pool_t*	buf_pool)
+{
+	buf_block_t*	block;
+	ulint		loop_count = 0;
+	ulint		i = buf_pool_index(buf_pool);
+
+	ib::info() << "buffer pool " << i
+		<< " : start to withdraw the last "
+		<< buf_pool->withdraw_target << " blocks.";
+
+	/* Minimize buf_pool->zip_free[i] lists */
+	buf_pool_mutex_enter(buf_pool);
+	buf_buddy_condense_free(buf_pool);
+	buf_pool_mutex_exit(buf_pool);
+
+	while (UT_LIST_GET_LEN(buf_pool->withdraw)
+	       < buf_pool->withdraw_target) {
+
+		/* try to withdraw from free_list */
+		ulint	count1 = 0;
+
+		buf_pool_mutex_enter(buf_pool);
+		block = reinterpret_cast<buf_block_t*>(
+			UT_LIST_GET_FIRST(buf_pool->free));
+		while (block != NULL
+		       && UT_LIST_GET_LEN(buf_pool->withdraw)
+			  < buf_pool->withdraw_target) {
+			ut_ad(block->page.in_free_list);
+			ut_ad(!block->page.in_flush_list);
+			ut_ad(!block->page.in_LRU_list);
+			ut_a(!buf_page_in_file(&block->page));
+
+			buf_block_t*	next_block;
+			next_block = reinterpret_cast<buf_block_t*>(
+				UT_LIST_GET_NEXT(
+					list, &block->page));
+
+			if (buf_block_will_withdrawn(buf_pool, block)) {
+				/* This should be withdrawn */
+				UT_LIST_REMOVE(
+					buf_pool->free,
+					&block->page);
+				UT_LIST_ADD_LAST(
+					buf_pool->withdraw,
+					&block->page);
+				ut_d(block->in_withdraw_list = TRUE);
+				count1++;
+			}
+
+			block = next_block;
+		}
+		buf_pool_mutex_exit(buf_pool);
+
+		/* reserve free_list length */
+		if (UT_LIST_GET_LEN(buf_pool->withdraw)
+		    < buf_pool->withdraw_target) {
+			ulint	scan_depth;
+			ulint	n_flushed = 0;
+
+			/* cap scan_depth with current LRU size. */
+			buf_pool_mutex_enter(buf_pool);
+			scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
+			buf_pool_mutex_exit(buf_pool);
+
+			scan_depth = ut_min(
+				ut_max(buf_pool->withdraw_target
+				       - UT_LIST_GET_LEN(buf_pool->withdraw),
+				       static_cast<ulint>(srv_LRU_scan_depth)),
+				scan_depth);
+
+			buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU,
+				scan_depth, 0, &n_flushed);
+			buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
+
+			if (n_flushed) {
+				MONITOR_INC_VALUE_CUMULATIVE(
+					MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
+					MONITOR_LRU_BATCH_FLUSH_COUNT,
+					MONITOR_LRU_BATCH_FLUSH_PAGES,
+					n_flushed);
+			}
+		}
+
+		/* relocate blocks/buddies in withdrawn area */
+		ulint	count2 = 0;
+
+		buf_pool_mutex_enter(buf_pool);
+		buf_page_t*	bpage;
+		bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+		while (bpage != NULL) {
+			BPageMutex*	block_mutex;
+			buf_page_t*	next_bpage;
+
+			block_mutex = buf_page_get_mutex(bpage);
+			mutex_enter(block_mutex);
+
+			next_bpage = UT_LIST_GET_NEXT(LRU, bpage);
+
+			if (bpage->zip.data != NULL
+			    && buf_frame_will_withdrawn(
+				buf_pool,
+				static_cast<byte*>(bpage->zip.data))) {
+
+				if (buf_page_can_relocate(bpage)) {
+					mutex_exit(block_mutex);
+					buf_pool_mutex_exit_forbid(buf_pool);
+					if(!buf_buddy_realloc(
+						buf_pool, bpage->zip.data,
+						page_zip_get_size(
+							&bpage->zip))) {
+
+						/* failed to allocate block */
+						buf_pool_mutex_exit_allow(
+							buf_pool);
+						break;
+					}
+					buf_pool_mutex_exit_allow(buf_pool);
+					mutex_enter(block_mutex);
+					count2++;
+				}
+				/* NOTE: if the page is in use,
+				it is not reallocated yet */
+			}
+
+			if (buf_page_get_state(bpage)
+			    == BUF_BLOCK_FILE_PAGE
+			    && buf_block_will_withdrawn(
+				buf_pool,
+				reinterpret_cast<buf_block_t*>(bpage))) {
+
+				if (buf_page_can_relocate(bpage)) {
+					mutex_exit(block_mutex);
+					buf_pool_mutex_exit_forbid(buf_pool);
+					if(!buf_page_realloc(
+						buf_pool,
+						reinterpret_cast<buf_block_t*>(
+							bpage))) {
+						/* failed to allocate block */
+						buf_pool_mutex_exit_allow(
+							buf_pool);
+						break;
+					}
+					buf_pool_mutex_exit_allow(buf_pool);
+					count2++;
+				} else {
+					mutex_exit(block_mutex);
+				}
+				/* NOTE: if the page is in use,
+				it is not reallocated yet */
+			} else {
+				mutex_exit(block_mutex);
+			}
+
+			bpage = next_bpage;
+		}
+		buf_pool_mutex_exit(buf_pool);
+
+		buf_resize_status(
+			"buffer pool %lu : withdrawing blocks. (%lu/%lu)",
+			i, UT_LIST_GET_LEN(buf_pool->withdraw),
+			buf_pool->withdraw_target);
+
+		ib::info() << "buffer pool " << i << " : withdrew "
+			<< count1 << " blocks from the free list."
+			<< " Tried to relocate " << count2 << " pages ("
+			<< UT_LIST_GET_LEN(buf_pool->withdraw) << "/"
+			<< buf_pool->withdraw_target << ").";
+
+		if (++loop_count >= 10) {
+			/* give up for now;
+			retry after the user threads have paused. */
+
+			ib::info() << "buffer pool " << i
+				<< " : will retry to withdraw later.";
+
+			/* need retry later */
+			return(true);
+		}
+	}
+
+	/* confirm that enough blocks were withdrawn */
+	const buf_chunk_t*	chunk
+		= buf_pool->chunks + buf_pool->n_chunks_new;
+	const buf_chunk_t*	echunk
+		= buf_pool->chunks + buf_pool->n_chunks;
+
+	while (chunk < echunk) {
+		block = chunk->blocks;
+		for (ulint j = chunk->size; j--; block++) {
+			/* A block in the withdrawn area whose
+			state is not BUF_BLOCK_NOT_USED indicates
+			corruption. */
+			ut_a(buf_block_get_state(block)
+				== BUF_BLOCK_NOT_USED);
+			ut_ad(block->in_withdraw_list);
+		}
+		++chunk;
+	}
+
+	ib::info() << "buffer pool " << i << " : withdrew the target of "
+		<< UT_LIST_GET_LEN(buf_pool->withdraw) << " blocks.";
+
+	/* retry is not needed */
+	++buf_withdraw_clock;
+	os_wmb;
+
+	return(false);
+}
+
+/** Resize page_hash and zip_hash for a buffer pool instance.
+@param[in]	buf_pool	buffer pool instance */
+static
+void
+buf_pool_resize_hash(
+	buf_pool_t*	buf_pool)
+{
+	hash_table_t*	new_hash_table;
+
+	ut_ad(buf_pool->page_hash_old == NULL);
+
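+	/* Note: the old page_hash table is not freed here; it is
+	parked in page_hash_old and released later in buf_pool_resize(),
+	after all page_hash locks have been released. */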
+	/* recreate page_hash */
+	new_hash_table = ib_recreate(
+		buf_pool->page_hash, 2 * buf_pool->curr_size);
+
+	for (ulint i = 0; i < hash_get_n_cells(buf_pool->page_hash); i++) {
+		buf_page_t*	bpage;
+
+		bpage = static_cast<buf_page_t*>(
+			HASH_GET_FIRST(
+				buf_pool->page_hash, i));
+
+		while (bpage) {
+			buf_page_t*	prev_bpage = bpage;
+			ulint		fold;
+
+			bpage = static_cast<buf_page_t*>(
+				HASH_GET_NEXT(
+					hash, prev_bpage));
+
+			fold = prev_bpage->id.fold();
+
+			HASH_DELETE(buf_page_t, hash,
+				buf_pool->page_hash, fold,
+				prev_bpage);
+
+			HASH_INSERT(buf_page_t, hash,
+				new_hash_table, fold,
+				prev_bpage);
+		}
+	}
+
+	buf_pool->page_hash_old = buf_pool->page_hash;
+	buf_pool->page_hash = new_hash_table;
+
+	/* recreate zip_hash */
+	new_hash_table = hash_create(2 * buf_pool->curr_size);
+
+	for (ulint i = 0; i < hash_get_n_cells(buf_pool->zip_hash); i++) {
+		buf_page_t*	bpage;
+
+		bpage = static_cast<buf_page_t*>(
+			HASH_GET_FIRST(buf_pool->zip_hash, i));
+
+		while (bpage) {
+			buf_page_t*	prev_bpage = bpage;
+			ulint		fold;
+
+			bpage = static_cast<buf_page_t*>(
+				HASH_GET_NEXT(
+					hash, prev_bpage));
+
+			fold = BUF_POOL_ZIP_FOLD(
+				reinterpret_cast<buf_block_t*>(
+					prev_bpage));
+
+			HASH_DELETE(buf_page_t, hash,
+				buf_pool->zip_hash, fold,
+				prev_bpage);
+
+			HASH_INSERT(buf_page_t, hash,
+				new_hash_table, fold,
+				prev_bpage);
+		}
+	}
+
+	hash_table_free(buf_pool->zip_hash);
+	buf_pool->zip_hash = new_hash_table;
+}
+
+#ifndef DBUG_OFF
+/** This is a debug routine to inject a memory allocation failure. */
+static
+void
+buf_pool_resize_chunk_make_null(buf_chunk_t** new_chunks)
+{
+	static int count = 0;
+
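+	/* Make the allocation appear to fail on the second call
+	only (count == 1). */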
+	if (count == 1) {
+		ut_free(*new_chunks);
+		*new_chunks = NULL;
+	}
+
+	count++;
+}
+#endif /* DBUG_OFF */
+
+/** Resize the buffer pool from srv_buf_pool_old_size to
+srv_buf_pool_size. */
+void
+buf_pool_resize()
+{
+	buf_pool_t*	buf_pool;
+	ulint		new_instance_size;
+	bool		warning = false;
+
+	ut_ad(!buf_pool_resizing);
+	ut_ad(!buf_pool_withdrawing);
+	ut_ad(srv_buf_pool_chunk_unit > 0);
+
+	new_instance_size = srv_buf_pool_size / srv_buf_pool_instances;
+	new_instance_size /= UNIV_PAGE_SIZE;
+
+	buf_resize_status("Resizing buffer pool from " ULINTPF " to "
+			  ULINTPF " (unit=" ULINTPF ").",
+			  srv_buf_pool_old_size, srv_buf_pool_size,
+			  srv_buf_pool_chunk_unit);
+
+	/* set the new limit for all buffer pool instances for resizing */
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+		buf_pool = buf_pool_from_array(i);
+		buf_pool_mutex_enter(buf_pool);
+
+		ut_ad(buf_pool->curr_size == buf_pool->old_size);
+		ut_ad(buf_pool->n_chunks_new == buf_pool->n_chunks);
+		ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0);
+		ut_ad(buf_pool->flush_rbt == NULL);
+
+		buf_pool->curr_size = new_instance_size;
+
+		buf_pool->n_chunks_new = new_instance_size * UNIV_PAGE_SIZE
+			/ srv_buf_pool_chunk_unit;
+
+		buf_pool_mutex_exit(buf_pool);
+	}
+
+	/* disable AHI if needed */
+	bool	btr_search_disabled = false;
+
+	buf_resize_status("Disabling adaptive hash index.");
+
+	btr_search_s_lock_all();
+	if (btr_search_enabled) {
+		btr_search_s_unlock_all();
+		btr_search_disabled = true;
+	} else {
+		btr_search_s_unlock_all();
+	}
+
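+	/* Disable the AHI unconditionally; btr_search_disabled only
+	records whether it was enabled before, so that the change is
+	logged here and undone at the end of the resize. */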
+	btr_search_disable(true);
+
+	if (btr_search_disabled) {
+		ib::info() << "disabled adaptive hash index.";
+	}
+
+	/* set withdraw target */
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+		buf_pool = buf_pool_from_array(i);
+		if (buf_pool->curr_size < buf_pool->old_size) {
+			ulint	withdraw_target = 0;
+
+			const buf_chunk_t*	chunk
+				= buf_pool->chunks + buf_pool->n_chunks_new;
+			const buf_chunk_t*	echunk
+				= buf_pool->chunks + buf_pool->n_chunks;
+
+			while (chunk < echunk) {
+				withdraw_target += chunk->size;
+				++chunk;
+			}
+
+			ut_ad(buf_pool->withdraw_target == 0);
+			buf_pool->withdraw_target = withdraw_target;
+			buf_pool_withdrawing = true;
+		}
+	}
+
+	buf_resize_status("Withdrawing blocks to be shrunken.");
+
+	ib_time_t	withdraw_started = ut_time();
+	ulint		message_interval = 60;
+	ulint		retry_interval = 1;
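+	/* Note: below, message_interval doubles and is capped at
+	1800 seconds, while retry_interval doubles and is capped at
+	10 seconds. */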
+
+withdraw_retry:
+	bool	should_retry_withdraw = false;
+
+	/* wait until the number of blocks fits the new size (if needed) */
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+		buf_pool = buf_pool_from_array(i);
+		if (buf_pool->curr_size < buf_pool->old_size) {
+
+			should_retry_withdraw |=
+				buf_pool_withdraw_blocks(buf_pool);
+		}
+	}
+
+	if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+		/* abort the resize due to shutdown. */
+		buf_pool_withdrawing = false;
+		return;
+	}
+
+	/* abort buffer pool load */
+	buf_load_abort();
+
+	if (should_retry_withdraw
+	    && ut_difftime(ut_time(), withdraw_started) >= message_interval) {
+
+		if (message_interval > 900) {
+			message_interval = 1800;
+		} else {
+			message_interval *= 2;
+		}
+
+		lock_mutex_enter();
+		trx_sys_mutex_enter();
+		bool	found = false;
+		for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+		     trx != NULL;
+		     trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
+			if (trx->state != TRX_STATE_NOT_STARTED
+			    && trx->mysql_thd != NULL
+			    && ut_difftime(withdraw_started,
+					   trx->start_time) > 0) {
+				if (!found) {
+					ib::warn() <<
+						"The following trx might hold"
+						" blocks in the buffer pool to"
+						" be withdrawn. Buffer pool"
+						" resizing can complete only"
+						" after all the transactions"
+						" below release the blocks.";
+					found = true;
+				}
+
+				lock_trx_print_wait_and_mvcc_state(
+					stderr, trx);
+			}
+		}
+		trx_sys_mutex_exit();
+		lock_mutex_exit();
+
+		withdraw_started = ut_time();
+	}
+
+	if (should_retry_withdraw) {
+		ib::info() << "Will retry to withdraw after "
+			<< retry_interval << " seconds.";
+		os_thread_sleep(retry_interval * 1000000);
+
+		if (retry_interval > 5) {
+			retry_interval = 10;
+		} else {
+			retry_interval *= 2;
+		}
+
+		goto withdraw_retry;
+	}
+
+	buf_pool_withdrawing = false;
+
+	buf_resize_status("Latching whole of buffer pool.");
+
+#ifndef DBUG_OFF
+	{
+		bool	should_wait = true;
+
+		while (should_wait) {
+			should_wait = false;
+			DBUG_EXECUTE_IF(
+				"ib_buf_pool_resize_wait_before_resize",
+				should_wait = true; os_thread_sleep(10000););
+		}
+	}
+#endif /* !DBUG_OFF */
+
+	if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+		return;
+	}
+
+	/* Indicate critical path */
+	buf_pool_resizing = true;
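+	/* While buf_pool_resizing is set, buf_block_align() spins
+	instead of looking up buf_chunk_map_ref, which is about to
+	be replaced. */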
+
+	/* Acquire all buf_pool_mutex/hash_lock */
+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool = buf_pool_from_array(i);
+
+		buf_pool_mutex_enter(buf_pool);
+	}
+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool = buf_pool_from_array(i);
+
+		hash_lock_x_all(buf_pool->page_hash);
+	}
+
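+	/* Build a fresh chunk map; surviving chunks are re-registered
+	into it below, and it is published by pointing buf_chunk_map_ref
+	at it once all instances have been resized. */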
+	buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t());
+
+	/* add/delete chunks */
+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool = buf_pool_from_array(i);
+		buf_chunk_t*	chunk;
+		buf_chunk_t*	echunk;
+
+		buf_resize_status("buffer pool %lu :"
+			" resizing chunk count from %lu to %lu.",
+			i, buf_pool->n_chunks, buf_pool->n_chunks_new);
+
+		if (buf_pool->n_chunks_new < buf_pool->n_chunks) {
+			/* delete chunks */
+			chunk = buf_pool->chunks
+				+ buf_pool->n_chunks_new;
+			echunk = buf_pool->chunks + buf_pool->n_chunks;
+
+			ulint	sum_freed = 0;
+
+			while (chunk < echunk) {
+				buf_block_t*	block = chunk->blocks;
+
+				for (ulint j = chunk->size;
+				     j--; block++) {
+					mutex_free(&block->mutex);
+					rw_lock_free(&block->lock);
+
+					ut_d(rw_lock_free(
+						&block->debug_latch));
+				}
+
+				buf_pool->allocator.deallocate_large(
+					chunk->mem, &chunk->mem_pfx);
+
+				sum_freed += chunk->size;
+
+				++chunk;
+			}
+
+			/* discard withdraw list */
+			UT_LIST_INIT(buf_pool->withdraw,
+				     &buf_page_t::list);
+			buf_pool->withdraw_target = 0;
+
+			ib::info() << "buffer pool " << i << " : "
+				<< buf_pool->n_chunks - buf_pool->n_chunks_new
+				<< " chunks (" << sum_freed
+				<< " blocks) were freed.";
+
+			buf_pool->n_chunks = buf_pool->n_chunks_new;
+		}
+
+		{
+			/* reallocate buf_pool->chunks */
+			const ulint	new_chunks_size
+				= buf_pool->n_chunks_new * sizeof(*chunk);
+
+			buf_chunk_t*	new_chunks
+				= reinterpret_cast<buf_chunk_t*>(
+					ut_zalloc_nokey_nofatal(new_chunks_size));
+
+			DBUG_EXECUTE_IF("buf_pool_resize_chunk_null",
+				buf_pool_resize_chunk_make_null(&new_chunks););
+
+			if (new_chunks == NULL) {
+				ib::error() << "buffer pool " << i
+					<< " : failed to allocate"
+					" the chunk array.";
+				buf_pool->n_chunks_new
+					= buf_pool->n_chunks;
+				warning = true;
+				buf_pool->chunks_old = NULL;
+				goto calc_buf_pool_size;
+			}
+
+			ulint	n_chunks_copy = ut_min(buf_pool->n_chunks_new,
+						       buf_pool->n_chunks);
+
+			memcpy(new_chunks, buf_pool->chunks,
+			       n_chunks_copy * sizeof(*chunk));
+
+			for (ulint j = 0; j < n_chunks_copy; j++) {
+				buf_pool_register_chunk(&new_chunks[j]);
+			}
+
+			buf_pool->chunks_old = buf_pool->chunks;
+			buf_pool->chunks = new_chunks;
+		}
+
+
+		if (buf_pool->n_chunks_new > buf_pool->n_chunks) {
+			/* add chunks */
+			chunk = buf_pool->chunks + buf_pool->n_chunks;
+			echunk = buf_pool->chunks
+				+ buf_pool->n_chunks_new;
+
+			ulint	sum_added = 0;
+			ulint	n_chunks = buf_pool->n_chunks;
+
+			while (chunk < echunk) {
+				ulong	unit = srv_buf_pool_chunk_unit;
+
+				if (!buf_chunk_init(buf_pool, chunk, unit)) {
+
+					ib::error() << "buffer pool " << i
+						<< " : failed to allocate"
+						" new memory.";
+
+					warning = true;
+
+					buf_pool->n_chunks_new
+						= n_chunks;
+
+					break;
+				}
+
+				sum_added += chunk->size;
+
+				++n_chunks;
+				++chunk;
+			}
+
+			ib::info() << "buffer pool " << i << " : "
+				<< buf_pool->n_chunks_new - buf_pool->n_chunks
+				<< " chunks (" << sum_added
+				<< " blocks) were added.";
+
+			buf_pool->n_chunks = n_chunks;
+		}
+calc_buf_pool_size:
+
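+		/* If growing failed part-way, n_chunks_new was clamped
+		above, so the size recomputed here reflects only the
+		chunks actually present. */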
+		/* recalc buf_pool->curr_size */
+		ulint	new_size = 0;
+
+		chunk = buf_pool->chunks;
+		do {
+			new_size += chunk->size;
+		} while (++chunk < buf_pool->chunks
+				   + buf_pool->n_chunks);
+
+		buf_pool->curr_size = new_size;
+		buf_pool->n_chunks_new = buf_pool->n_chunks;
+
+		if (buf_pool->chunks_old) {
+			ut_free(buf_pool->chunks_old);
+			buf_pool->chunks_old = NULL;
+		}
+	}
+
+	buf_pool_chunk_map_t*	chunk_map_old = buf_chunk_map_ref;
+	buf_chunk_map_ref = buf_chunk_map_reg;
+
+	/* set instance sizes */
+	{
+		ulint	curr_size = 0;
+
+		for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+			buf_pool = buf_pool_from_array(i);
+
+			ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0);
+
+			buf_pool->read_ahead_area =
+				ut_min(BUF_READ_AHEAD_PAGES,
+				       ut_2_power_up(buf_pool->curr_size /
+						      BUF_READ_AHEAD_PORTION));
+			buf_pool->curr_pool_size
+				= buf_pool->curr_size * UNIV_PAGE_SIZE;
+			curr_size += buf_pool->curr_pool_size;
+			buf_pool->old_size = buf_pool->curr_size;
+		}
+		srv_buf_pool_curr_size = curr_size;
+		innodb_set_buf_pool_size(buf_pool_size_align(curr_size));
+	}
+
+	const bool	new_size_too_diff
+		= srv_buf_pool_base_size > srv_buf_pool_size * 2
+			|| srv_buf_pool_base_size * 2 < srv_buf_pool_size;
+
+	/* Normalize page_hash and zip_hash,
+	if the new size is too different */
+	if (!warning && new_size_too_diff) {
+
+		buf_resize_status("Resizing hash tables.");
+
+		for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+			buf_pool_t*	buf_pool = buf_pool_from_array(i);
+
+			buf_pool_resize_hash(buf_pool);
+
+			ib::info() << "buffer pool " << i
+				<< " : hash tables were resized.";
+		}
+	}
+
+	/* Release all buf_pool_mutex/page_hash */
+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool = buf_pool_from_array(i);
+
+		hash_unlock_x_all(buf_pool->page_hash);
+		buf_pool_mutex_exit(buf_pool);
+
+		if (buf_pool->page_hash_old != NULL) {
+			hash_table_free(buf_pool->page_hash_old);
+			buf_pool->page_hash_old = NULL;
+		}
+	}
+
+	ut_d(rw_lock_x_lock(buf_chunk_map_latch));
+	UT_DELETE(chunk_map_old);
+	ut_d(rw_lock_x_unlock(buf_chunk_map_latch));
+
+	buf_pool_resizing = false;
+
+	/* Normalize other components, if the new size is too different */
+	if (!warning && new_size_too_diff) {
+		srv_buf_pool_base_size = srv_buf_pool_size;
+
+		buf_resize_status("Resizing also other hash tables.");
+
+		/* normalize lock_sys */
+		srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
+		lock_sys_resize(srv_lock_table_size);
+
+		/* normalize btr_search_sys */
+		btr_search_sys_resize(
+			buf_pool_get_curr_size() / sizeof(void*) / 64);
+
+		/* normalize dict_sys */
+		dict_resize();
+
+		ib::info() << "Resized hash tables at lock_sys,"
+			" adaptive hash index, dictionary.";
+	}
+
+	/* normalize ibuf->max_size */
+	ibuf_max_size_update(srv_change_buffer_max_size);
+
+	if (srv_buf_pool_old_size != srv_buf_pool_size) {
+
+		ib::info() << "Completed resizing buffer pool from "
+			<< srv_buf_pool_old_size
+			<< " to " << srv_buf_pool_size << ".";
+		srv_buf_pool_old_size = srv_buf_pool_size;
+	}
+
+	/* enable AHI if needed */
+	if (btr_search_disabled) {
+		btr_search_enable();
+		ib::info() << "Re-enabled adaptive hash index.";
+	}
+
+	char	now[32];
+
+	ut_sprintf_timestamp(now);
+	if (!warning) {
+		buf_resize_status("Completed resizing buffer pool at %s.",
+			now);
+	} else {
+		buf_resize_status("Resizing buffer pool failed,"
+			" finished resizing at %s.", now);
+	}
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(buf_validate());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+	return;
+}
+
+/** This is the thread for resizing buffer pool. It waits for an event and,
+when woken up, performs a resize and sleeps again.
+@param[in]	arg	a dummy parameter required by os_thread_create.
+@return	this function does not return; it calls os_thread_exit()
+*/
+extern "C"
+os_thread_ret_t
+DECLARE_THREAD(buf_resize_thread)(
+	void*	arg __attribute__((unused)))
+{
+	my_thread_init();
+
+	srv_buf_resize_thread_active = true;
+
+	buf_resize_status("not started");
+
+	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+		os_event_wait(srv_buf_resize_event);
+		os_event_reset(srv_buf_resize_event);
+
+		if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+			break;
+		}
+
+		buf_pool_mutex_enter_all();
+		if (srv_buf_pool_old_size == srv_buf_pool_size) {
+			buf_pool_mutex_exit_all();
+			std::ostringstream sout;
+			sout << "Size did not change (old size = new size = "
+				<< srv_buf_pool_size << "). Nothing to do.";
+			buf_resize_status(sout.str().c_str());
+
+			/* nothing to do */
+			continue;
+		}
+		buf_pool_mutex_exit_all();
+
+		buf_pool_resize();
+	}
+
+	srv_buf_resize_thread_active = false;
+
+	my_thread_end();
+	os_thread_exit(NULL);
+
+	OS_THREAD_DUMMY_RETURN;
+}
+
+/********************************************************************//**
+Clears the adaptive hash index on all pages in the buffer pool. */
+void
+buf_pool_clear_hash_index(void)
+/*===========================*/
+{
+	ulint	p;
+
+	ut_ad(btr_search_own_all(RW_LOCK_X));
+	ut_ad(!buf_pool_resizing);
+	ut_ad(!btr_search_enabled);
+
+	for (p = 0; p < srv_buf_pool_instances; p++) {
+		buf_pool_t*	buf_pool = buf_pool_from_array(p);
+		buf_chunk_t*	chunks	= buf_pool->chunks;
+		buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;
+
+		while (--chunk >= chunks) {
+			buf_block_t*	block	= chunk->blocks;
+			ulint		i	= chunk->size;
+
+			for (; i--; block++) {
+				dict_index_t*	index	= block->index;
+
+				/* We can set block->index = NULL
+				when we have an x-latch on search latch;
+				see the comment in buf0buf.h */
+
+				if (!index) {
+					/* Not hashed */
+					continue;
+				}
+
+				block->index = NULL;
+# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+				block->n_pointers = 0;
+# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+			}
+		}
+	}
+}
+
+/********************************************************************//**
+Relocate a buffer control block.  Relocates the block on the LRU list
+and in buf_pool->page_hash.  Does not relocate bpage->list.
+The caller must take care of relocating bpage->list. */
+static
+void
+buf_relocate(
+/*=========*/
+	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
+				buf_page_get_state(bpage) must be
+				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
+	buf_page_t*	dpage)	/*!< in/out: destination control block */
+{
+	buf_page_t*	b;
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+
+	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage));
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+	ut_a(bpage->buf_fix_count == 0);
+	ut_ad(bpage->in_LRU_list);
+	ut_ad(!bpage->in_zip_hash);
+	ut_ad(bpage->in_page_hash);
+	ut_ad(bpage == buf_page_hash_get_low(buf_pool, bpage->id));
+
+	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+#ifdef UNIV_DEBUG
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_POOL_WATCH:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_FILE_PAGE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_error;
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_ZIP_PAGE:
+		break;
+	}
+#endif /* UNIV_DEBUG */
+
+	memcpy(dpage, bpage, sizeof *dpage);
+
+	/* Important that we adjust the hazard pointer before
+	removing bpage from LRU list. */
+	buf_LRU_adjust_hp(buf_pool, bpage);
+
+	ut_d(bpage->in_LRU_list = FALSE);
+	ut_d(bpage->in_page_hash = FALSE);
+
+	/* relocate buf_pool->LRU */
+	b = UT_LIST_GET_PREV(LRU, bpage);
+	UT_LIST_REMOVE(buf_pool->LRU, bpage);
+
+	if (b != NULL) {
+		UT_LIST_INSERT_AFTER(buf_pool->LRU, b, dpage);
+	} else {
+		UT_LIST_ADD_FIRST(buf_pool->LRU, dpage);
 	}
 
 	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
@@ -1655,23 +3110,101 @@ buf_relocate(
 #endif /* UNIV_LRU_DEBUG */
 	}
 
-        ut_d(UT_LIST_VALIDATE(
-		LRU, buf_page_t, buf_pool->LRU, CheckInLRUList()));
+	ut_d(CheckInLRUList::validate(buf_pool));
 
 	/* relocate buf_pool->page_hash */
+	ulint	fold = bpage->id.fold();
+	ut_ad(fold == dpage->id.fold());
 	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
 	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
 }
 
-/********************************************************************//**
-Determine if a block is a sentinel for a buffer pool watch.
-@return	TRUE if a sentinel for a buffer pool watch, FALSE if not */
-UNIV_INTERN
+/** Hazard Pointer implementation. */
+
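+/* A hazard pointer records the position of a list scan so that, when
+another thread removes the pointed-to page from the list, the scan
+position can be adjusted (moved to the previous element) instead of
+being invalidated. */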
+/** Set current value
+@param bpage	buffer block to be set as hp */
+void
+HazardPointer::set(buf_page_t* bpage)
+{
+	ut_ad(mutex_own(m_mutex));
+	ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool);
+	ut_ad(!bpage || buf_page_in_file(bpage));
+
+	m_hp = bpage;
+}
+
+/** Checks if a bpage is the hp
+@param bpage    buffer block to be compared
+@return true if it is hp */
+
+bool
+HazardPointer::is_hp(const buf_page_t* bpage)
+{
+	ut_ad(mutex_own(m_mutex));
+	ut_ad(!m_hp || buf_pool_from_bpage(m_hp) == m_buf_pool);
+	ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool);
+
+	return(bpage == m_hp);
+}
+
+/** Adjust the value of hp. This happens when some other thread working
+on the same list attempts to remove the hp from the list.
+@param bpage	buffer block to be compared */
+
+void
+FlushHp::adjust(const buf_page_t* bpage)
+{
+	ut_ad(bpage != NULL);
+
+	/** We only support reverse traversal for now. */
+	if (is_hp(bpage)) {
+		m_hp = UT_LIST_GET_PREV(list, m_hp);
+	}
+
+	ut_ad(!m_hp || m_hp->in_flush_list);
+}
+
+/** Adjust the value of hp. This happens when some other thread working
+on the same list attempts to remove the hp from the list.
+@param bpage	buffer block to be compared */
+
+void
+LRUHp::adjust(const buf_page_t* bpage)
+{
+	ut_ad(bpage);
+
+	/** We only support reverse traversal for now. */
+	if (is_hp(bpage)) {
+		m_hp = UT_LIST_GET_PREV(LRU, m_hp);
+	}
+
+	ut_ad(!m_hp || m_hp->in_LRU_list);
+}
+
+/** Selects from where to start a scan. If we have scanned too deep into
+the LRU list it resets the value to the tail of the LRU list.
+@return buf_page_t from where to start scan. */
+
+buf_page_t*
+LRUItr::start()
+{
+	ut_ad(mutex_own(m_mutex));
+
+	if (!m_hp || m_hp->old) {
+		m_hp = UT_LIST_GET_LAST(m_buf_pool->LRU);
+	}
+
+	return(m_hp);
+}
+
+/** Determine if a block is a sentinel for a buffer pool watch.
+@param[in]	buf_pool	buffer pool instance
+@param[in]	bpage		block
+@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
 ibool
 buf_pool_watch_is_sentinel(
-/*=======================*/
-	buf_pool_t*		buf_pool,	/*!< buffer pool instance */
-	const buf_page_t*	bpage)		/*!< in: block */
+	const buf_pool_t*	buf_pool,
+	const buf_page_t*	bpage)
 {
 	/* We must also own the appropriate hash lock. */
 	ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage));
@@ -1690,35 +3223,29 @@ buf_pool_watch_is_sentinel(
 	ut_ad(!bpage->in_zip_hash);
 	ut_ad(bpage->in_page_hash);
 	ut_ad(bpage->zip.data == NULL);
-	ut_ad(bpage->buf_fix_count > 0);
 	return(TRUE);
 }
 
-/****************************************************************//**
-Add watch for the given page to be read in. Caller must have
+/** Add watch for the given page to be read in. Caller must have
 appropriate hash_lock for the bpage. This function may release the
 hash_lock and reacquire it.
+@param[in]	page_id		page id
+@param[in,out]	hash_lock	hash_lock currently latched
 @return NULL if watch set, block if the page is in the buffer pool */
-UNIV_INTERN
 buf_page_t*
 buf_pool_watch_set(
-/*===============*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset,	/*!< in: page number */
-	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
+	const page_id_t&	page_id,
+	rw_lock_t**		hash_lock)
 {
 	buf_page_t*	bpage;
 	ulint		i;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
-	rw_lock_t*	hash_lock;
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 
-	hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+	ut_ad(*hash_lock == buf_page_hash_lock_get(buf_pool, page_id));
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(*hash_lock, RW_LOCK_X));
 
-	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	bpage = buf_page_hash_get_low(buf_pool, page_id);
 
 	if (bpage != NULL) {
 page_found:
@@ -1728,11 +3255,7 @@ page_found:
 		}
 
 		/* Add to an existing watch. */
-#ifdef PAGE_ATOMIC_REF_COUNT
-		os_atomic_increment_uint32(&bpage->buf_fix_count, 1);
-#else
-		++bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
+		buf_block_fix(bpage);
 		return(NULL);
 	}
 
@@ -1746,21 +3269,24 @@ page_found:
 
 
 	/* To obey latching order first release the hash_lock. */
-	rw_lock_x_unlock(hash_lock);
+	rw_lock_x_unlock(*hash_lock);
 
 	buf_pool_mutex_enter(buf_pool);
 	hash_lock_x_all(buf_pool->page_hash);
 
+	/* If we do not hold buf_pool_mutex, page_hash can change. */
+	*hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
+
 	/* We have to recheck that the page
 	was not loaded or a watch set by some other
 	purge thread. This is because of the small
 	time window between when we release the
 	hash_lock to acquire buf_pool mutex above. */
 
-	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	bpage = buf_page_hash_get_low(buf_pool, page_id);
 	if (UNIV_LIKELY_NULL(bpage)) {
 		buf_pool_mutex_exit(buf_pool);
-		hash_unlock_x_all_but(buf_pool->page_hash, hash_lock);
+		hash_unlock_x_all_but(buf_pool->page_hash, *hash_lock);
 		goto page_found;
 	}
 
@@ -1787,20 +3313,19 @@ page_found:
 			buf_block_t::mutex or buf_pool->zip_mutex or both. */
 
 			bpage->state = BUF_BLOCK_ZIP_PAGE;
-			bpage->space = static_cast<ib_uint32_t>(space);
-			bpage->offset = static_cast<ib_uint32_t>(offset);
+			bpage->id.copy_from(page_id);
 			bpage->buf_fix_count = 1;
 
 			ut_d(bpage->in_page_hash = TRUE);
 			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
-				    fold, bpage);
+				    page_id.fold(), bpage);
 
 			buf_pool_mutex_exit(buf_pool);
 			/* Once the sentinel is in the page_hash we can
 			safely release all locks except just the
 			relevant hash_lock */
 			hash_unlock_x_all_but(buf_pool->page_hash,
-						hash_lock);
+						*hash_lock);
 
 			return(NULL);
 		case BUF_BLOCK_ZIP_PAGE:
@@ -1822,48 +3347,42 @@ page_found:
 	return(NULL);
 }
 
-/****************************************************************//**
-Remove the sentinel block for the watch before replacing it with a real block.
-buf_page_watch_clear() or buf_page_watch_occurred() will notice that
-the block has been replaced with the real block.
+/** Remove the sentinel block for the watch before replacing it with a
+real block. buf_page_watch_clear() or buf_page_watch_occurred() will notice
+that the block has been replaced with the real block.
+@param[in,out]	buf_pool	buffer pool instance
+@param[in,out]	watch		sentinel for watch */
 static
 void
 buf_pool_watch_remove(
-/*==================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		fold,		/*!< in: buf_page_address_fold(
-					space, offset) */
-	buf_page_t*	watch)		/*!< in/out: sentinel for watch */
+	buf_pool_t*	buf_pool,
+	buf_page_t*	watch)
 {
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	/* We must also own the appropriate hash_bucket mutex. */
-	rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-	ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, watch->id);
+	ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
+#endif /* UNIV_DEBUG */
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
 
-	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
+	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, watch->id.fold(),
+		    watch);
 	ut_d(watch->in_page_hash = FALSE);
 	watch->buf_fix_count = 0;
 	watch->state = BUF_BLOCK_POOL_WATCH;
 }
 
-/****************************************************************//**
-Stop watching if the page has been read in.
-buf_pool_watch_set(space,offset) must have returned NULL before. */
-UNIV_INTERN
+/** Stop watching if the page has been read in.
+buf_pool_watch_set() with the same page_id must have returned NULL before.
+@param[in]	page_id	page id */
 void
 buf_pool_watch_unset(
-/*=================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
+	const page_id_t&	page_id)
 {
 	buf_page_t*	bpage;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
-	ulint		fold = buf_page_address_fold(space, offset);
-	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 
 	/* We only need to have buf_pool mutex in case where we end
 	up calling buf_pool_watch_remove but to obey latching order
@@ -1872,58 +3391,44 @@ buf_pool_watch_unset(
 	called from the purge thread. */
 	buf_pool_mutex_enter(buf_pool);
 
+	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
 	rw_lock_x_lock(hash_lock);
 
-	/* The page must exist because buf_pool_watch_set() increments
-	buf_fix_count. */
-
-	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
-
-	if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
-		buf_block_unfix(reinterpret_cast<buf_block_t*>(bpage));
-	} else {
-
-		ut_ad(bpage->buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-		os_atomic_decrement_uint32(&bpage->buf_fix_count, 1);
-#else
-		--bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
+	/* The page must exist because buf_pool_watch_set()
+	increments buf_fix_count. */
+	bpage = buf_page_hash_get_low(buf_pool, page_id);
 
-		if (bpage->buf_fix_count == 0) {
-			buf_pool_watch_remove(buf_pool, fold, bpage);
-		}
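+	/* Release our fix; remove the sentinel only if we were the
+	last fixer and the watch has not been replaced by a real
+	page. */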
+	if (buf_block_unfix(bpage) == 0
+	    && buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+		buf_pool_watch_remove(buf_pool, bpage);
 	}
 
 	buf_pool_mutex_exit(buf_pool);
 	rw_lock_x_unlock(hash_lock);
 }
 
-/****************************************************************//**
-Check if the page has been read in.
-This may only be called after buf_pool_watch_set(space,offset)
-has returned NULL and before invoking buf_pool_watch_unset(space,offset).
-@return	FALSE if the given page was not read in, TRUE if it was */
-UNIV_INTERN
+/** Check if the page has been read in.
+This may only be called after buf_pool_watch_set() with the same page_id
+has returned NULL and before invoking buf_pool_watch_unset() with it.
+@param[in]	page_id	page id
+@return FALSE if the given page was not read in, TRUE if it was */
 ibool
 buf_pool_watch_occurred(
-/*====================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
+	const page_id_t&	page_id)
 {
 	ibool		ret;
 	buf_page_t*	bpage;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
-	ulint		fold	= buf_page_address_fold(space, offset);
-	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool,
-							     fold);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
+	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
 
 	rw_lock_s_lock(hash_lock);
 
+	/* If we do not hold buf_pool_mutex, page_hash can change. */
+	hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id);
+
 	/* The page must exist because buf_pool_watch_set()
 	increments buf_fix_count. */
-	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	bpage = buf_page_hash_get_low(buf_pool, page_id);
 
 	ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
 	rw_lock_s_unlock(hash_lock);
@@ -1935,7 +3440,6 @@ buf_pool_watch_occurred(
 Moves a page to the start of the buffer pool LRU list. This high-level
 function can be used to prevent an important page from slipping out of
 the buffer pool. */
-UNIV_INTERN
 void
 buf_page_make_young(
 /*================*/
@@ -1974,54 +3478,26 @@ buf_page_make_young_if_needed(
 	}
 }
 
-/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
-{
-	buf_block_t*	block;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
-
-	buf_pool_mutex_enter(buf_pool);
-
-	block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
-
-	if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
-		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
-		block->check_index_page_at_flush = FALSE;
-	}
-
-	buf_pool_mutex_exit(buf_pool);
-}
+#ifdef UNIV_DEBUG
 
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-/********************************************************************//**
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+/** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
 This function should be called when we free a file page and want the
 debug version to check that it is not accessed any more unless
 reallocated.
-@return	control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
+@param[in]	page_id	page id
+@return control block if found in page hash table, otherwise NULL */
 buf_page_t*
 buf_page_set_file_page_was_freed(
-/*=============================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
+	const page_id_t&	page_id)
 {
 	buf_page_t*	bpage;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 	rw_lock_t*	hash_lock;
 
-	bpage = buf_page_hash_get_s_locked(buf_pool, space, offset,
-					   &hash_lock);
+	bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock);
 
 	if (bpage) {
-		ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+		BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
 		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
 		mutex_enter(block_mutex);
 		rw_lock_s_unlock(hash_lock);
@@ -2034,27 +3510,23 @@ buf_page_set_file_page_was_freed(
 	return(bpage);
 }
 
-/********************************************************************//**
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
+/** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
 This function should be called when we free a file page and want the
 debug version to check that it is not accessed any more unless
 reallocated.
-@return	control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
+@param[in]	page_id	page id
+@return control block if found in page hash table, otherwise NULL */
 buf_page_t*
 buf_page_reset_file_page_was_freed(
-/*===============================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
+	const page_id_t&	page_id)
 {
 	buf_page_t*	bpage;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 	rw_lock_t*	hash_lock;
 
-	bpage = buf_page_hash_get_s_locked(buf_pool, space, offset,
-					   &hash_lock);
+	bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock);
 	if (bpage) {
-		ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+		BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
 		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
 		mutex_enter(block_mutex);
 		rw_lock_s_unlock(hash_lock);
@@ -2064,21 +3536,19 @@ buf_page_reset_file_page_was_freed(
 
 	return(bpage);
 }
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+#endif /* UNIV_DEBUG */
 
-/********************************************************************//**
-Attempts to discard the uncompressed frame of a compressed page. The
-caller should not be holding any mutexes when this function is called.
-@return	TRUE if successful, FALSE otherwise. */
+/** Attempts to discard the uncompressed frame of a compressed page.
+The caller should not be holding any mutexes when this function is called.
+@param[in]	page_id	page id */
 static
 void
 buf_block_try_discard_uncompressed(
-/*===============================*/
-	ulint		space,	/*!< in: space id */
-	ulint		offset)	/*!< in: page number */
+	const page_id_t&	page_id)
 {
 	buf_page_t*	bpage;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 
 	/* Since we need to acquire buf_pool mutex to discard
 	the uncompressed frame and because page_hash mutex resides
@@ -2088,7 +3558,7 @@ buf_block_try_discard_uncompressed(
 	we need to check again if the block is still in page_hash. */
 	buf_pool_mutex_enter(buf_pool);
 
-	bpage = buf_page_hash_get(buf_pool, space, offset);
+	bpage = buf_page_hash_get(buf_pool, page_id);
 
 	if (bpage) {
 		buf_LRU_free_page(bpage, false);
@@ -2097,29 +3567,27 @@ buf_block_try_discard_uncompressed(
 	buf_pool_mutex_exit(buf_pool);
 }
 
-/********************************************************************//**
-Get read access to a compressed page (usually of type
+/** Get read access to a compressed page (usually of type
 FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
 The page must be released with buf_page_release_zip().
 NOTE: the page is not protected by any latch.  Mutual exclusion has to
 be implemented at a higher level.  In other words, all possible
 accesses to a given page through this function must be protected by
 the same set of mutexes or latches.
-@return	pointer to the block */
-UNIV_INTERN
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@return pointer to the block */
 buf_page_t*
 buf_page_get_zip(
-/*=============*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size */
-	ulint		offset)	/*!< in: page number */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size)
 {
 	buf_page_t*	bpage;
-	ib_mutex_t*	block_mutex;
+	BPageMutex*	block_mutex;
 	rw_lock_t*	hash_lock;
 	ibool		discard_attempted = FALSE;
 	ibool		must_read;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 
 	buf_pool->stat.n_page_gets++;
 
@@ -2128,8 +3596,8 @@ lookup:
 
 		/* The following call will also grab the page_hash
 		mutex if the page is found. */
-		bpage = buf_page_hash_get_s_locked(buf_pool, space,
-						offset, &hash_lock);
+		bpage = buf_page_hash_get_s_locked(buf_pool, page_id,
+						   &hash_lock);
 		if (bpage) {
 			ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
 			break;
@@ -2138,7 +3606,7 @@ lookup:
 		/* Page not in buf_pool: needs to be read from file */
 
 		ut_ad(!hash_lock);
-		buf_read_page(space, zip_size, offset);
+		buf_read_page(page_id, page_size);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		ut_a(++buf_dbg_counter % 5771 || buf_validate());
@@ -2166,28 +3634,26 @@ err_exit:
 
 	case BUF_BLOCK_ZIP_PAGE:
 	case BUF_BLOCK_ZIP_DIRTY:
+		buf_block_fix(bpage);
 		block_mutex = &buf_pool->zip_mutex;
 		mutex_enter(block_mutex);
-#ifdef PAGE_ATOMIC_REF_COUNT
-		os_atomic_increment_uint32(&bpage->buf_fix_count, 1);
-#else
-		++bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
 		goto got_block;
 	case BUF_BLOCK_FILE_PAGE:
 		/* Discard the uncompressed page frame if possible. */
 		if (!discard_attempted) {
 			rw_lock_s_unlock(hash_lock);
-			buf_block_try_discard_uncompressed(space, offset);
+			buf_block_try_discard_uncompressed(page_id);
 			discard_attempted = TRUE;
 			goto lookup;
 		}
 
+		buf_block_buf_fix_inc((buf_block_t*) bpage,
+				      __FILE__, __LINE__);
+
 		block_mutex = &((buf_block_t*) bpage)->mutex;
 
 		mutex_enter(block_mutex);
 
-		buf_block_buf_fix_inc((buf_block_t*) bpage, __FILE__, __LINE__);
 		goto got_block;
 	}
 
@@ -2198,9 +3664,8 @@ got_block:
 	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
 
 	rw_lock_s_unlock(hash_lock);
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	ut_a(!bpage->file_page_was_freed);
-#endif /* defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG */
+
+	ut_ad(!bpage->file_page_was_freed);
 
 	buf_page_set_accessed(bpage);
 
@@ -2235,9 +3700,9 @@ got_block:
 	}
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a(ibuf_count_get(buf_page_get_space(bpage),
-			    buf_page_get_page_no(bpage)) == 0);
-#endif
+	ut_a(ibuf_count_get(page_id) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+
 	return(bpage);
 }
 
@@ -2249,8 +3714,9 @@ buf_block_init_low(
 /*===============*/
 	buf_block_t*	block)	/*!< in: block to init */
 {
-	block->check_index_page_at_flush = FALSE;
 	block->index		= NULL;
+	block->made_dirty_with_no_latch = false;
+	block->skip_flush_check = false;
 
 	block->n_hash_helps	= 0;
 	block->n_fields		= 1;
@@ -2261,8 +3727,7 @@ buf_block_init_low(
 
 /********************************************************************//**
 Decompress a block.
-@return	TRUE if successful */
-UNIV_INTERN
+@return TRUE if successful */
 ibool
 buf_zip_decompress(
 /*===============*/
@@ -2272,38 +3737,43 @@ buf_zip_decompress(
 	const byte*	frame = block->page.zip.data;
 	ulint		size = page_zip_get_size(&block->page.zip);
 
-	ut_ad(buf_block_get_zip_size(block));
-	ut_a(buf_block_get_space(block) != 0);
+	ut_ad(block->page.size.is_compressed());
+	ut_a(block->page.id.space() != 0);
 
 	if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) {
 
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: compressed page checksum mismatch"
-			" (space %u page %u): stored: %lu, crc32: %lu "
-			"innodb: %lu, none: %lu\n",
-			block->page.space, block->page.offset,
-			mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM),
-			page_zip_calc_checksum(frame, size,
-					       SRV_CHECKSUM_ALGORITHM_CRC32),
-			page_zip_calc_checksum(frame, size,
-					       SRV_CHECKSUM_ALGORITHM_INNODB),
-			page_zip_calc_checksum(frame, size,
-					       SRV_CHECKSUM_ALGORITHM_NONE));
+		ib::error() << "Compressed page checksum mismatch "
+			<< block->page.id << ": stored: "
+			<< mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
+			<< ", crc32: "
+			<< page_zip_calc_checksum(
+				frame, size, SRV_CHECKSUM_ALGORITHM_CRC32)
+			<< "/"
+			<< page_zip_calc_checksum(
+				frame, size, SRV_CHECKSUM_ALGORITHM_CRC32,
+				true)
+			<< " innodb: "
+			<< page_zip_calc_checksum(
+				frame, size, SRV_CHECKSUM_ALGORITHM_INNODB)
+			<< ", none: "
+			<< page_zip_calc_checksum(
+				frame, size, SRV_CHECKSUM_ALGORITHM_NONE);
+
 		return(FALSE);
 	}
 
 	switch (fil_page_get_type(frame)) {
 	case FIL_PAGE_INDEX:
+	case FIL_PAGE_RTREE:
 		if (page_zip_decompress(&block->page.zip,
 					block->frame, TRUE)) {
 			return(TRUE);
 		}
 
-		fprintf(stderr,
-			"InnoDB: unable to decompress space %lu page %lu\n",
-			(ulong) block->page.space,
-			(ulong) block->page.offset);
+		ib::error() << "Unable to decompress space "
+			<< block->page.id.space()
+			<< " page " << block->page.id.page_no();
+
 		return(FALSE);
 
 	case FIL_PAGE_TYPE_ALLOCATED:
@@ -2314,142 +3784,168 @@ buf_zip_decompress(
 	case FIL_PAGE_TYPE_ZBLOB:
 	case FIL_PAGE_TYPE_ZBLOB2:
 		/* Copy to uncompressed storage. */
-		memcpy(block->frame, frame,
-		       buf_block_get_zip_size(block));
+		memcpy(block->frame, frame, block->page.size.physical());
 		return(TRUE);
 	}
 
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		"  InnoDB: unknown compressed page"
-		" type %lu\n",
-		fil_page_get_type(frame));
+	ib::error() << "Unknown compressed page type "
+		<< fil_page_get_type(frame);
+
 	return(FALSE);
 }
 
 #ifndef UNIV_HOTBACKUP
 /*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to if found
-in this buffer pool instance.
-@return	pointer to block */
-UNIV_INTERN
+Gets the block to whose frame the pointer is pointing to.
+@return pointer to block, never NULL */
 buf_block_t*
-buf_block_align_instance(
-/*=====================*/
- 	buf_pool_t*	buf_pool,	/*!< in: buffer in which the block
-					resides */
-	const byte*	ptr)		/*!< in: pointer to a frame */
+buf_block_align(
+/*============*/
+	const byte*	ptr)	/*!< in: pointer to a frame */
 {
-	buf_chunk_t*	chunk;
-	ulint		i;
+	buf_pool_chunk_map_t::iterator it;
 
-	/* TODO: protect buf_pool->chunks with a mutex (it will
-	currently remain constant after buf_pool_init()) */
-	for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
-		ulint	offs;
+	ut_ad(srv_buf_pool_chunk_unit > 0);
 
-		if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) {
-
-			continue;
-		}
-		/* else */
+	/* TODO: This treatment might still be optimistic.
+	buf_pool_resize() keeps all buf_pool_mutex instances and all
+	buf_pool->page_hash latches x-latched until the actual
+	modification. That blocks the other user threads for a while,
+	which should be enough to complete the buf_pool_chunk_map
+	access. */
+	while (buf_pool_resizing) {
+		/* buf_pool_chunk_map is being modified */
+		os_thread_sleep(100000); /* 0.1 sec */
+	}
 
-		offs = ptr - chunk->blocks->frame;
+	ulint	counter = 0;
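+	/* If the chunk map is being rebuilt concurrently, the lookup
+	below may transiently fail; it is retried up to ten times,
+	0.1 sec apart, before the assertion fails. */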
+retry:
+#ifdef UNIV_DEBUG
+	bool resize_disabled = (buf_disable_resize_buffer_pool_debug != FALSE);
+	if (!resize_disabled) {
+		rw_lock_s_lock(buf_chunk_map_latch);
+	}
+#endif /* UNIV_DEBUG */
+	buf_pool_chunk_map_t*	chunk_map = buf_chunk_map_ref;
 
-		offs >>= UNIV_PAGE_SIZE_SHIFT;
+	if (ptr < reinterpret_cast<byte*>(srv_buf_pool_chunk_unit)) {
+		it = chunk_map->upper_bound(0);
+	} else {
+		it = chunk_map->upper_bound(
+			ptr - srv_buf_pool_chunk_unit);
+	}
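+	/* Chunks are keyed in the map by the address of their first
+	frame. A chunk covers at most srv_buf_pool_chunk_unit bytes,
+	so the candidate chunk is the first entry whose key exceeds
+	ptr - srv_buf_pool_chunk_unit; the offset check below verifies
+	that ptr really falls within it. */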
 
-		if (UNIV_LIKELY(offs < chunk->size)) {
-			buf_block_t*	block = &chunk->blocks[offs];
+	if (it == chunk_map->end()) {
+#ifdef UNIV_DEBUG
+		if (!resize_disabled) {
+			rw_lock_s_unlock(buf_chunk_map_latch);
+		}
+#endif /* UNIV_DEBUG */
+		/* The block should always be found. */
+		++counter;
+		ut_a(counter < 10);
+		os_thread_sleep(100000); /* 0.1 sec */
+		goto retry;
+	}
 
-			/* The function buf_chunk_init() invokes
-			buf_block_init() so that block[n].frame ==
-			block->frame + n * UNIV_PAGE_SIZE.  Check it. */
-			ut_ad(block->frame == page_align(ptr));
+	buf_chunk_t*	chunk = it->second;
 #ifdef UNIV_DEBUG
-			/* A thread that updates these fields must
-			hold buf_pool->mutex and block->mutex.  Acquire
-			only the latter. */
-			mutex_enter(&block->mutex);
+	if (!resize_disabled) {
+		rw_lock_s_unlock(buf_chunk_map_latch);
+	}
+#endif /* UNIV_DEBUG */
 
-			switch (buf_block_get_state(block)) {
-			case BUF_BLOCK_POOL_WATCH:
-			case BUF_BLOCK_ZIP_PAGE:
-			case BUF_BLOCK_ZIP_DIRTY:
-				/* These types should only be used in
-				the compressed buffer pool, whose
-				memory is allocated from
-				buf_pool->chunks, in UNIV_PAGE_SIZE
-				blocks flagged as BUF_BLOCK_MEMORY. */
-				ut_error;
-				break;
-			case BUF_BLOCK_NOT_USED:
-			case BUF_BLOCK_READY_FOR_USE:
-			case BUF_BLOCK_MEMORY:
-				/* Some data structures contain
-				"guess" pointers to file pages.  The
-				file pages may have been freed and
-				reused.  Do not complain. */
-				break;
-			case BUF_BLOCK_REMOVE_HASH:
-				/* buf_LRU_block_remove_hashed_page()
-				will overwrite the FIL_PAGE_OFFSET and
-				FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
-				0xff and set the state to
-				BUF_BLOCK_REMOVE_HASH. */
-				ut_ad(page_get_space_id(page_align(ptr))
-				      == 0xffffffff);
-				ut_ad(page_get_page_no(page_align(ptr))
-				      == 0xffffffff);
-				break;
-			case BUF_BLOCK_FILE_PAGE:
-				ut_ad(block->page.space
-				      == page_get_space_id(page_align(ptr)));
-				ut_ad(block->page.offset
-				      == page_get_page_no(page_align(ptr)));
-				break;
-			}
+	ulint		offs = ptr - chunk->blocks->frame;
 
-			mutex_exit(&block->mutex);
-#endif /* UNIV_DEBUG */
+	offs >>= UNIV_PAGE_SIZE_SHIFT;
 
-			return(block);
-		}
-	}
+	if (offs < chunk->size) {
+		buf_block_t*	block = &chunk->blocks[offs];
 
-	return(NULL);
-}
+		/* The function buf_chunk_init() invokes
+		buf_block_init() so that block[n].frame ==
+		block->frame + n * UNIV_PAGE_SIZE.  Check it. */
+		ut_ad(block->frame == page_align(ptr));
+#ifdef UNIV_DEBUG
+		/* A thread that updates these fields must
+		hold buf_pool->mutex and block->mutex.  Acquire
+		only the latter. */
+		buf_page_mutex_enter(block);
 
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to.
- at return	pointer to block, never NULL */
-UNIV_INTERN
-buf_block_t*
-buf_block_align(
-/*============*/
-	const byte*	ptr)	/*!< in: pointer to a frame */
-{
-	ulint		i;
+		switch (buf_block_get_state(block)) {
+		case BUF_BLOCK_POOL_WATCH:
+		case BUF_BLOCK_ZIP_PAGE:
+		case BUF_BLOCK_ZIP_DIRTY:
+			/* These types should only be used in
+			the compressed buffer pool, whose
+			memory is allocated from
+			buf_pool->chunks, in UNIV_PAGE_SIZE
+			blocks flagged as BUF_BLOCK_MEMORY. */
+			ut_error;
+			break;
+		case BUF_BLOCK_NOT_USED:
+		case BUF_BLOCK_READY_FOR_USE:
+		case BUF_BLOCK_MEMORY:
+			/* Some data structures contain
+			"guess" pointers to file pages.  The
+			file pages may have been freed and
+			reused.  Do not complain. */
+			break;
+		case BUF_BLOCK_REMOVE_HASH:
+			/* buf_LRU_block_remove_hashed_page()
+			will overwrite the FIL_PAGE_OFFSET and
+			FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
+			0xff and set the state to
+			BUF_BLOCK_REMOVE_HASH. */
+# ifndef UNIV_DEBUG_VALGRIND
+			/* In buf_LRU_block_remove_hashed() we
+			explicitly set those values to 0xff and
+			declare them uninitialized with
+			UNIV_MEM_INVALID() after that. */
+			ut_ad(page_get_space_id(page_align(ptr))
+			      == 0xffffffff);
+			ut_ad(page_get_page_no(page_align(ptr))
+			      == 0xffffffff);
+# endif /* UNIV_DEBUG_VALGRIND */
+			break;
+		case BUF_BLOCK_FILE_PAGE:
+			const ulint	space_id1 = block->page.id.space();
+			const ulint	page_no1 = block->page.id.page_no();
+			const ulint	space_id2 = page_get_space_id(
+							page_align(ptr));
+			const ulint	page_no2 = page_get_page_no(
+							page_align(ptr));
+
+			if (space_id1 != space_id2 || page_no1 != page_no2) {
+
+				ib::error() << "Found a mismatched page:"
+					<< " expected page "
+					<< page_id_t(space_id1, page_no1)
+					<< " but found "
+					<< page_id_t(space_id2, page_no2);
+
+				ut_ad(0);
+			}
+			break;
+		}
 
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		buf_block_t*	block;
+		buf_page_mutex_exit(block);
+#endif /* UNIV_DEBUG */
 
-		block = buf_block_align_instance(
-			buf_pool_from_array(i), ptr);
-		if (block) {
-			return(block);
-		}
+		return(block);
 	}
 
 	/* The block should always be found. */
-	ut_error;
-	return(NULL);
+	++counter;
+	ut_a(counter < 10);
+	os_thread_sleep(100000); /* 0.1 sec */
+	goto retry;
 }
 
 /********************************************************************//**
 Find out if a pointer belongs to a buf_block_t. It can be a pointer to
 the buf_block_t itself or a member of it. This functions checks one of
 the buffer pool instances.
-@return	TRUE if ptr belongs to a buf_block_t struct */
+@return TRUE if ptr belongs to a buf_block_t struct */
 static
 ibool
 buf_pointer_is_block_field_instance(
@@ -2458,10 +3954,11 @@ buf_pointer_is_block_field_instance(
 	const void*	ptr)		/*!< in: pointer not dereferenced */
 {
 	const buf_chunk_t*		chunk	= buf_pool->chunks;
-	const buf_chunk_t* const	echunk	= chunk + buf_pool->n_chunks;
+	const buf_chunk_t* const	echunk	= chunk + ut_min(
+		buf_pool->n_chunks, buf_pool->n_chunks_new);
 
-	/* TODO: protect buf_pool->chunks with a mutex (it will
-	currently remain constant after buf_pool_init()) */
+	/* TODO: protect buf_pool->chunks with a mutex (the old pointer
+	currently remains valid during buf_pool_resize()) */
 	while (chunk < echunk) {
 		if (ptr >= (void*) chunk->blocks
 		    && ptr < (void*) (chunk->blocks + chunk->size)) {
@@ -2478,8 +3975,7 @@ buf_pointer_is_block_field_instance(
 /********************************************************************//**
 Find out if a pointer belongs to a buf_block_t. It can be a pointer to
 the buf_block_t itself or a member of it
-@return	TRUE if ptr belongs to a buf_block_t struct */
-UNIV_INTERN
+@return TRUE if ptr belongs to a buf_block_t struct */
 ibool
 buf_pointer_is_block_field(
 /*=======================*/
@@ -2502,7 +3998,7 @@ buf_pointer_is_block_field(
 
 /********************************************************************//**
 Find out if a buffer block was created by buf_chunk_init().
-@return	TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
+@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
 static
 ibool
 buf_block_is_uncompressed(
@@ -2541,14 +4037,14 @@ buf_debug_execute_is_force_flush()
 }
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
 
-/**
-Wait for the block to be read in.
-@param block	The block to check */
+/** Wait for the block to be read in.
+@param[in]	block	The block to check */
 static
 void
-buf_wait_for_read(buf_block_t* block)
+buf_wait_for_read(
+	buf_block_t*	block)
 {
-	/* Note: For the PAGE_ATOMIC_REF_COUNT case:
+	/* Note:
 
 	We are using the block->lock to check for IO state (and a dirty read).
 	We set the IO_READ state under the protection of the hash_lock
@@ -2560,7 +4056,7 @@ buf_wait_for_read(buf_block_t* block)
 
 		/* Wait until the read operation completes */
 
-		ib_mutex_t*	mutex = buf_page_get_mutex(&block->page);
+		BPageMutex*	mutex = buf_page_get_mutex(&block->page);
 
 		for (;;) {
 			buf_io_fix	io_fix;
@@ -2582,40 +4078,42 @@ buf_wait_for_read(buf_block_t* block)
 	}
 }
 
-/********************************************************************//**
-This is the general function used to get access to a database page.
-@return	pointer to the block or NULL */
-UNIV_INTERN
+/** This is the general function used to get access to a database page.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in]	rw_latch	RW_S_LATCH, RW_X_LATCH, RW_SX_LATCH, RW_NO_LATCH
+@param[in]	guess		guessed block or NULL
+@param[in]	mode		BUF_GET, BUF_GET_IF_IN_POOL,
+BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in]	file		file name
+@param[in]	line		line where called
+@param[in]	mtr		mini-transaction
+@param[in]	dirty_with_no_latch
+				mark page as dirty even if page
+				is being pinned without any latch
+@return pointer to the block or NULL */
 buf_block_t*
 buf_page_get_gen(
-/*=============*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint		offset,	/*!< in: page number */
-	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-	buf_block_t*	guess,	/*!< in: guessed block or NULL */
-	ulint		mode,	/*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
-				BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or
-				BUF_GET_IF_IN_POOL_OR_WATCH */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line where called */
-	mtr_t*		mtr)	/*!< in: mini-transaction */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ulint			rw_latch,
+	buf_block_t*		guess,
+	ulint			mode,
+	const char*		file,
+	ulint			line,
+	mtr_t*			mtr,
+	bool			dirty_with_no_latch)
 {
 	buf_block_t*	block;
-	ulint		fold;
 	unsigned	access_time;
-	ulint		fix_type;
 	rw_lock_t*	hash_lock;
-	ulint		retries = 0;
 	buf_block_t*	fix_block;
-	ib_mutex_t*	fix_mutex = NULL;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	ulint		retries = 0;
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 
-	ut_ad(mtr);
-	ut_ad(mtr->state == MTR_ACTIVE);
+	ut_ad(mtr->is_active());
 	ut_ad((rw_latch == RW_S_LATCH)
 	      || (rw_latch == RW_X_LATCH)
+	      || (rw_latch == RW_SX_LATCH)
 	      || (rw_latch == RW_NO_LATCH));
 #ifdef UNIV_DEBUG
 	switch (mode) {
@@ -2631,22 +4129,29 @@ buf_page_get_gen(
 	default:
 		ut_error;
 	}
+
+	bool			found;
+	const page_size_t&	space_page_size
+		= fil_space_get_page_size(page_id.space(), &found);
+
+	ut_ad(found);
+
+	ut_ad(page_size.equals_to(space_page_size));
 #endif /* UNIV_DEBUG */
-	ut_ad(zip_size == fil_space_get_zip_size(space));
-	ut_ad(ut_is_2pow(zip_size));
-#ifndef UNIV_LOG_DEBUG
+
 	ut_ad(!ibuf_inside(mtr)
-	      || ibuf_page_low(space, zip_size, offset,
-			       FALSE, file, line, NULL));
-#endif
+	      || ibuf_page_low(page_id, page_size, FALSE, file, line, NULL));
+
 	buf_pool->stat.n_page_gets++;
-	fold = buf_page_address_fold(space, offset);
-	hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
 loop:
 	block = guess;
 
 	rw_lock_s_lock(hash_lock);
 
+	/* If we do not hold buf_pool_mutex, page_hash can change. */
+	hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id);
+
 	if (block != NULL) {
 
 		/* If the guess is a compressed page descriptor that
@@ -2654,8 +4159,7 @@ loop:
 		it may have been freed by buf_relocate(). */
 
 		if (!buf_block_is_uncompressed(buf_pool, block)
-		    || offset != block->page.offset
-		    || space != block->page.space
+		    || !page_id.equals_to(block->page.id)
 		    || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 
 			/* Our guess was bogus or things have changed
@@ -2667,8 +4171,7 @@ loop:
 	}
 
 	if (block == NULL) {
-		block = (buf_block_t*) buf_page_hash_get_low(
-			buf_pool, space, offset, fold);
+		block = (buf_block_t*) buf_page_hash_get_low(buf_pool, page_id);
 	}
 
 	if (!block || buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
@@ -2681,15 +4184,39 @@ loop:
 
 		if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
 			rw_lock_x_lock(hash_lock);
+
+			/* If we do not hold buf_pool_mutex,
+			page_hash can change. */
+			hash_lock = buf_page_hash_lock_x_confirm(
+				hash_lock, buf_pool, page_id);
+
 			block = (buf_block_t*) buf_pool_watch_set(
-				space, offset, fold);
+				page_id, &hash_lock);
 
-			if (UNIV_LIKELY_NULL(block)) {
+			if (block) {
 				/* We can release hash_lock after we
 				increment the fix count to make
 				sure that no state change takes place. */
 				fix_block = block;
-				buf_block_fix(fix_block);
+
+				if (fsp_is_system_temporary(page_id.space())) {
+					/* For the temporary
+					tablespace, the block mutex,
+					not block->lock, synchronizes
+					the user thread with the
+					flush thread. See
+					buf_flush_page() for the flush
+					thread counterpart. */
+
+					BPageMutex*	fix_mutex
+						= buf_page_get_mutex(
+							&fix_block->page);
+					mutex_enter(fix_mutex);
+					buf_block_fix(fix_block);
+					mutex_exit(fix_mutex);
+				} else {
+					buf_block_fix(fix_block);
+				}
 
 				/* Now safe to release page_hash mutex */
 				rw_lock_x_unlock(hash_lock);
@@ -2702,15 +4229,15 @@ loop:
 		if (mode == BUF_GET_IF_IN_POOL
 		    || mode == BUF_PEEK_IF_IN_POOL
 		    || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
-#ifdef UNIV_SYNC_DEBUG
-			ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
-			ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+
+			ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X));
+			ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S));
+
 			return(NULL);
 		}
 
-		if (buf_read_page(space, zip_size, offset)) {
-			buf_read_ahead_random(space, zip_size, offset,
+		if (buf_read_page(page_id, page_size)) {
+			buf_read_ahead_random(page_id, page_size,
 					      ibuf_inside(mtr));
 
 			retries = 0;
@@ -2721,59 +4248,57 @@ loop:
 				retries = BUF_PAGE_READ_MAX_RETRIES;
 			);
 		} else {
-			fprintf(stderr, "InnoDB: Error: Unable"
-				" to read tablespace %lu page no"
-				" %lu into the buffer pool after"
-				" %lu attempts\n"
-				"InnoDB: The most probable cause"
-				" of this error may be that the"
-				" table has been corrupted.\n"
-				"InnoDB: You can try to fix this"
-				" problem by using"
-				" innodb_force_recovery.\n"
-				"InnoDB: Please see reference manual"
-				" for more details.\n"
-				"InnoDB: Aborting...\n",
-				space, offset,
-				BUF_PAGE_READ_MAX_RETRIES);
-
-			ut_error;
+			ib::fatal() << "Unable to read page " << page_id
+				<< " into the buffer pool after "
+				<< BUF_PAGE_READ_MAX_RETRIES << " attempts."
+				" The most probable cause of this error is"
+				" that the table has been corrupted, or that"
+				" the table was compressed with an"
+				" algorithm that is not supported by this"
+				" instance. If it is not a decompress failure,"
+				" you can try to fix this problem by using"
+				" innodb_force_recovery."
+				" Please see " REFMAN " for more"
+				" details. Aborting...";
 		}
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-		ut_a(++buf_dbg_counter % 5771 || buf_validate());
+		ut_a(fsp_skip_sanity_check(page_id.space())
+		     || ++buf_dbg_counter % 5771
+		     || buf_validate());
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 		goto loop;
 	} else {
 		fix_block = block;
 	}
 
-	buf_block_fix(fix_block);
+	if (fsp_is_system_temporary(page_id.space())) {
+		/* For the temporary tablespace, the block mutex, not
+		block->lock, synchronizes the user thread with the
+		flush thread. See buf_flush_page() for the flush
+		thread counterpart. */
+		BPageMutex*	fix_mutex = buf_page_get_mutex(
+			&fix_block->page);
+		mutex_enter(fix_mutex);
+		buf_block_fix(fix_block);
+		mutex_exit(fix_mutex);
+	} else {
+		buf_block_fix(fix_block);
+	}
 
 	/* Now safe to release page_hash mutex */
 	rw_lock_s_unlock(hash_lock);
 
 got_block:
 
-	fix_mutex = buf_page_get_mutex(&fix_block->page);
-
-	ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
-
 	if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) {
 
-		bool	must_read;
-
-		{
-			buf_page_t*	fix_page = &fix_block->page;
-
-			mutex_enter(fix_mutex);
-
-			buf_io_fix	io_fix = buf_page_get_io_fix(fix_page);
-
-			must_read = (io_fix == BUF_IO_READ);
-
-			mutex_exit(fix_mutex);
-		}
+		buf_page_t*	fix_page = &fix_block->page;
+		BPageMutex*	fix_mutex = buf_page_get_mutex(fix_page);
+		mutex_enter(fix_mutex);
+		const bool	must_read
+			= (buf_page_get_io_fix(fix_page) == BUF_IO_READ);
+		mutex_exit(fix_mutex);
 
 		if (must_read) {
 			/* The page is being read to buffer pool,
@@ -2785,10 +4310,22 @@ got_block:
 		}
 	}
 
-	switch(buf_block_get_state(fix_block)) {
+	switch (buf_block_get_state(fix_block)) {
 		buf_page_t*	bpage;
 
 	case BUF_BLOCK_FILE_PAGE:
+		bpage = &block->page;
+		if (fsp_is_system_temporary(page_id.space())
+		    && buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+			/* This suggests that the page is being
+			flushed. Avoid returning a reference to it;
+			instead wait for the flush to complete. For a
+			normal page this is synchronized via the SX
+			latch, but intrinsic tables do no latching. */
+			buf_block_unfix(fix_block);
+			os_thread_sleep(WAIT_FOR_WRITE);
+			goto loop;
+		}
 		break;
 
 	case BUF_BLOCK_ZIP_PAGE:
@@ -2829,24 +4366,19 @@ got_block:
 
 		buf_pool_mutex_enter(buf_pool);
 
+		/* If we do not hold buf_pool_mutex, page_hash can change. */
+		hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
+
 		rw_lock_x_lock(hash_lock);
 
 		/* Buffer-fixing prevents the page_hash from changing. */
-		ut_ad(bpage == buf_page_hash_get_low(
-			      buf_pool, space, offset, fold));
+		ut_ad(bpage == buf_page_hash_get_low(buf_pool, page_id));
 
-		buf_block_mutex_enter(block);
+		buf_block_unfix(fix_block);
 
+		buf_page_mutex_enter(block);
 		mutex_enter(&buf_pool->zip_mutex);
 
-		ut_ad(fix_block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-		os_atomic_decrement_uint32(&fix_block->page.buf_fix_count, 1);
-#else
-		--fix_block->page.buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
 		fix_block = block;
 
 		if (bpage->buf_fix_count > 0
@@ -2862,7 +4394,7 @@ got_block:
 			buf_LRU_block_free_non_file_page(block);
 			buf_pool_mutex_exit(buf_pool);
 			rw_lock_x_unlock(hash_lock);
-			buf_block_mutex_exit(block);
+			buf_page_mutex_exit(block);
 
 			/* Try again */
 			goto loop;
@@ -2878,18 +4410,18 @@ got_block:
 
 		buf_block_init_low(block);
 
-		/* Set after relocate(). */
+		/* Set after buf_relocate(). */
 		block->page.buf_fix_count = 1;
 
-		block->lock_hash_val = lock_rec_hash(space, offset);
+		block->lock_hash_val = lock_rec_hash(page_id.space(),
+						     page_id.page_no());
 
 		UNIV_MEM_DESC(&block->page.zip.data,
-			page_zip_get_size(&block->page.zip));
+			      page_zip_get_size(&block->page.zip));
 
 		if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) {
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-			UT_LIST_REMOVE(list, buf_pool->zip_clean,
-				       &block->page);
+			UT_LIST_REMOVE(buf_pool->zip_clean, &block->page);
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 			ut_ad(!block->page.in_flush_list);
 		} else {
@@ -2911,15 +4443,13 @@ got_block:
 		UNIV_MEM_INVALID(bpage, sizeof *bpage);
 
 		rw_lock_x_unlock(hash_lock);
-
-		++buf_pool->n_pend_unzip;
-
+		buf_pool->n_pend_unzip++;
 		mutex_exit(&buf_pool->zip_mutex);
 		buf_pool_mutex_exit(buf_pool);
 
 		access_time = buf_page_is_accessed(&block->page);
 
-		buf_block_mutex_exit(block);
+		buf_page_mutex_exit(block);
 
 		buf_page_free_descriptor(bpage);
 
@@ -2937,22 +4467,21 @@ got_block:
 		if (!recv_no_ibuf_operations) {
 			if (access_time) {
 #ifdef UNIV_IBUF_COUNT_DEBUG
-				ut_a(ibuf_count_get(space, offset) == 0);
+				ut_a(ibuf_count_get(page_id) == 0);
 #endif /* UNIV_IBUF_COUNT_DEBUG */
 			} else {
 				ibuf_merge_or_delete_for_page(
-					block, space, offset, zip_size, TRUE);
+					block, page_id, &page_size, TRUE);
 			}
 		}
 
 		buf_pool_mutex_enter(buf_pool);
 
-		/* Unfix and unlatch the block. */
-		buf_block_mutex_enter(fix_block);
+		buf_page_mutex_enter(fix_block);
 
 		buf_block_set_io_fix(fix_block, BUF_IO_NONE);
 
-		buf_block_mutex_exit(fix_block);
+		buf_page_mutex_exit(fix_block);
 
 		--buf_pool->n_pend_unzip;
 
@@ -2974,10 +4503,8 @@ got_block:
 	ut_ad(block == fix_block);
 	ut_ad(fix_block->page.buf_fix_count > 0);
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X));
+	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S));
 
 	ut_ad(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);
 
@@ -2999,18 +4526,29 @@ got_block:
 		are holding the buf_pool->mutex. */
 
 		if (buf_LRU_free_page(&fix_block->page, true)) {
+
 			buf_pool_mutex_exit(buf_pool);
+
+			/* If we do not hold buf_pool_mutex,
+			page_hash can change. */
+			hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
+
 			rw_lock_x_lock(hash_lock);
 
+			/* If we do not hold buf_pool_mutex,
+			page_hash can change. */
+			hash_lock = buf_page_hash_lock_x_confirm(
+				hash_lock, buf_pool, page_id);
+
 			if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
 				/* Set the watch, as it would have
 				been set if the page were not in the
 				buffer pool in the first place. */
 				block = (buf_block_t*) buf_pool_watch_set(
-					space, offset, fold);
+					page_id, &hash_lock);
 			} else {
 				block = (buf_block_t*) buf_page_hash_get_low(
-					buf_pool, space, offset, fold);
+					buf_pool, page_id);
 			}
 
 			rw_lock_x_unlock(hash_lock);
@@ -3022,26 +4560,29 @@ got_block:
 				and before we acquire the hash_lock
 				above. Try again. */
 				guess = block;
+
 				goto loop;
 			}
 
-			fprintf(stderr,
-				"innodb_change_buffering_debug evict %u %u\n",
-				(unsigned) space, (unsigned) offset);
+			ib::info() << "innodb_change_buffering_debug evict "
+				<< page_id;
+
 			return(NULL);
 		}
 
-		mutex_enter(&fix_block->mutex);
+		buf_page_mutex_enter(fix_block);
 
 		if (buf_flush_page_try(buf_pool, fix_block)) {
-			fprintf(stderr,
-				"innodb_change_buffering_debug flush %u %u\n",
-				(unsigned) space, (unsigned) offset);
+
+			ib::info() << "innodb_change_buffering_debug flush "
+				<< page_id;
+
 			guess = fix_block;
+
 			goto loop;
 		}
 
-		buf_block_mutex_exit(fix_block);
+		buf_page_mutex_exit(fix_block);
 
 		buf_block_fix(fix_block);
 
@@ -3053,30 +4594,40 @@ got_block:
 
 	ut_ad(fix_block->page.buf_fix_count > 0);
 
-#ifdef UNIV_SYNC_DEBUG
+#ifdef UNIV_DEBUG
 	/* We have already buffer fixed the page, and we are committed to
-	returning this page to the caller. Register for debugging. */
-	{
-		ibool	ret;
-		ret = rw_lock_s_lock_nowait(&fix_block->debug_latch, file, line);
+	returning this page to the caller. Register for debugging.
+	Skip the debug latch if the page belongs to the system temporary
+	tablespace; it is of little use for single-threaded table access. */
+	if (!fsp_is_system_temporary(page_id.space())) {
+		ibool   ret;
+		ret = rw_lock_s_lock_nowait(
+			&fix_block->debug_latch, file, line);
 		ut_a(ret);
 	}
-#endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
+
+	/* When a tablespace is re-initialized, its indexes have already
+	been freed, but blocks belonging to it may still reside in the
+	buffer pool. Removing such blocks would trigger removal of the
+	AHI entries associated with them, and the AHI removal logic tries
+	to load a block that is already in the free state. Handle this
+	case with mode = BUF_PEEK_IF_IN_POOL, which is used by
+	"btr_search_drop_page_hash_when_freed". */
+	ut_ad(mode == BUF_GET_POSSIBLY_FREED
+	      || mode == BUF_PEEK_IF_IN_POOL
+	      || !fix_block->page.file_page_was_freed);
 
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	ut_a(mode == BUF_GET_POSSIBLY_FREED
-	     || !fix_block->page.file_page_was_freed);
-#endif
 	/* Check if this is the first access to the page */
 	access_time = buf_page_is_accessed(&fix_block->page);
 
 	/* This is a heuristic and we don't care about ordering issues. */
 	if (access_time == 0) {
-		buf_block_mutex_enter(fix_block);
+		buf_page_mutex_enter(fix_block);
 
 		buf_page_set_accessed(&fix_block->page);
 
-		buf_block_mutex_exit(fix_block);
+		buf_page_mutex_exit(fix_block);
 	}
 
 	if (mode != BUF_PEEK_IF_IN_POOL) {
@@ -3084,25 +4635,33 @@ got_block:
 	}
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(++buf_dbg_counter % 5771 || buf_validate());
-	ut_a(fix_block->page.buf_fix_count > 0);
+	ut_a(fsp_skip_sanity_check(page_id.space())
+	     || ++buf_dbg_counter % 5771
+	     || buf_validate());
 	ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-#ifdef PAGE_ATOMIC_REF_COUNT
 	/* We have to wait here because the IO_READ state was set
-	under the protection of the hash_lock and the block->mutex
-	but not the block->lock. */
+	under the protection of the hash_lock, not under the
+	block->mutex or block->lock. */
 	buf_wait_for_read(fix_block);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+
+	/* Mark the block as dirty if requested by the caller. If not
+	requested (false), leave the block's dirty state unchanged
+	rather than overwriting it. The reason: the same block can be
+	shared or pinned by two different mtrs. If the first mtr set
+	the state to true and the second overwrote it with false, only
+	the last update would survive, and we could lose the flushing
+	of a modified block. */
+	if (dirty_with_no_latch) {
+		fix_block->made_dirty_with_no_latch = dirty_with_no_latch;
+	}
+
+	mtr_memo_type_t	fix_type;
 
 	switch (rw_latch) {
 	case RW_NO_LATCH:
 
-#ifndef PAGE_ATOMIC_REF_COUNT
-		buf_wait_for_read(fix_block);
-#endif /* !PAGE_ATOMIC_REF_COUNT */
-
 		fix_type = MTR_MEMO_BUF_FIX;
 		break;
 
@@ -3112,6 +4671,12 @@ got_block:
 		fix_type = MTR_MEMO_PAGE_S_FIX;
 		break;
 
+	case RW_SX_LATCH:
+		rw_lock_sx_lock_inline(&fix_block->lock, 0, file, line);
+
+		fix_type = MTR_MEMO_PAGE_SX_FIX;
+		break;
+
 	default:
 		ut_ad(rw_latch == RW_X_LATCH);
 		rw_lock_x_lock_inline(&fix_block->lock, 0, file, line);
@@ -3126,26 +4691,23 @@ got_block:
 		/* In the case of a first access, try to apply linear
 		read-ahead */
 
-		buf_read_ahead_linear(
-			space, zip_size, offset, ibuf_inside(mtr));
+		buf_read_ahead_linear(page_id, page_size, ibuf_inside(mtr));
 	}
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a(ibuf_count_get(buf_block_get_space(fix_block),
-			    buf_block_get_page_no(fix_block)) == 0);
+	ut_a(ibuf_count_get(fix_block->page.id) == 0);
 #endif
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
+
+	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X));
+	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S));
+
 	return(fix_block);
 }
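
A note on the new dirty_with_no_latch handling above: buf_page_get_gen()
only ever sets the block flag, it never clears it. A minimal standalone
sketch of why the update must be one-way (block_stub and
note_dirty_request are illustrative names, not InnoDB types):

#include <atomic>

/* Illustrative stand-in for the relevant bit of buf_block_t. */
struct block_stub {
	std::atomic<bool>	made_dirty_with_no_latch{false};
};

/* The set-only rule used above. If this were written as
"b->made_dirty_with_no_latch = request;", a second mtr pinning the
same block with request == false would erase the first mtr's true,
and the no-latch flush logic could lose a modified block. */
void
note_dirty_request(block_stub* b, bool dirty_with_no_latch)
{
	if (dirty_with_no_latch) {
		b->made_dirty_with_no_latch = true;
	}
}
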
 
 /********************************************************************//**
 This is the general function used to get optimistic access to a database
 page.
-@return	TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
 ibool
 buf_page_optimistic_get(
 /*====================*/
@@ -3159,18 +4721,17 @@ buf_page_optimistic_get(
 	buf_pool_t*	buf_pool;
 	unsigned	access_time;
 	ibool		success;
-	ulint		fix_type;
 
 	ut_ad(block);
 	ut_ad(mtr);
-	ut_ad(mtr->state == MTR_ACTIVE);
+	ut_ad(mtr->is_active());
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
 
-	mutex_enter(&block->mutex);
+	buf_page_mutex_enter(block);
 
 	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
 
-		mutex_exit(&block->mutex);
+		buf_page_mutex_exit(block);
 
 		return(FALSE);
 	}
@@ -3181,41 +4742,52 @@ buf_page_optimistic_get(
 
 	buf_page_set_accessed(&block->page);
 
-	mutex_exit(&block->mutex);
+	buf_page_mutex_exit(block);
 
 	buf_page_make_young_if_needed(&block->page);
 
 	ut_ad(!ibuf_inside(mtr)
-	      || ibuf_page(buf_block_get_space(block),
-			   buf_block_get_zip_size(block),
-			   buf_block_get_page_no(block), NULL));
+	      || ibuf_page(block->page.id, block->page.size, NULL));
+
+	mtr_memo_type_t	fix_type;
+
+	switch (rw_latch) {
+	case RW_S_LATCH:
+		success = rw_lock_s_lock_nowait(&block->lock, file, line);
 
-	if (rw_latch == RW_S_LATCH) {
-		success = rw_lock_s_lock_nowait(&(block->lock),
-						file, line);
 		fix_type = MTR_MEMO_PAGE_S_FIX;
-	} else {
-		success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
-							    file, line);
+		break;
+	case RW_X_LATCH:
+		success = rw_lock_x_lock_func_nowait_inline(
+			&block->lock, file, line);
+
 		fix_type = MTR_MEMO_PAGE_X_FIX;
+		break;
+	default:
+		ut_error; /* RW_SX_LATCH is not implemented yet */
 	}
 
-	if (UNIV_UNLIKELY(!success)) {
+	if (!success) {
+		buf_page_mutex_enter(block);
 		buf_block_buf_fix_dec(block);
+		buf_page_mutex_exit(block);
 
 		return(FALSE);
 	}
 
-	if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
+	if (modify_clock != block->modify_clock) {
+
 		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
 		if (rw_latch == RW_S_LATCH) {
-			rw_lock_s_unlock(&(block->lock));
+			rw_lock_s_unlock(&block->lock);
 		} else {
-			rw_lock_x_unlock(&(block->lock));
+			rw_lock_x_unlock(&block->lock);
 		}
 
+		buf_page_mutex_enter(block);
 		buf_block_buf_fix_dec(block);
+		buf_page_mutex_exit(block);
 
 		return(FALSE);
 	}
@@ -3223,31 +4795,28 @@ buf_page_optimistic_get(
 	mtr_memo_push(mtr, block, fix_type);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(++buf_dbg_counter % 5771 || buf_validate());
+	ut_a(fsp_skip_sanity_check(block->page.id.space())
+	     || ++buf_dbg_counter % 5771
+	     || buf_validate());
 	ut_a(block->page.buf_fix_count > 0);
 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	mutex_enter(&block->mutex);
-	ut_a(!block->page.file_page_was_freed);
-	mutex_exit(&block->mutex);
-#endif
+	ut_d(buf_page_mutex_enter(block));
+	ut_ad(!block->page.file_page_was_freed);
+	ut_d(buf_page_mutex_exit(block));
 
 	if (!access_time) {
 		/* In the case of a first access, try to apply linear
 		read-ahead */
-
-		buf_read_ahead_linear(buf_block_get_space(block),
-				      buf_block_get_zip_size(block),
-				      buf_block_get_page_no(block),
+		buf_read_ahead_linear(block->page.id, block->page.size,
 				      ibuf_inside(mtr));
 	}
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a(ibuf_count_get(buf_block_get_space(block),
-			    buf_block_get_page_no(block)) == 0);
-#endif
+	ut_a(ibuf_count_get(block->page.id) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+
 	buf_pool = buf_pool_from_block(block);
 	buf_pool->stat.n_page_gets++;
 
@@ -3258,8 +4827,7 @@ buf_page_optimistic_get(
 This is used to get access to a known database page, when no waiting can be
 done. For example, if a search in an adaptive hash index leads us to this
 frame.
-@return	TRUE if success */
-UNIV_INTERN
+@return TRUE if success */
 ibool
 buf_page_get_known_nowait(
 /*======================*/
@@ -3272,13 +4840,11 @@ buf_page_get_known_nowait(
 {
 	buf_pool_t*	buf_pool;
 	ibool		success;
-	ulint		fix_type;
 
-	ut_ad(mtr);
-	ut_ad(mtr->state == MTR_ACTIVE);
+	ut_ad(mtr->is_active());
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
 
-	mutex_enter(&block->mutex);
+	buf_page_mutex_enter(block);
 
 	if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
 		/* Another thread is just freeing the block from the LRU list
@@ -3288,7 +4854,7 @@ buf_page_get_known_nowait(
 		we have already removed it from the page address hash table
 		of the buffer pool. */
 
-		mutex_exit(&block->mutex);
+		buf_page_mutex_exit(block);
 
 		return(FALSE);
 	}
@@ -3299,7 +4865,7 @@ buf_page_get_known_nowait(
 
 	buf_page_set_accessed(&block->page);
 
-	mutex_exit(&block->mutex);
+	buf_page_mutex_exit(block);
 
 	buf_pool = buf_pool_from_block(block);
 
@@ -3309,18 +4875,27 @@ buf_page_get_known_nowait(
 
 	ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
 
-	if (rw_latch == RW_S_LATCH) {
-		success = rw_lock_s_lock_nowait(&(block->lock),
-						file, line);
+	mtr_memo_type_t	fix_type;
+
+	switch (rw_latch) {
+	case RW_S_LATCH:
+		success = rw_lock_s_lock_nowait(&block->lock, file, line);
 		fix_type = MTR_MEMO_PAGE_S_FIX;
-	} else {
-		success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
-							    file, line);
+		break;
+	case RW_X_LATCH:
+		success = rw_lock_x_lock_func_nowait_inline(
+			&block->lock, file, line);
+
 		fix_type = MTR_MEMO_PAGE_X_FIX;
+		break;
+	default:
+		ut_error; /* RW_SX_LATCH is not implemented yet */
 	}
 
 	if (!success) {
+		buf_page_mutex_enter(block);
 		buf_block_buf_fix_dec(block);
+		buf_page_mutex_exit(block);
 
 		return(FALSE);
 	}
@@ -3332,7 +4907,8 @@ buf_page_get_known_nowait(
 	ut_a(block->page.buf_fix_count > 0);
 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+
+#ifdef UNIV_DEBUG
 	if (mode != BUF_KEEP_OLD) {
 		/* If mode == BUF_KEEP_OLD, we are executing an I/O
 		completion routine.  Avoid a bogus assertion failure
@@ -3341,48 +4917,44 @@ buf_page_get_known_nowait(
 		deleting a record from SYS_INDEXES. This check will be
 		skipped in recv_recover_page() as well. */
 
-		mutex_enter(&block->mutex);
+		buf_page_mutex_enter(block);
 		ut_a(!block->page.file_page_was_freed);
-		mutex_exit(&block->mutex);
+		buf_page_mutex_exit(block);
 	}
-#endif
+#endif /* UNIV_DEBUG */
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a((mode == BUF_KEEP_OLD)
-	     || (ibuf_count_get(buf_block_get_space(block),
-				buf_block_get_page_no(block)) == 0));
+	ut_a((mode == BUF_KEEP_OLD) || ibuf_count_get(block->page.id) == 0);
 #endif
 	buf_pool->stat.n_page_gets++;
 
 	return(TRUE);
 }
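
Both nowait getters now follow the same shape after this change:
increment the fix count, try the latch without waiting, and on failure
decrement the fix count again under the block mutex. A reduced model of
that protocol in plain C++ (block_model and try_get_s_latched are
hypothetical; std::shared_mutex stands in for block->lock):

#include <mutex>
#include <shared_mutex>

struct block_model {
	std::mutex		mtx;	/* stands in for the block mutex */
	std::shared_mutex	lock;	/* stands in for block->lock */
	unsigned		fix_count = 0;
};

/* Sketch of the fix / try-latch / unfix-on-failure protocol. */
bool
try_get_s_latched(block_model* b)
{
	{
		std::lock_guard<std::mutex> g(b->mtx);
		++b->fix_count;	/* pin: block cannot be evicted now */
	}

	if (!b->lock.try_lock_shared()) {
		/* Could not latch without waiting: undo the pin,
		again under the block mutex, and report failure. */
		std::lock_guard<std::mutex> g(b->mtx);
		--b->fix_count;
		return false;
	}

	return true;	/* caller holds the S latch and one fix */
}
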
 
-/*******************************************************************//**
-Given a tablespace id and page number tries to get that page. If the
+/** Given a tablespace id and page number tries to get that page. If the
 page is not in the buffer pool it is not loaded and NULL is returned.
 Suitable for using when holding the lock_sys_t::mutex.
-@return	pointer to a page or NULL */
-UNIV_INTERN
+@param[in]	page_id	page id
+@param[in]	file	file name
+@param[in]	line	line where called
+@param[in]	mtr	mini-transaction
+@return pointer to a page or NULL */
 const buf_block_t*
 buf_page_try_get_func(
-/*==================*/
-	ulint		space_id,/*!< in: tablespace id */
-	ulint		page_no,/*!< in: page number */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line where called */
-	mtr_t*		mtr)	/*!< in: mini-transaction */
+	const page_id_t&	page_id,
+	const char*		file,
+	ulint			line,
+	mtr_t*			mtr)
 {
 	buf_block_t*	block;
 	ibool		success;
-	ulint		fix_type;
-	buf_pool_t*	buf_pool = buf_pool_get(space_id, page_no);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 	rw_lock_t*	hash_lock;
 
 	ut_ad(mtr);
-	ut_ad(mtr->state == MTR_ACTIVE);
+	ut_ad(mtr->is_active());
 
-	block = buf_block_hash_get_s_locked(buf_pool, space_id,
-					    page_no, &hash_lock);
+	block = buf_block_hash_get_s_locked(buf_pool, page_id, &hash_lock);
 
 	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
 		if (block) {
@@ -3393,19 +4965,18 @@ buf_page_try_get_func(
 
 	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
 
-	mutex_enter(&block->mutex);
+	buf_page_mutex_enter(block);
 	rw_lock_s_unlock(hash_lock);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-	ut_a(buf_block_get_space(block) == space_id);
-	ut_a(buf_block_get_page_no(block) == page_no);
+	ut_a(page_id.equals_to(block->page.id));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 	buf_block_buf_fix_inc(block, file, line);
-	mutex_exit(&block->mutex);
+	buf_page_mutex_exit(block);
 
-	fix_type = MTR_MEMO_PAGE_S_FIX;
+	mtr_memo_type_t	fix_type = MTR_MEMO_PAGE_S_FIX;
 	success = rw_lock_s_lock_nowait(&block->lock, file, line);
 
 	if (!success) {
@@ -3419,30 +4990,34 @@ buf_page_try_get_func(
 	}
 
 	if (!success) {
+		buf_page_mutex_enter(block);
 		buf_block_buf_fix_dec(block);
+		buf_page_mutex_exit(block);
 
 		return(NULL);
 	}
 
 	mtr_memo_push(mtr, block, fix_type);
+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(++buf_dbg_counter % 5771 || buf_validate());
+	ut_a(fsp_skip_sanity_check(block->page.id.space())
+	     || ++buf_dbg_counter % 5771
+	     || buf_validate());
 	ut_a(block->page.buf_fix_count > 0);
 	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	mutex_enter(&block->mutex);
-	ut_a(!block->page.file_page_was_freed);
-	mutex_exit(&block->mutex);
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+
+	ut_d(buf_page_mutex_enter(block));
+	ut_d(ut_a(!block->page.file_page_was_freed));
+	ut_d(buf_page_mutex_exit(block));
+
 	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
 	buf_pool->stat.n_page_gets++;
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a(ibuf_count_get(buf_block_get_space(block),
-			    buf_block_get_page_no(block)) == 0);
-#endif
+	ut_a(ibuf_count_get(block->page.id) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
 
 	return(block);
 }
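
The common thread in these hunks is the replacement of loose
(space, zip_size, offset) argument triples with the page_id_t and
page_size_t value types. As a rough sketch of the page_id_t interface
relied on above (a simplification, not the actual class definition;
the fold() formula mirrors the old buf_page_address_fold()):

#include <cstdint>

/* Simplified model of the page_id_t used by the new code. */
class page_id_t {
public:
	page_id_t(std::uint32_t space, std::uint32_t page_no)
		: m_space(space), m_page_no(page_no) {}

	std::uint32_t	space() const	{ return m_space; }
	std::uint32_t	page_no() const	{ return m_page_no; }

	/* One hash value per page, replacing the old
	buf_page_address_fold(space, offset) call sites. */
	unsigned long	fold() const
	{
		return (static_cast<unsigned long>(m_space) << 20)
			+ m_space + m_page_no;
	}

	bool	equals_to(const page_id_t& other) const
	{
		return m_space == other.m_space
			&& m_page_no == other.m_page_no;
	}

private:
	std::uint32_t	m_space;
	std::uint32_t	m_page_no;
};

page_size_t plays the matching role for the old zip_size argument,
carrying both the physical (possibly compressed) and logical sizes.
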
@@ -3463,43 +5038,38 @@ buf_page_init_low(
 	bpage->newest_modification = 0;
 	bpage->oldest_modification = 0;
 	HASH_INVALIDATE(bpage, hash);
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	bpage->file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+
+	ut_d(bpage->file_page_was_freed = FALSE);
 }
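
Several hunks replace `#if defined UNIV_DEBUG_FILE_ACCESSES || defined
UNIV_DEBUG` blocks with one-line ut_d()/ut_ad() wrappers, as in
buf_page_init_low() above. For readers outside the tree, the macros
reduce to roughly this (a sketch; the real definitions live in
ut0dbg.h, where ut_ad() expands to ut_a() rather than assert()):

#include <cassert>

/* Simplified model of the InnoDB debug helpers. */
#ifdef UNIV_DEBUG
# define ut_d(EXPR)	EXPR		/* statement in debug builds only */
# define ut_ad(EXPR)	assert(EXPR)	/* debug-only assertion */
#else
# define ut_d(EXPR)			/* compiled out */
# define ut_ad(EXPR)
#endif

So `ut_d(bpage->file_page_was_freed = FALSE);` keeps the old
conditional-compilation behaviour in one line instead of three.
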
 
-/********************************************************************//**
-Inits a page to the buffer buf_pool. */
-static __attribute__((nonnull))
+/** Inits a page to the buffer buf_pool.
+@param[in,out]	buf_pool	buffer pool
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in,out]	block		block to init */
+static
 void
 buf_page_init(
-/*==========*/
-	buf_pool_t*	buf_pool,/*!< in/out: buffer pool */
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space
-				in units of a page */
-	ulint		fold,	/*!< in: buf_page_address_fold(space,offset) */
-	ulint		zip_size,/*!< in: compressed page size, or 0 */
-	buf_block_t*	block)	/*!< in/out: block to init */
+	buf_pool_t*		buf_pool,
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	buf_block_t*		block)
 {
 	buf_page_t*	hash_page;
 
-	ut_ad(buf_pool == buf_pool_get(space, offset));
+	ut_ad(buf_pool == buf_pool_get(page_id));
 	ut_ad(buf_pool_mutex_own(buf_pool));
 
-	ut_ad(mutex_own(&(block->mutex)));
+	ut_ad(buf_page_mutex_own(block));
 	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, fold),
-			  RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, page_id),
+			  RW_LOCK_X));
 
 	/* Set the state of the block */
-	buf_block_set_file_page(block, space, offset);
+	buf_block_set_file_page(block, page_id);
 
 #ifdef UNIV_DEBUG_VALGRIND
-	if (!space) {
+	if (is_system_tablespace(page_id.space())) {
 		/* Silence valid Valgrind warnings about uninitialized
 		data being written to data files.  There are some unused
 		bytes on some pages that InnoDB does not initialize. */
@@ -3509,60 +5079,58 @@ buf_page_init(
 
 	buf_block_init_low(block);
 
-	block->lock_hash_val = lock_rec_hash(space, offset);
+	block->lock_hash_val = lock_rec_hash(page_id.space(),
+					     page_id.page_no());
 
 	buf_page_init_low(&block->page);
 
 	/* Insert into the hash table of file pages */
 
-	hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	hash_page = buf_page_hash_get_low(buf_pool, page_id);
 
 	if (hash_page == NULL) {
-		/* Block not found in the hash table */
+		/* Block not found in hash table */
 	} else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
+		/* Preserve the reference count. */
 		ib_uint32_t	buf_fix_count = hash_page->buf_fix_count;
 
-	ut_a(buf_fix_count > 0);
+		ut_a(buf_fix_count > 0);
 
-#ifdef PAGE_ATOMIC_REF_COUNT
-		os_atomic_increment_uint32(
-			&block->page.buf_fix_count, buf_fix_count);
-#else
-		block->page.buf_fix_count += ulint(buf_fix_count);
-#endif /* PAGE_ATOMIC_REF_COUNT */
+		os_atomic_increment_uint32(&block->page.buf_fix_count,
+					   buf_fix_count);
 
-		buf_pool_watch_remove(buf_pool, fold, hash_page);
+		buf_pool_watch_remove(buf_pool, hash_page);
 	} else {
-		fprintf(stderr,
-			"InnoDB: Error: page %lu %lu already found"
-			" in the hash table: %p, %p\n",
-			(ulong) space,
-			(ulong) offset,
-			(const void*) hash_page, (const void*) block);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-		mutex_exit(&block->mutex);
-		buf_pool_mutex_exit(buf_pool);
-		buf_print();
-		buf_LRU_print();
-		buf_validate();
-		buf_LRU_validate();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-		ut_error;
+
+		ib::error() << "Page " << page_id
+			<< " already found in the hash table: "
+			<< hash_page << ", " << block;
+
+		ut_d(buf_page_mutex_exit(block));
+		ut_d(buf_pool_mutex_exit(buf_pool));
+		ut_d(buf_print());
+		ut_d(buf_LRU_print());
+		ut_d(buf_validate());
+		ut_d(buf_LRU_validate());
+		ut_ad(0);
 	}
 
 	ut_ad(!block->page.in_zip_hash);
 	ut_ad(!block->page.in_page_hash);
 	ut_d(block->page.in_page_hash = TRUE);
 
-	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, &block->page);
+	block->page.id.copy_from(page_id);
+	block->page.size.copy_from(page_size);
+
+	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+		    page_id.fold(), &block->page);
 
-	if (zip_size) {
-		page_zip_set_size(&block->page.zip, zip_size);
+	if (page_size.is_compressed()) {
+		page_zip_set_size(&block->page.zip, page_size.physical());
 	}
 }
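
When buf_page_init() finds a watch sentinel in the hash, the sentinel's
buf_fix_count is carried over to the real block before the watch is
removed, so that the watchers' pins are not lost. A toy model of that
hand-over (the real code uses os_atomic_increment_uint32 under the
page_hash x-lock; names here are illustrative):

#include <atomic>
#include <cassert>
#include <cstdint>

struct page_stub {
	std::atomic<std::uint32_t>	buf_fix_count{0};
};

/* Move the pins held on a watch sentinel onto the real page
descriptor that replaces it in the page hash. Safe because the
caller holds the hash x-lock, so no new watcher can appear. */
void
transfer_fix_count(page_stub* sentinel, page_stub* real_page)
{
	std::uint32_t	fixes = sentinel->buf_fix_count.load();

	assert(fixes > 0);	/* somebody set the watch */

	real_page->buf_fix_count.fetch_add(fixes);
}
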
 
-/********************************************************************//**
-Function which inits a page for read to the buffer buf_pool. If the page is
+/** Inits a page for read to the buffer buf_pool. If the page is
 (1) already in buf_pool, or
 (2) if we specify to read only ibuf pages and the page is not an ibuf page, or
 (3) if the space is deleted or being deleted,
@@ -3570,31 +5138,27 @@ then this function does nothing.
 Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
 on the buffer frame. The io-handler must take care that the flag is cleared
 and the lock released later.
-@return	pointer to the block or NULL */
-UNIV_INTERN
+@param[out]	err			DB_SUCCESS or DB_TABLESPACE_DELETED
+@param[in]	mode			BUF_READ_IBUF_PAGES_ONLY, ...
+@param[in]	page_id			page id
+@param[in]	page_size		page size
+@param[in]	unzip			TRUE=request uncompressed page
+@return pointer to the block or NULL */
 buf_page_t*
 buf_page_init_for_read(
-/*===================*/
-	dberr_t*	err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
-	ulint		mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size, or 0 */
-	ibool		unzip,	/*!< in: TRUE=request uncompressed page */
-	ib_int64_t	tablespace_version,
-				/*!< in: prevents reading from a wrong
-				version of the tablespace in case we have done
-				DISCARD + IMPORT */
-	ulint		offset)	/*!< in: page number */
+	dberr_t*		err,
+	ulint			mode,
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	ibool			unzip)
 {
 	buf_block_t*	block;
 	buf_page_t*	bpage	= NULL;
 	buf_page_t*	watch_page;
 	rw_lock_t*	hash_lock;
 	mtr_t		mtr;
-	ulint		fold;
 	ibool		lru	= FALSE;
 	void*		data;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 
 	ut_ad(buf_pool);
 
@@ -3603,12 +5167,12 @@ buf_page_init_for_read(
 	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
 		/* It is a read-ahead within an ibuf routine */
 
-		ut_ad(!ibuf_bitmap_page(zip_size, offset));
+		ut_ad(!ibuf_bitmap_page(page_id, page_size));
 
 		ibuf_mtr_start(&mtr);
 
-		if (!recv_no_ibuf_operations
-		    && !ibuf_page(space, zip_size, offset, &mtr)) {
+		if (!recv_no_ibuf_operations
+		    && !ibuf_page(page_id, page_size, &mtr)) {
 
 			ibuf_mtr_commit(&mtr);
 
@@ -3618,7 +5182,7 @@ buf_page_init_for_read(
 		ut_ad(mode == BUF_READ_ANY_PAGE);
 	}
 
-	if (zip_size && !unzip && !recv_recovery_is_on()) {
+	if (page_size.is_compressed() && !unzip && !recv_recovery_is_on()) {
 		block = NULL;
 	} else {
 		block = buf_LRU_get_free_block(buf_pool);
@@ -3626,53 +5190,40 @@ buf_page_init_for_read(
 		ut_ad(buf_pool_from_block(block) == buf_pool);
 	}
 
-	fold = buf_page_address_fold(space, offset);
-	hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
 	buf_pool_mutex_enter(buf_pool);
+
+	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
 	rw_lock_x_lock(hash_lock);
 
-	watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	watch_page = buf_page_hash_get_low(buf_pool, page_id);
 	if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
 		/* The page is already in the buffer pool. */
 		watch_page = NULL;
-err_exit:
 		rw_lock_x_unlock(hash_lock);
 		if (block) {
-			mutex_enter(&block->mutex);
+			buf_page_mutex_enter(block);
 			buf_LRU_block_free_non_file_page(block);
-			mutex_exit(&block->mutex);
+			buf_page_mutex_exit(block);
 		}
 
 		bpage = NULL;
 		goto func_exit;
 	}
 
-	if (fil_tablespace_deleted_or_being_deleted_in_mem(
-		    space, tablespace_version)) {
-		/* The page belongs to a space which has been
-		deleted or is being deleted. */
-		*err = DB_TABLESPACE_DELETED;
-
-		goto err_exit;
-	}
-
 	if (block) {
 		bpage = &block->page;
 
-		mutex_enter(&block->mutex);
+		buf_page_mutex_enter(block);
 
 		ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
 
-		buf_page_init(buf_pool, space, offset, fold, zip_size, block);
+		buf_page_init(buf_pool, page_id, page_size, block);
 
-#ifdef PAGE_ATOMIC_REF_COUNT
-		/* Note: We set the io state without the protection of
-		the block->lock. This is because other threads cannot
-		access this block unless it is in the hash table. */
+		/* Note: We are using the hash_lock for protection. This is
+		safe because no other thread can look up the block in the
+		page hash table yet. */
 
 		buf_page_set_io_fix(bpage, BUF_IO_READ);
-#endif /* PAGE_ATOMIC_REF_COUNT */
 
 		rw_lock_x_unlock(hash_lock);
 
@@ -3690,11 +5241,7 @@ err_exit:
 
 		rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
 
-#ifndef PAGE_ATOMIC_REF_COUNT
-		buf_page_set_io_fix(bpage, BUF_IO_READ);
-#endif /* !PAGE_ATOMIC_REF_COUNT */
-
-		if (zip_size) {
+		if (page_size.is_compressed()) {
 			/* buf_pool->mutex may be released and
 			reacquired by buf_buddy_alloc().  Thus, we
 			must release block->mutex in order not to
@@ -3703,9 +5250,10 @@ err_exit:
 			operation until after the block descriptor has
 			been added to buf_pool->LRU and
 			buf_pool->page_hash. */
-			mutex_exit(&block->mutex);
-			data = buf_buddy_alloc(buf_pool, zip_size, &lru);
-			mutex_enter(&block->mutex);
+			buf_page_mutex_exit(block);
+			data = buf_buddy_alloc(buf_pool, page_size.physical(),
+					       &lru);
+			buf_page_mutex_enter(block);
 			block->page.zip.data = (page_zip_t*) data;
 
 			/* To maintain the invariant
@@ -3717,7 +5265,7 @@ err_exit:
 			buf_unzip_LRU_add_block(block, TRUE);
 		}
 
-		mutex_exit(&block->mutex);
+		buf_page_mutex_exit(block);
 	} else {
 		rw_lock_x_unlock(hash_lock);
 
@@ -3725,7 +5273,7 @@ err_exit:
 		control block (bpage), in order to avoid the
 		invocation of buf_buddy_relocate_block() on
 		uninitialized data. */
-		data = buf_buddy_alloc(buf_pool, zip_size, &lru);
+		data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru);
 
 		rw_lock_x_lock(hash_lock);
 
@@ -3734,8 +5282,7 @@ err_exit:
 		check the page_hash again, as it may have been modified. */
 		if (UNIV_UNLIKELY(lru)) {
 
-			watch_page = buf_page_hash_get_low(
-				buf_pool, space, offset, fold);
+			watch_page = buf_page_hash_get_low(buf_pool, page_id);
 
 			if (UNIV_UNLIKELY(watch_page
 			    && !buf_pool_watch_is_sentinel(buf_pool,
@@ -3744,7 +5291,8 @@ err_exit:
 				/* The block was added by some other thread. */
 				rw_lock_x_unlock(hash_lock);
 				watch_page = NULL;
-				buf_buddy_free(buf_pool, data, zip_size);
+				buf_buddy_free(buf_pool, data,
+					       page_size.physical());
 
 				bpage = NULL;
 				goto func_exit;
@@ -3757,26 +5305,25 @@ err_exit:
 		bpage->buf_pool_index = buf_pool_index(buf_pool);
 
 		page_zip_des_init(&bpage->zip);
-		page_zip_set_size(&bpage->zip, zip_size);
+		page_zip_set_size(&bpage->zip, page_size.physical());
 		bpage->zip.data = (page_zip_t*) data;
 
+		bpage->size.copy_from(page_size);
+
 		mutex_enter(&buf_pool->zip_mutex);
-		UNIV_MEM_DESC(bpage->zip.data,
-			      page_zip_get_size(&bpage->zip));
+		UNIV_MEM_DESC(bpage->zip.data, bpage->size.physical());
 
 		buf_page_init_low(bpage);
 
-		bpage->state	= BUF_BLOCK_ZIP_PAGE;
-		bpage->space	= static_cast<ib_uint32_t>(space);
-		bpage->offset	= static_cast<ib_uint32_t>(offset);
+		bpage->state = BUF_BLOCK_ZIP_PAGE;
+		bpage->id.copy_from(page_id);
+		bpage->flush_observer = NULL;
 
-#ifdef UNIV_DEBUG
-		bpage->in_page_hash = FALSE;
-		bpage->in_zip_hash = FALSE;
-		bpage->in_flush_list = FALSE;
-		bpage->in_free_list = FALSE;
-		bpage->in_LRU_list = FALSE;
-#endif /* UNIV_DEBUG */
+		ut_d(bpage->in_page_hash = FALSE);
+		ut_d(bpage->in_zip_hash = FALSE);
+		ut_d(bpage->in_flush_list = FALSE);
+		ut_d(bpage->in_free_list = FALSE);
+		ut_d(bpage->in_LRU_list = FALSE);
 
 		ut_d(bpage->in_page_hash = TRUE);
 
@@ -3789,24 +5336,20 @@ err_exit:
 
 			ut_a(buf_fix_count > 0);
 
-#ifdef PAGE_ATOMIC_REF_COUNT
 			os_atomic_increment_uint32(
 				&bpage->buf_fix_count, buf_fix_count);
-#else
-			bpage->buf_fix_count += buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
 
 			ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
-			buf_pool_watch_remove(buf_pool, fold, watch_page);
+			buf_pool_watch_remove(buf_pool, watch_page);
 		}
 
-		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
-			    bpage);
+		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+			    bpage->id.fold(), bpage);
 
 		rw_lock_x_unlock(hash_lock);
 
 		/* The block must be put to the LRU list, to the old blocks.
-		The zip_size is already set into the page zip */
+		The zip size is already set into the page zip */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		buf_LRU_insert_zip_clean(bpage);
@@ -3826,63 +5369,54 @@ func_exit:
 		ibuf_mtr_commit(&mtr);
 	}
 
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
+	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X));
+	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S));
 	ut_ad(!bpage || buf_page_in_file(bpage));
+
 	return(bpage);
 }
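
The compressed-only branch above drops the hash lock around
buf_buddy_alloc() and must then re-check page_hash, since another
thread may have read the page in meanwhile. The bare shape of that
pattern, with standard C++ stand-ins (slow_alloc/slow_free and the map
are illustrative only):

#include <map>
#include <mutex>
#include <shared_mutex>

static std::shared_mutex	hash_lock;
static std::map<int, void*>	page_hash;	/* page_no -> descriptor */

static void*	slow_alloc()	  { return ::operator new(16 * 1024); }
static void	slow_free(void* p) { ::operator delete(p); }

/* Allocate outside the lock, then re-validate the lookup. */
void*
init_for_read(int page_no)
{
	void*	data = slow_alloc();	/* may block, like
					buf_buddy_alloc() above */

	std::unique_lock<std::shared_mutex> x(hash_lock);

	auto	it = page_hash.find(page_no);
	if (it != page_hash.end()) {
		/* Someone else added the page while we allocated:
		give our memory back and reuse their descriptor. */
		void*	existing = it->second;
		x.unlock();
		slow_free(data);
		return existing;
	}

	page_hash.emplace(page_no, data);
	return data;
}
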
 
-/********************************************************************//**
-Initializes a page to the buffer buf_pool. The page is usually not read
+/** Initializes a page to the buffer buf_pool. The page is usually not read
 from a file even if it cannot be found in the buffer buf_pool. This is one
 of the functions which perform to a block a state transition NOT_USED =>
 FILE_PAGE (the other is buf_page_get_gen).
-@return	pointer to the block, page bufferfixed */
-UNIV_INTERN
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in]	mtr		mini-transaction
+@return pointer to the block, page bufferfixed */
 buf_block_t*
 buf_page_create(
-/*============*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset,	/*!< in: offset of the page within space in units of
-			a page */
-	ulint	zip_size,/*!< in: compressed page size, or 0 */
-	mtr_t*	mtr)	/*!< in: mini-transaction handle */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	mtr_t*			mtr)
 {
 	buf_frame_t*	frame;
 	buf_block_t*	block;
-	ulint		fold;
 	buf_block_t*	free_block	= NULL;
-	buf_pool_t*	buf_pool	= buf_pool_get(space, offset);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 	rw_lock_t*	hash_lock;
 
-	ut_ad(mtr);
-	ut_ad(mtr->state == MTR_ACTIVE);
-	ut_ad(space || !zip_size);
+	ut_ad(mtr->is_active());
+	ut_ad(page_id.space() != 0 || !page_size.is_compressed());
 
 	free_block = buf_LRU_get_free_block(buf_pool);
 
-	fold = buf_page_address_fold(space, offset);
-	hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
 	buf_pool_mutex_enter(buf_pool);
+
+	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
 	rw_lock_x_lock(hash_lock);
 
-	block = (buf_block_t*) buf_page_hash_get_low(
-		buf_pool, space, offset, fold);
+	block = (buf_block_t*) buf_page_hash_get_low(buf_pool, page_id);
 
 	if (block
 	    && buf_page_in_file(&block->page)
 	    && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
+
 #ifdef UNIV_IBUF_COUNT_DEBUG
-		ut_a(ibuf_count_get(space, offset) == 0);
-#endif
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-		block->page.file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+		ut_a(ibuf_count_get(page_id) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+
+		ut_d(block->page.file_page_was_freed = FALSE);
 
 		/* Page can be found in buf_pool */
 		buf_pool_mutex_exit(buf_pool);
@@ -3890,23 +5424,19 @@ buf_page_create(
 
 		buf_block_free(free_block);
 
-		return(buf_page_get_with_no_latch(space, zip_size, offset, mtr));
+		return(buf_page_get_with_no_latch(page_id, page_size, mtr));
 	}
 
 	/* If we get here, the page was not in buf_pool: init it there */
 
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints) {
-		fprintf(stderr, "Creating space %lu page %lu to buffer\n",
-			(ulong) space, (ulong) offset);
-	}
-#endif /* UNIV_DEBUG */
+	DBUG_PRINT("ib_buf", ("create page " UINT32PF ":" UINT32PF,
+			      page_id.space(), page_id.page_no()));
 
 	block = free_block;
 
-	mutex_enter(&block->mutex);
+	buf_page_mutex_enter(block);
 
-	buf_page_init(buf_pool, space, offset, fold, zip_size, block);
+	buf_page_init(buf_pool, page_id, page_size, block);
 
 	rw_lock_x_unlock(hash_lock);
 
@@ -3916,7 +5446,7 @@ buf_page_create(
 	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
 	buf_pool->stat.n_pages_created++;
 
-	if (zip_size) {
+	if (page_size.is_compressed()) {
 		void*	data;
 		ibool	lru;
 
@@ -3927,15 +5457,15 @@ buf_page_create(
 		buf_page_set_io_fix(&block->page, BUF_IO_READ);
 		rw_lock_x_lock(&block->lock);
 
-		mutex_exit(&block->mutex);
+		buf_page_mutex_exit(block);
 		/* buf_pool->mutex may be released and reacquired by
 		buf_buddy_alloc().  Thus, we must release block->mutex
 		in order not to break the latching order in
 		the reacquisition of buf_pool->mutex.  We also must
 		defer this operation until after the block descriptor
 		has been added to buf_pool->LRU and buf_pool->page_hash. */
-		data = buf_buddy_alloc(buf_pool, zip_size, &lru);
-		mutex_enter(&block->mutex);
+		data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru);
+		buf_page_mutex_enter(block);
 		block->page.zip.data = (page_zip_t*) data;
 
 		/* To maintain the invariant
@@ -3956,12 +5486,11 @@ buf_page_create(
 
 	buf_page_set_accessed(&block->page);
 
-	mutex_exit(&block->mutex);
+	buf_page_mutex_exit(block);
 
 	/* Delete possible entries for the page from the insert buffer:
 	such can exist if the page belonged to an index which was dropped */
-
-	ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
+	ibuf_merge_or_delete_for_page(NULL, page_id, &page_size, TRUE);
 
 	frame = block->frame;
 
@@ -3969,11 +5498,14 @@ buf_page_create(
 	memset(frame + FIL_PAGE_NEXT, 0xff, 4);
 	mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
 
-	/* Reset to zero the file flush lsn field in the page; if the first
-	page of an ibdata file is 'created' in this function into the buffer
-	pool then we lose the original contents of the file flush lsn stamp.
-	Then InnoDB could in a crash recovery print a big, false, corruption
-	warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
+	/* These 8 bytes are also repurposed for PageIO compression and must
+	be reset when the frame is assigned to a new page id. See fil0fil.h.
+
+	FIL_PAGE_FILE_FLUSH_LSN is used on the following pages:
+	(1) The first page of the InnoDB system tablespace (page 0:0)
+	(2) FIL_RTREE_SPLIT_SEQ_NUM on R-tree pages.
+
+	Therefore we don't transparently compress such pages. */
 
 	memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
 
@@ -3981,8 +5513,7 @@ buf_page_create(
 	ut_a(++buf_dbg_counter % 5771 || buf_validate());
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 #ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a(ibuf_count_get(buf_block_get_space(block),
-			    buf_block_get_page_no(block)) == 0);
+	ut_a(ibuf_count_get(block->page.id) == 0);
 #endif
 	return(block);
 }
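
For reference, the header fields touched when a frame is (re)created
sit at fixed offsets in every page. A hedged sketch of the same
initialization against a raw buffer (offsets as documented in
fil0fil.h; treat the constants here as assumptions):

#include <cstring>
#include <cstdint>

/* Page-header offsets, as in fil0fil.h (assumed here). */
static const unsigned	FIL_PAGE_PREV		= 8;
static const unsigned	FIL_PAGE_NEXT		= 12;
static const unsigned	FIL_PAGE_TYPE		= 24;
static const unsigned	FIL_PAGE_FILE_FLUSH_LSN	= 26;
static const std::uint16_t FIL_PAGE_TYPE_ALLOCATED = 0;

/* Big-endian 2-byte write, like mach_write_to_2(). */
static void
write_be2(unsigned char* p, std::uint16_t v)
{
	p[0] = static_cast<unsigned char>(v >> 8);
	p[1] = static_cast<unsigned char>(v);
}

/* Sketch of the frame reset done at the end of buf_page_create():
poison the prev/next pointers, mark the page freshly allocated, and
clear the 8 flush-LSN bytes that page compression reuses. */
void
init_created_frame(unsigned char* frame)
{
	std::memset(frame + FIL_PAGE_PREV, 0xff, 4);
	std::memset(frame + FIL_PAGE_NEXT, 0xff, 4);
	write_be2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
	std::memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
}
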
@@ -4016,6 +5547,7 @@ buf_page_monitor(
 		ulint	level;
 
 	case FIL_PAGE_INDEX:
+	case FIL_PAGE_RTREE:
 		level = btr_page_get_level_low(frame);
 
 		/* Check if it is an index page for insert buffer */
@@ -4040,49 +5572,49 @@ buf_page_monitor(
 		}
 		break;
 
-        case FIL_PAGE_UNDO_LOG:
+	case FIL_PAGE_UNDO_LOG:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_UNDO_LOG_PAGE);
 		break;
 
-        case FIL_PAGE_INODE:
+	case FIL_PAGE_INODE:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_INODE_PAGE);
 		break;
 
-        case FIL_PAGE_IBUF_FREE_LIST:
+	case FIL_PAGE_IBUF_FREE_LIST:
 		counter = MONITOR_RW_COUNTER(io_type,
 					     MONITOR_IBUF_FREELIST_PAGE);
 		break;
 
-        case FIL_PAGE_IBUF_BITMAP:
+	case FIL_PAGE_IBUF_BITMAP:
 		counter = MONITOR_RW_COUNTER(io_type,
 					     MONITOR_IBUF_BITMAP_PAGE);
 		break;
 
-        case FIL_PAGE_TYPE_SYS:
+	case FIL_PAGE_TYPE_SYS:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_SYSTEM_PAGE);
 		break;
 
-        case FIL_PAGE_TYPE_TRX_SYS:
+	case FIL_PAGE_TYPE_TRX_SYS:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_TRX_SYSTEM_PAGE);
 		break;
 
-        case FIL_PAGE_TYPE_FSP_HDR:
+	case FIL_PAGE_TYPE_FSP_HDR:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_FSP_HDR_PAGE);
 		break;
 
-        case FIL_PAGE_TYPE_XDES:
+	case FIL_PAGE_TYPE_XDES:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_XDES_PAGE);
 		break;
 
-        case FIL_PAGE_TYPE_BLOB:
+	case FIL_PAGE_TYPE_BLOB:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_BLOB_PAGE);
 		break;
 
-        case FIL_PAGE_TYPE_ZBLOB:
+	case FIL_PAGE_TYPE_ZBLOB:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB_PAGE);
 		break;
 
-        case FIL_PAGE_TYPE_ZBLOB2:
+	case FIL_PAGE_TYPE_ZBLOB2:
 		counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB2_PAGE);
 		break;
 
@@ -4094,7 +5626,7 @@ buf_page_monitor(
 }
 
 /********************************************************************//**
-Mark a table with the specified space pointed by bpage->space corrupted.
+Mark a table with the specified space pointed by bpage->id.space() corrupted.
 Also remove the bpage from LRU list.
 @return TRUE if successful */
 static
@@ -4106,7 +5638,7 @@ buf_mark_space_corrupt(
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
 	const ibool	uncompressed = (buf_page_get_state(bpage)
 					== BUF_BLOCK_FILE_PAGE);
-	ulint		space = bpage->space;
+	ib_uint32_t	space = bpage->id.space();
 	ibool		ret = TRUE;
 
 	/* First unfix and release lock on the bpage */
@@ -4145,11 +5677,13 @@ buf_mark_space_corrupt(
 Completes an asynchronous read or write request of a file page to or from
 the buffer pool.
 @return true if successful */
-UNIV_INTERN
 bool
 buf_page_io_complete(
 /*=================*/
-	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
+	buf_page_t*	bpage,	/*!< in: pointer to the block in question */
+	bool		evict)	/*!< in: whether or not to evict the page
+				from the LRU list. */
+
 {
 	enum buf_io_fix	io_type;
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
@@ -4171,15 +5705,18 @@ buf_page_io_complete(
 		ulint	read_page_no;
 		ulint	read_space_id;
 		byte*	frame;
+		bool	compressed_page;
 
-		if (buf_page_get_zip_size(bpage)) {
+		if (bpage->size.is_compressed()) {
 			frame = bpage->zip.data;
 			buf_pool->n_pend_unzip++;
+
 			if (uncompressed
 			    && !buf_zip_decompress((buf_block_t*) bpage,
 						   FALSE)) {
 
 				buf_pool->n_pend_unzip--;
+				compressed_page = false;
 				goto corrupt;
 			}
 			buf_pool->n_pend_unzip--;
@@ -4195,109 +5732,120 @@ buf_page_io_complete(
 		read_space_id = mach_read_from_4(
 			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
-		if (bpage->space == TRX_SYS_SPACE
-		    && buf_dblwr_page_inside(bpage->offset)) {
+		if (bpage->id.space() == TRX_SYS_SPACE
+		    && buf_dblwr_page_inside(bpage->id.page_no())) {
 
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: Error: reading page %lu\n"
-				"InnoDB: which is in the"
-				" doublewrite buffer!\n",
-				(ulong) bpage->offset);
-		} else if (!read_space_id && !read_page_no) {
+			ib::error() << "Reading page " << bpage->id
+				<< ", which is in the doublewrite buffer!";
+
+		} else if (read_space_id == 0 && read_page_no == 0) {
 			/* This is likely an uninitialized page. */
-		} else if ((bpage->space
-			    && bpage->space != read_space_id)
-			   || bpage->offset != read_page_no) {
+		} else if ((bpage->id.space() != 0
+			    && bpage->id.space() != read_space_id)
+			   || bpage->id.page_no() != read_page_no) {
 			/* We did not compare space_id to read_space_id
 			if bpage->space == 0, because the field on the
 			page may contain garbage in MySQL < 4.1.1,
 			which only supported bpage->space == 0. */
 
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				"  InnoDB: Error: space id and page n:o"
-				" stored in the page\n"
-				"InnoDB: read in are %lu:%lu,"
-				" should be %lu:%lu!\n",
-				(ulong) read_space_id, (ulong) read_page_no,
-				(ulong) bpage->space,
-				(ulong) bpage->offset);
+			ib::error() << "Space id and page no stored in "
+				"the page read in are "
+				<< page_id_t(read_space_id, read_page_no)
+				<< ", should be " << bpage->id;
+		}
+
+		compressed_page = Compression::is_compressed_page(frame);
+
+		/* If the decompress failed then the most likely case is
+		that we are reading in a page for which this instance doesn't
+		support the compression algorithm. */
+		if (compressed_page) {
+
+			Compression::meta_t	meta;
+
+			Compression::deserialize_header(frame, &meta);
+
+			ib::error()
+				<< "Page " << bpage->id << " "
+				<< "compressed with "
+				<< Compression::to_string(meta) << " "
+				<< "that is not supported by this instance";
 		}
 
 		/* From version 3.23.38 up we store the page checksum
 		to the 4 first bytes of the page end lsn field */
-
-		if (buf_page_is_corrupted(true, frame,
-					  buf_page_get_zip_size(bpage))) {
+		if (compressed_page
+		    || buf_page_is_corrupted(
+			    true, frame, bpage->size,
+			    fsp_is_checksum_disabled(bpage->id.space()))) {
 
 			/* Not a real corruption if it was triggered by
 			error injection */
-			DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
-				if (bpage->space > TRX_SYS_SPACE
+			DBUG_EXECUTE_IF(
+				"buf_page_import_corrupt_failure",
+				if (bpage->id.space() > TRX_SYS_SPACE
+				    && !Tablespace::is_undo_tablespace(
+					    bpage->id.space())
 				    && buf_mark_space_corrupt(bpage)) {
-					ib_logf(IB_LOG_LEVEL_INFO,
-						"Simulated page corruption");
+					ib::info() << "Simulated IMPORT "
+						"corruption";
 					return(true);
 				}
 				goto page_not_corrupt;
 				;);
 corrupt:
-			fprintf(stderr,
-				"InnoDB: Database page corruption on disk"
-				" or a failed\n"
-				"InnoDB: file read of page %lu.\n"
-				"InnoDB: You may have to recover"
-				" from a backup.\n",
-				(ulong) bpage->offset);
-			buf_page_print(frame, buf_page_get_zip_size(bpage),
-				       BUF_PAGE_PRINT_NO_CRASH);
-			fprintf(stderr,
-				"InnoDB: Database page corruption on disk"
-				" or a failed\n"
-				"InnoDB: file read of page %lu.\n"
-				"InnoDB: You may have to recover"
-				" from a backup.\n",
-				(ulong) bpage->offset);
-			fputs("InnoDB: It is also possible that"
-			      " your operating\n"
-			      "InnoDB: system has corrupted its"
-			      " own file cache\n"
-			      "InnoDB: and rebooting your computer"
-			      " removes the\n"
-			      "InnoDB: error.\n"
-			      "InnoDB: If the corrupt page is an index page\n"
-			      "InnoDB: you can also try to"
-			      " fix the corruption\n"
-			      "InnoDB: by dumping, dropping,"
-			      " and reimporting\n"
-			      "InnoDB: the corrupt table."
-			      " You can use CHECK\n"
-			      "InnoDB: TABLE to scan your"
-			      " table for corruption.\n"
-			      "InnoDB: See also "
-			      REFMAN "forcing-innodb-recovery.html\n"
-			      "InnoDB: about forcing recovery.\n", stderr);
+			/* Compressed pages are basically gibberish;
+			avoid printing the contents. */
+			if (!compressed_page) {
+
+				ib::error()
+					<< "Database page corruption on disk"
+					" or a failed file read of page "
+					<< bpage->id
+					<< ". You may have to recover from "
+					<< "a backup.";
+
+				buf_page_print(
+					frame, bpage->size,
+					BUF_PAGE_PRINT_NO_CRASH);
+
+				ib::info()
+					<< "It is also possible that your"
+					" operating system has corrupted"
+					" its own file cache and rebooting"
+					" your computer removes the error."
+					" If the corrupt page is an index page,"
+					" you can also try to fix the"
+					" corruption by dumping, dropping,"
+					" and reimporting the corrupt table."
+					" You can use CHECK TABLE to scan"
+					" your table for corruption. "
+					<< FORCE_RECOVERY_MSG;
+			}
 
 			if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
+
 				/* If page space id is larger than TRX_SYS_SPACE
 				(0), we will attempt to mark the corresponding
 				table as corrupted instead of crashing server */
-				if (bpage->space > TRX_SYS_SPACE
+
+				if (bpage->id.space() > TRX_SYS_SPACE
 				    && buf_mark_space_corrupt(bpage)) {
+
 					return(false);
 				} else {
-					fputs("InnoDB: Ending processing"
-					      " because of"
-					      " a corrupt database page.\n",
-					      stderr);
-
-					ut_error;
+					ib::fatal()
+						<< "Aborting because of a"
+						" corrupt database page in"
+						" the system tablespace, or"
+						" there was a failure in"
+						" tagging the tablespace"
+						" as corrupt.";
 				}
 			}
 		}
 
-		DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
+		DBUG_EXECUTE_IF("buf_page_import_corrupt_failure",
 				page_not_corrupt:  bpage = bpage; );
 
 		if (recv_recovery_is_on()) {
@@ -4306,11 +5854,20 @@ corrupt:
 			recv_recover_page(TRUE, (buf_block_t*) bpage);
 		}
 
-		if (uncompressed && !recv_no_ibuf_operations) {
+		/* If space is being truncated then avoid ibuf operation.
+		During re-init we have already freed ibuf entries. */
+		if (uncompressed
+		    && !Compression::is_compressed_page(frame)
+		    && !recv_no_ibuf_operations
+		    && !Tablespace::is_undo_tablespace(bpage->id.space())
+		    && bpage->id.space() != srv_tmp_space.space_id()
+		    && !srv_is_tablespace_truncated(bpage->id.space())
+		    && fil_page_get_type(frame) == FIL_PAGE_INDEX
+		    && page_is_leaf(frame)) {
+
 			ibuf_merge_or_delete_for_page(
-				(buf_block_t*) bpage, bpage->space,
-				bpage->offset, buf_page_get_zip_size(bpage),
-				TRUE);
+				(buf_block_t*) bpage, bpage->id,
+				&bpage->size, TRUE);
 		}
 	}
 
@@ -4322,7 +5879,7 @@ corrupt:
 		/* For BUF_IO_READ of compressed-only blocks, the
 		buffered operations will be merged by buf_page_get_gen()
 		after the block has been uncompressed. */
-		ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
+		ut_a(ibuf_count_get(bpage->id) == 0);
 	}
 #endif
 	/* Because this thread which does the unlocking is not the same that
@@ -4331,6 +5888,7 @@ corrupt:
 	id. */
 
 	buf_page_set_io_fix(bpage, BUF_IO_NONE);
+	buf_page_monitor(bpage, io_type);
 
 	switch (io_type) {
 	case BUF_IO_READ:
@@ -4347,6 +5905,8 @@ corrupt:
 					     BUF_IO_READ);
 		}
 
+		mutex_exit(buf_page_get_mutex(bpage));
+
 		break;
 
 	case BUF_IO_WRITE:
@@ -4356,30 +5916,39 @@ corrupt:
 		buf_flush_write_complete(bpage);
 
 		if (uncompressed) {
-			rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
-					     BUF_IO_WRITE);
+			rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock,
+					      BUF_IO_WRITE);
 		}
 
 		buf_pool->stat.n_pages_written++;
 
+		/* We decide whether or not to evict the page from the
+		LRU list based on the flush_type.
+		* BUF_FLUSH_LIST: don't evict
+		* BUF_FLUSH_LRU: always evict
+		* BUF_FLUSH_SINGLE_PAGE: eviction preference is passed
+		by the caller explicitly. */
+		if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) {
+			evict = true;
+		}
+
+		if (evict) {
+			mutex_exit(buf_page_get_mutex(bpage));
+			buf_LRU_free_page(bpage, true);
+		} else {
+			mutex_exit(buf_page_get_mutex(bpage));
+		}
+
 		break;
 
 	default:
 		ut_error;
 	}
 
-	buf_page_monitor(bpage, io_type);
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints) {
-		fprintf(stderr, "Has %s page space %lu page no %lu\n",
-			io_type == BUF_IO_READ ? "read" : "written",
-			(ulong) buf_page_get_space(bpage),
-			(ulong) buf_page_get_page_no(bpage));
-	}
-#endif /* UNIV_DEBUG */
+	DBUG_PRINT("ib_buf", ("%s page " UINT32PF ":" UINT32PF,
+			      io_type == BUF_IO_READ ? "read" : "wrote",
+			      bpage->id.space(), bpage->id.page_no()));
 
-	mutex_exit(buf_page_get_mutex(bpage));
 	buf_pool_mutex_exit(buf_pool);
 
 	return(true);
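Reviewer note: the eviction policy added to the BUF_IO_WRITE arm is easiest
to review as a pure function. A sketch under stated assumptions (the enum
below is illustrative; the real type is buf_flush_t):

	/* Post-write eviction decision, per the comment above. */
	enum flush_type_t { FLUSH_LIST, FLUSH_LRU, FLUSH_SINGLE_PAGE };

	inline bool should_evict(flush_type_t type, bool caller_pref)
	{
		switch (type) {
		case FLUSH_LIST:	return(false);		/* never evict */
		case FLUSH_LRU:		return(true);		/* always evict */
		case FLUSH_SINGLE_PAGE:	return(caller_pref);	/* caller decides */
		}
		return(false);
	}
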
@@ -4387,7 +5956,7 @@ corrupt:
 
 /*********************************************************************//**
 Asserts that all file pages in the buffer are in a replaceable state.
-@return	TRUE */
+@return TRUE */
 static
 ibool
 buf_all_freed_instance(
@@ -4408,11 +5977,8 @@ buf_all_freed_instance(
 		const buf_block_t* block = buf_chunk_not_freed(chunk);
 
 		if (UNIV_LIKELY_NULL(block)) {
-			fprintf(stderr,
-				"Page %lu %lu still fixed or dirty\n",
-				(ulong) block->page.space,
-				(ulong) block->page.offset);
-			ut_error;
+			ib::fatal() << "Page " << block->page.id
+				<< " still fixed or dirty";
 		}
 	}
 
@@ -4461,7 +6027,7 @@ buf_pool_invalidate_instance(
 
 	buf_pool_mutex_enter(buf_pool);
 
-	while (buf_LRU_scan_and_free_block(buf_pool, TRUE)) {
+	while (buf_LRU_scan_and_free_block(buf_pool, true)) {
 	}
 
 	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
@@ -4481,7 +6047,6 @@ buf_pool_invalidate_instance(
 Invalidates the file pages in the buffer pool when an archive recovery is
 completed. All the file pages buffered must be in a replaceable state when
 this function is called: not latched and not modified. */
-UNIV_INTERN
 void
 buf_pool_invalidate(void)
 /*=====================*/
@@ -4496,7 +6061,7 @@ buf_pool_invalidate(void)
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /*********************************************************************//**
 Validates data in one buffer pool instance
-@return	TRUE */
+@return TRUE */
 static
 ibool
 buf_pool_validate_instance(
@@ -4513,9 +6078,6 @@ buf_pool_validate_instance(
 	ulint		n_flush		= 0;
 	ulint		n_free		= 0;
 	ulint		n_zip		= 0;
-	ulint		fold		= 0;
-	ulint		space		= 0;
-	ulint		offset		= 0;
 
 	ut_ad(buf_pool);
 
@@ -4533,7 +6095,7 @@ buf_pool_validate_instance(
 
 		for (j = chunk->size; j--; block++) {
 
-			mutex_enter(&block->mutex);
+			buf_page_mutex_enter(block);
 
 			switch (buf_block_get_state(block)) {
 			case BUF_BLOCK_POOL_WATCH:
@@ -4545,22 +6107,14 @@ buf_pool_validate_instance(
 				break;
 
 			case BUF_BLOCK_FILE_PAGE:
-				space = buf_block_get_space(block);
-				offset = buf_block_get_page_no(block);
-				fold = buf_page_address_fold(space, offset);
-				ut_a(buf_page_hash_get_low(buf_pool,
-							   space,
-							   offset,
-							   fold)
+				ut_a(buf_page_hash_get_low(
+						buf_pool, block->page.id)
 				     == &block->page);
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
 				ut_a(buf_page_get_io_fix(&block->page)
 				     == BUF_IO_READ
-				     || !ibuf_count_get(buf_block_get_space(
-								block),
-							buf_block_get_page_no(
-								block)));
+				     || !ibuf_count_get(block->page.id));
 #endif
 				switch (buf_page_get_io_fix(&block->page)) {
 				case BUF_IO_NONE:
@@ -4577,7 +6131,10 @@ buf_pool_validate_instance(
 assert_s_latched:
 						ut_a(rw_lock_is_locked(
 							     &block->lock,
-								     RW_LOCK_SHARED));
+								     RW_LOCK_S)
+						     || rw_lock_is_locked(
+								&block->lock,
+								RW_LOCK_SX));
 						break;
 					case BUF_FLUSH_LIST:
 						n_list_flush++;
@@ -4591,7 +6148,7 @@ assert_s_latched:
 				case BUF_IO_READ:
 
 					ut_a(rw_lock_is_locked(&block->lock,
-							       RW_LOCK_EX));
+							       RW_LOCK_X));
 					break;
 
 				case BUF_IO_PIN:
@@ -4612,7 +6169,7 @@ assert_s_latched:
 				break;
 			}
 
-			mutex_exit(&block->mutex);
+			buf_page_mutex_exit(block);
 		}
 	}
 
@@ -4643,9 +6200,7 @@ assert_s_latched:
 		we have acquired buf_pool->zip_mutex above which acts
 		as the 'block->mutex' for these bpages. */
 		ut_a(!b->oldest_modification);
-		fold = buf_page_address_fold(b->space, b->offset);
-		ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
-					   fold) == b);
+		ut_a(buf_page_hash_get_low(buf_pool, b->id) == b);
 		n_lru++;
 		n_zip++;
 	}
@@ -4697,9 +6252,7 @@ assert_s_latched:
 			ut_error;
 			break;
 		}
-		fold = buf_page_address_fold(b->space, b->offset);
-		ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
-					   fold) == b);
+		ut_a(buf_page_hash_get_low(buf_pool, b->id) == b);
 	}
 
 	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
@@ -4709,19 +6262,21 @@ assert_s_latched:
 
 	mutex_exit(&buf_pool->zip_mutex);
 
-	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
-		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
-			(ulong) n_lru, (ulong) n_free,
-			(ulong) buf_pool->curr_size, (ulong) n_zip);
-		ut_error;
+	if (buf_pool->curr_size == buf_pool->old_size
+	    && n_lru + n_free > buf_pool->curr_size + n_zip) {
+
+		ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free
+			<< ", pool " << buf_pool->curr_size
+			<< " zip " << n_zip << ". Aborting...";
 	}
 
 	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
-	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
-		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
-			(ulong) UT_LIST_GET_LEN(buf_pool->free),
-			(ulong) n_free);
-		ut_error;
+	if (buf_pool->curr_size == buf_pool->old_size
+	    && UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+
+		ib::fatal() << "Free list len "
+			<< UT_LIST_GET_LEN(buf_pool->free)
+			<< ", free blocks " << n_free << ". Aborting...";
 	}
 
 	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
@@ -4738,8 +6293,7 @@ assert_s_latched:
 
 /*********************************************************************//**
 Validates the buffer buf_pool data structure.
-@return	TRUE */
-UNIV_INTERN
+@return TRUE */
 ibool
 buf_validate(void)
 /*==============*/
@@ -4782,37 +6336,14 @@ buf_print_instance(
 	size = buf_pool->curr_size;
 
 	index_ids = static_cast<index_id_t*>(
-		mem_alloc(size * sizeof *index_ids));
+		ut_malloc_nokey(size * sizeof *index_ids));
 
-	counts = static_cast<ulint*>(mem_alloc(sizeof(ulint) * size));
+	counts = static_cast<ulint*>(ut_malloc_nokey(sizeof(ulint) * size));
 
 	buf_pool_mutex_enter(buf_pool);
 	buf_flush_list_mutex_enter(buf_pool);
 
-	fprintf(stderr,
-		"buf_pool size %lu\n"
-		"database pages %lu\n"
-		"free pages %lu\n"
-		"modified database pages %lu\n"
-		"n pending decompressions %lu\n"
-		"n pending reads %lu\n"
-		"n pending flush LRU %lu list %lu single page %lu\n"
-		"pages made young %lu, not young %lu\n"
-		"pages read %lu, created %lu, written %lu\n",
-		(ulong) size,
-		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
-		(ulong) UT_LIST_GET_LEN(buf_pool->free),
-		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
-		(ulong) buf_pool->n_pend_unzip,
-		(ulong) buf_pool->n_pend_reads,
-		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
-		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
-		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
-		(ulong) buf_pool->stat.n_pages_made_young,
-		(ulong) buf_pool->stat.n_pages_not_made_young,
-		(ulong) buf_pool->stat.n_pages_read,
-		(ulong) buf_pool->stat.n_pages_created,
-		(ulong) buf_pool->stat.n_pages_written);
+	ib::info() << *buf_pool;
 
 	buf_flush_list_mutex_exit(buf_pool);
 
@@ -4829,7 +6360,7 @@ buf_print_instance(
 		for (; n_blocks--; block++) {
 			const buf_frame_t* frame = block->frame;
 
-			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
+			if (fil_page_index_page_check(frame)) {
 
 				id = btr_page_get_index_id(frame);
 
@@ -4860,28 +6391,26 @@ buf_print_instance(
 	for (i = 0; i < n_found; i++) {
 		index = dict_index_get_if_in_cache(index_ids[i]);
 
-		fprintf(stderr,
-			"Block count for index %llu in buffer is about %lu",
-			(ullint) index_ids[i],
-			(ulong) counts[i]);
-
-		if (index) {
-			putc(' ', stderr);
-			dict_index_name_print(stderr, NULL, index);
+		if (!index) {
+			ib::info() << "Block count for index "
+				<< index_ids[i] << " in buffer is about "
+				<< counts[i];
+		} else {
+			ib::info() << "Block count for index " << index_ids[i]
+				<< " in buffer is about " << counts[i]
+				<< ", index " << index->name
+				<< " of table " << index->table->name;
 		}
-
-		putc('\n', stderr);
 	}
 
-	mem_free(index_ids);
-	mem_free(counts);
+	ut_free(index_ids);
+	ut_free(counts);
 
 	ut_a(buf_pool_validate_instance(buf_pool));
 }
 
 /*********************************************************************//**
 Prints info of the buffer buf_pool data structure. */
-UNIV_INTERN
 void
 buf_print(void)
 /*===========*/
@@ -4900,8 +6429,7 @@ buf_print(void)
 #ifdef UNIV_DEBUG
 /*********************************************************************//**
 Returns the number of latched pages in the buffer pool.
-@return	number of latched pages */
-UNIV_INTERN
+@return number of latched pages */
 ulint
 buf_get_latched_pages_number_instance(
 /*==================================*/
@@ -4929,7 +6457,7 @@ buf_get_latched_pages_number_instance(
 				continue;
 			}
 
-			mutex_enter(&block->mutex);
+			buf_page_mutex_enter(block);
 
 			if (block->page.buf_fix_count != 0
 			    || buf_page_get_io_fix(&block->page)
@@ -4937,7 +6465,7 @@ buf_get_latched_pages_number_instance(
 				fixed_pages_number++;
 			}
 
-			mutex_exit(&block->mutex);
+			buf_page_mutex_exit(block);
 		}
 	}
 
@@ -4991,8 +6519,7 @@ buf_get_latched_pages_number_instance(
 
 /*********************************************************************//**
 Returns the number of latched pages in all the buffer pools.
-@return	number of latched pages */
-UNIV_INTERN
+@return number of latched pages */
 ulint
 buf_get_latched_pages_number(void)
 /*==============================*/
@@ -5016,16 +6543,14 @@ buf_get_latched_pages_number(void)
 
 /*********************************************************************//**
 Returns the number of pending buf pool read ios.
-@return	number of pending read I/O operations */
-UNIV_INTERN
+@return number of pending read I/O operations */
 ulint
 buf_get_n_pending_read_ios(void)
 /*============================*/
 {
-	ulint	i;
 	ulint	pend_ios = 0;
 
-	for (i = 0; i < srv_buf_pool_instances; i++) {
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
 		pend_ios += buf_pool_from_array(i)->n_pend_reads;
 	}
 
@@ -5035,20 +6560,20 @@ buf_get_n_pending_read_ios(void)
 /*********************************************************************//**
 Returns the ratio in percents of modified pages in the buffer pool /
 database pages in the buffer pool.
-@return	modified page percentage ratio */
-UNIV_INTERN
-ulint
+@return modified page percentage ratio */
+double
 buf_get_modified_ratio_pct(void)
 /*============================*/
 {
-	ulint		ratio;
+	double		ratio;
 	ulint		lru_len = 0;
 	ulint		free_len = 0;
 	ulint		flush_list_len = 0;
 
 	buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
 
-	ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
+	ratio = static_cast<double>(100 * flush_list_len)
+		/ (1 + lru_len + free_len);
 
 	/* 1 + is there to avoid division by zero */
 
@@ -5115,7 +6640,6 @@ buf_stats_aggregate_pool_info(
 Collect buffer pool stats information for a buffer pool. Also
 record aggregated stats if there are more than one buffer pool
 in the server */
-UNIV_INTERN
 void
 buf_stats_get_pool_info(
 /*====================*/
@@ -5124,7 +6648,7 @@ buf_stats_get_pool_info(
 	buf_pool_info_t*	all_pool_info)	/*!< in/out: buffer pool info
 						to fill */
 {
-	buf_pool_info_t*        pool_info;
+	buf_pool_info_t*	pool_info;
 	time_t			current_time;
 	double			time_elapsed;
 
@@ -5250,7 +6774,6 @@ buf_stats_get_pool_info(
 
 /*********************************************************************//**
 Prints info of the buffer i/o. */
-UNIV_INTERN
 void
 buf_print_io_instance(
 /*==================*/
@@ -5329,7 +6852,6 @@ buf_print_io_instance(
 
 /*********************************************************************//**
 Prints info of the buffer i/o. */
-UNIV_INTERN
 void
 buf_print_io(
 /*=========*/
@@ -5343,7 +6865,7 @@ buf_print_io(
 	one extra buf_pool_info_t, the last one stores
 	aggregated/total values from all pools */
 	if (srv_buf_pool_instances > 1) {
-		pool_info = (buf_pool_info_t*) mem_zalloc((
+		pool_info = (buf_pool_info_t*) ut_zalloc_nokey((
 			srv_buf_pool_instances + 1) * sizeof *pool_info);
 
 		pool_info_total = &pool_info[srv_buf_pool_instances];
@@ -5352,7 +6874,7 @@ buf_print_io(
 
 		pool_info_total = pool_info =
 			static_cast<buf_pool_info_t*>(
-				mem_zalloc(sizeof *pool_info));
+				ut_zalloc_nokey(sizeof *pool_info));
 	}
 
 	for (i = 0; i < srv_buf_pool_instances; i++) {
@@ -5388,12 +6910,11 @@ buf_print_io(
 		}
 	}
 
-	mem_free(pool_info);
+	ut_free(pool_info);
 }
 
 /**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
 void
 buf_refresh_io_stats(
 /*=================*/
@@ -5405,7 +6926,6 @@ buf_refresh_io_stats(
 
 /**********************************************************************//**
 Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
 void
 buf_refresh_io_stats_all(void)
 /*==========================*/
@@ -5422,7 +6942,6 @@ buf_refresh_io_stats_all(void)
 /**********************************************************************//**
 Check if all pages in all buffer pools are in a replaceable state.
 @return FALSE if not */
-UNIV_INTERN
 ibool
 buf_all_freed(void)
 /*===============*/
@@ -5443,8 +6962,7 @@ buf_all_freed(void)
 /*********************************************************************//**
 Checks that there currently are no pending i/o-operations for the buffer
 pool.
-@return	number of pending i/o */
-UNIV_INTERN
+@return number of pending i/o */
 ulint
 buf_pool_check_no_pending_io(void)
 /*==============================*/
@@ -5475,8 +6993,7 @@ buf_pool_check_no_pending_io(void)
 Code currently not used
 /*********************************************************************//**
 Gets the current length of the free list of buffer blocks.
-@return	length of the free list */
-UNIV_INTERN
+@return length of the free list */
 ulint
 buf_get_free_list_len(void)
 /*=======================*/
@@ -5494,32 +7011,74 @@ buf_get_free_list_len(void)
 #endif
 
 #else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */
-UNIV_INTERN
+
+/** Inits a page to the buffer buf_pool, for use in mysqlbackup --restore.
+@param[in]	page_id		page id
+@param[in]	page_size	page size
+@param[in,out]	block		block to init */
 void
 buf_page_init_for_backup_restore(
-/*=============================*/
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space
-				in units of a page */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	buf_block_t*	block)	/*!< in: block to init */
+	const page_id_t&	page_id,
+	const page_size_t&	page_size,
+	buf_block_t*		block)
 {
-	block->page.state	= BUF_BLOCK_FILE_PAGE;
-	block->page.space	= space;
-	block->page.offset	= offset;
+	block->page.state = BUF_BLOCK_FILE_PAGE;
+	block->page.id = page_id;
+	block->page.size.copy_from(page_size);
 
 	page_zip_des_init(&block->page.zip);
 
 	/* We assume that block->page.data has been allocated
-	with zip_size == UNIV_PAGE_SIZE. */
-	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-	ut_ad(ut_is_2pow(zip_size));
-	page_zip_set_size(&block->page.zip, zip_size);
-	if (zip_size) {
-		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+	with page_size == univ_page_size. */
+	if (page_size.is_compressed()) {
+		page_zip_set_size(&block->page.zip, page_size.physical());
+		block->page.zip.data = block->frame + page_size.logical();
+	} else {
+		page_zip_set_size(&block->page.zip, 0);
 	}
 }
+
 #endif /* !UNIV_HOTBACKUP */
+
+/** Print the given page_id_t object.
+@param[in,out]	out	the output stream
+@param[in]	page_id	the page_id_t object to be printed
+@return the output stream */
+std::ostream&
+operator<<(
+	std::ostream&		out,
+	const page_id_t&	page_id)
+{
+	out << "[page id: space=" << page_id.m_space
+		<< ", page number=" << page_id.m_page_no << "]";
+	return(out);
+}
+
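Reviewer note: with this streaming operator, call sites elsewhere in the
patch can compose messages such as ib::info() << "Page " << bpage->id. A
self-contained analogue showing the output shape (stub struct; the real
page_id_t keeps these members private and befriends the operator):

	#include <cstdint>
	#include <iostream>

	struct page_id_t {
		uint32_t	m_space;
		uint32_t	m_page_no;
	};

	std::ostream& operator<<(std::ostream& out, const page_id_t& page_id)
	{
		return(out << "[page id: space=" << page_id.m_space
		       << ", page number=" << page_id.m_page_no << "]");
	}

	int main()
	{
		page_id_t	id = {5, 42};

		std::cout << id << std::endl;
		/* prints: [page id: space=5, page number=42] */
		return(0);
	}
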
+/** Print the given buf_pool_t object.
+@param[in,out]	out		the output stream
+@param[in]	buf_pool	the buf_pool_t object to be printed
+@return the output stream */
+std::ostream&
+operator<<(
+	std::ostream&		out,
+	const buf_pool_t&	buf_pool)
+{
+	out << "[buffer pool instance: "
+		<< "buf_pool size=" << buf_pool.curr_size
+		<< ", database pages=" << UT_LIST_GET_LEN(buf_pool.LRU)
+		<< ", free pages=" << UT_LIST_GET_LEN(buf_pool.free)
+		<< ", modified database pages="
+		<< UT_LIST_GET_LEN(buf_pool.flush_list)
+		<< ", n pending decompressions=" << buf_pool.n_pend_unzip
+		<< ", n pending reads=" << buf_pool.n_pend_reads
+		<< ", n pending flush LRU=" << buf_pool.n_flush[BUF_FLUSH_LRU]
+		<< " list=" << buf_pool.n_flush[BUF_FLUSH_LIST]
+		<< " single page=" << buf_pool.n_flush[BUF_FLUSH_SINGLE_PAGE]
+		<< ", pages made young=" << buf_pool.stat.n_pages_made_young
+		<< ", not young=" << buf_pool.stat.n_pages_not_made_young
+		<< ", pages read=" << buf_pool.stat.n_pages_read
+		<< ", created=" << buf_pool.stat.n_pages_created
+		<< ", written=" << buf_pool.stat.n_pages_written << "]";
+	return(out);
+}
+#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/buf/buf0checksum.cc b/storage/innobase/buf/buf0checksum.cc
index 5289cd8..fca5ad4 100644
--- a/storage/innobase/buf/buf0checksum.cc
+++ b/storage/innobase/buf/buf0checksum.cc
@@ -24,36 +24,36 @@ Created Aug 11, 2011 Vasil Dimov
 *******************************************************/
 
 #include "univ.i"
-#include "fil0fil.h" /* FIL_* */
-#include "ut0crc32.h" /* ut_crc32() */
-#include "ut0rnd.h" /* ut_fold_binary() */
+#include "fil0fil.h"
+#include "ut0crc32.h"
+#include "ut0rnd.h"
 
 #ifndef UNIV_INNOCHECKSUM
 
-#include "srv0srv.h" /* SRV_CHECKSUM_* */
+#include "srv0srv.h"
+#endif /* !UNIV_INNOCHECKSUM */
 #include "buf0types.h"
 
 /** the macro MYSQL_SYSVAR_ENUM() requires "long unsigned int" and if we
 use srv_checksum_algorithm_t here then we get a compiler error:
 ha_innodb.cc:12251: error: cannot convert 'srv_checksum_algorithm_t*' to
   'long unsigned int*' in initialization */
-UNIV_INTERN ulong	srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/********************************************************************//**
-Calculates a page CRC32 which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return	checksum */
-UNIV_INTERN
-ib_uint32_t
+ulong	srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
+
+/** Calculates the CRC32 checksum of a page. The value is stored to the page
+when it is written to a file and also checked for a match when reading from
+the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian
+variants. Note that we must be careful to calculate the same value on 32-bit
+and 64-bit architectures.
+@param[in]	page			buffer page (UNIV_PAGE_SIZE bytes)
+@param[in]	use_legacy_big_endian	if true then use big endian
+byte order when converting byte strings to integers
+@return checksum */
+uint32_t
 buf_calc_page_crc32(
-/*================*/
-	const byte*	page)	/*!< in: buffer page */
+	const byte*	page,
+	bool		use_legacy_big_endian /* = false */)
 {
-	ib_uint32_t	checksum;
-
 	/* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
 	FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written outside the buffer pool
 	to the first pages of data files, we have to skip them in the page
@@ -62,21 +62,26 @@ buf_calc_page_crc32(
 	checksum is stored, and also the last 8 bytes of page because
 	there we store the old formula checksum. */
 
-	checksum = ut_crc32(page + FIL_PAGE_OFFSET,
-			    FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
-		^ ut_crc32(page + FIL_PAGE_DATA,
-			   UNIV_PAGE_SIZE - FIL_PAGE_DATA
-			   - FIL_PAGE_END_LSN_OLD_CHKSUM);
+	ut_crc32_func_t	crc32_func = use_legacy_big_endian
+		? ut_crc32_legacy_big_endian
+		: ut_crc32;
 
-	return(checksum);
+	const uint32_t	c1 = crc32_func(
+		page + FIL_PAGE_OFFSET,
+		FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET);
+
+	const uint32_t	c2 = crc32_func(
+		page + FIL_PAGE_DATA,
+		UNIV_PAGE_SIZE - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM);
+
+	return(c1 ^ c2);
 }
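Reviewer note: the CRC covers two disjoint byte ranges so that the stored
checksum field itself, the flush-LSN/space-id area rewritten outside the
buffer pool, and the trailing old-formula checksum all stay out of the
calculation. A self-contained sketch; the offsets are my reading of the
FIL_PAGE_* constants, and the crc32() helper here is a plain bitwise
CRC-32, not ut_crc32():

	#include <cstddef>
	#include <cstdint>

	static uint32_t crc32(const unsigned char* buf, size_t len)
	{
		uint32_t	crc = 0xFFFFFFFFU;

		for (size_t i = 0; i < len; i++) {
			crc ^= buf[i];
			for (int k = 0; k < 8; k++) {
				crc = (crc >> 1)
					^ (0xEDB88320U & (0U - (crc & 1U)));
			}
		}

		return(~crc);
	}

	static const size_t	PAGE_LEN = 16384;	/* default UNIV_PAGE_SIZE */
	static const size_t	PAGE_OFFSET = 4;	/* FIL_PAGE_OFFSET */
	static const size_t	FLUSH_LSN = 26;		/* FIL_PAGE_FILE_FLUSH_LSN */
	static const size_t	PAGE_DATA = 38;		/* FIL_PAGE_DATA */
	static const size_t	END_CHKSUM = 8;		/* old-formula trailer */

	uint32_t page_crc32(const unsigned char* page)
	{
		/* Range 1: bytes 4..25, between the stored checksum
		and the flush LSN / space id fields. */
		const uint32_t	c1 = crc32(page + PAGE_OFFSET,
					   FLUSH_LSN - PAGE_OFFSET);

		/* Range 2: the page body, minus the last 8 bytes
		where the old formula checksum lives. */
		const uint32_t	c2 = crc32(page + PAGE_DATA,
					   PAGE_LEN - PAGE_DATA - END_CHKSUM);

		return(c1 ^ c2);
	}
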
 
 /********************************************************************//**
 Calculates a page checksum which is stored to the page when it is written
 to a file. Note that we must be careful to calculate the same value on
 32-bit and 64-bit architectures.
-@return	checksum */
-UNIV_INTERN
+@return checksum */
 ulint
 buf_calc_page_new_checksum(
 /*=======================*/
@@ -109,8 +114,7 @@ checksum.
 NOTE: we must first store the new formula checksum to
 FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
 because this takes that field as an input!
-@return	checksum */
-UNIV_INTERN
+@return checksum */
 ulint
 buf_calc_page_old_checksum(
 /*=======================*/
@@ -125,12 +129,9 @@ buf_calc_page_old_checksum(
 	return(checksum);
 }
 
-#ifndef UNIV_INNOCHECKSUM
-
 /********************************************************************//**
 Return a printable string describing the checksum algorithm.
-@return	algorithm name */
-UNIV_INTERN
+@return algorithm name */
 const char*
 buf_checksum_algorithm_name(
 /*========================*/
@@ -154,5 +155,3 @@ buf_checksum_algorithm_name(
 	ut_error;
 	return(NULL);
 }
-
-#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 6222299..67aca58 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,6 +23,7 @@ Doublewrite buffer module
 Created 2011/12/19
 *******************************************************/
 
+#include "ha_prototypes.h"
 #include "buf0dblwr.h"
 
 #ifdef UNIV_NONINL
@@ -38,22 +39,16 @@ Created 2011/12/19
 
 #ifndef UNIV_HOTBACKUP
 
-#ifdef UNIV_PFS_MUTEX
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t	buf_dblwr_mutex_key;
-#endif /* UNIV_PFS_RWLOCK */
-
 /** The doublewrite buffer */
-UNIV_INTERN buf_dblwr_t*	buf_dblwr = NULL;
+buf_dblwr_t*	buf_dblwr = NULL;
 
 /** Set to TRUE when the doublewrite buffer is being created */
-UNIV_INTERN ibool	buf_dblwr_being_created = FALSE;
+ibool	buf_dblwr_being_created = FALSE;
 
 /****************************************************************//**
 Determines if a page number is located inside the doublewrite buffer.
 @return TRUE if the location is inside the two blocks of the
 doublewrite buffer */
-UNIV_INTERN
 ibool
 buf_dblwr_page_inside(
 /*==================*/
@@ -82,7 +77,7 @@ buf_dblwr_page_inside(
 /****************************************************************//**
 Calls buf_page_get() on the TRX_SYS_PAGE and returns a pointer to the
 doublewrite buffer within it.
-@return	pointer to the doublewrite buffer within the filespace header
+@return pointer to the doublewrite buffer within the filespace header
 page. */
 UNIV_INLINE
 byte*
@@ -92,8 +87,9 @@ buf_dblwr_get(
 {
 	buf_block_t*	block;
 
-	block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
-			     RW_X_LATCH, mtr);
+	block = buf_page_get(page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+			     univ_page_size, RW_X_LATCH, mtr);
+
 	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
 
 	return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE);
@@ -102,7 +98,6 @@ buf_dblwr_get(
 /********************************************************************//**
 Flush a batch of writes to the datafiles that have already been
 written to the dblwr buffer on disk. */
-UNIV_INLINE
 void
 buf_dblwr_sync_datafiles()
 /*======================*/
@@ -116,7 +111,7 @@ buf_dblwr_sync_datafiles()
 	os_aio_wait_until_no_pending_writes();
 
 	/* Now we flush the data to disk (for example, with fsync) */
-	fil_flush_file_spaces(FIL_TABLESPACE);
+	fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
 }
 
 /****************************************************************//**
@@ -131,7 +126,7 @@ buf_dblwr_init(
 	ulint	buf_size;
 
 	buf_dblwr = static_cast<buf_dblwr_t*>(
-		mem_zalloc(sizeof(buf_dblwr_t)));
+		ut_zalloc_nokey(sizeof(buf_dblwr_t)));
 
 	/* There are two blocks of same size in the doublewrite
 	buffer. */
@@ -142,11 +137,10 @@ buf_dblwr_init(
 	ut_a(srv_doublewrite_batch_size > 0
 	     && srv_doublewrite_batch_size < buf_size);
 
-	mutex_create(buf_dblwr_mutex_key,
-		     &buf_dblwr->mutex, SYNC_DOUBLEWRITE);
+	mutex_create(LATCH_ID_BUF_DBLWR, &buf_dblwr->mutex);
 
-	buf_dblwr->b_event = os_event_create();
-	buf_dblwr->s_event = os_event_create();
+	buf_dblwr->b_event = os_event_create("dblwr_batch_event");
+	buf_dblwr->s_event = os_event_create("dblwr_single_event");
 	buf_dblwr->first_free = 0;
 	buf_dblwr->s_reserved = 0;
 	buf_dblwr->b_reserved = 0;
@@ -157,24 +151,25 @@ buf_dblwr_init(
 		doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
 
 	buf_dblwr->in_use = static_cast<bool*>(
-		mem_zalloc(buf_size * sizeof(bool)));
+		ut_zalloc_nokey(buf_size * sizeof(bool)));
 
 	buf_dblwr->write_buf_unaligned = static_cast<byte*>(
-		ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE));
+		ut_malloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE));
 
 	buf_dblwr->write_buf = static_cast<byte*>(
 		ut_align(buf_dblwr->write_buf_unaligned,
 			 UNIV_PAGE_SIZE));
 
 	buf_dblwr->buf_block_arr = static_cast<buf_page_t**>(
-		mem_zalloc(buf_size * sizeof(void*)));
+		ut_zalloc_nokey(buf_size * sizeof(void*)));
 }
 
 /****************************************************************//**
 Creates the doublewrite buffer for a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-UNIV_INTERN
-void
+doublewrite buffer is placed on the trx system header page.
+@return true if successful, false if not. */
+__attribute__((warn_unused_result))
+bool
 buf_dblwr_create(void)
 /*==================*/
 {
@@ -190,7 +185,7 @@ buf_dblwr_create(void)
 	if (buf_dblwr) {
 		/* Already inited */
 
-		return;
+		return(true);
 	}
 
 start_again:
@@ -208,23 +203,22 @@ start_again:
 
 		mtr_commit(&mtr);
 		buf_dblwr_being_created = FALSE;
-		return;
+		return(true);
 	}
 
-	ib_logf(IB_LOG_LEVEL_INFO,
-		"Doublewrite buffer not found: creating new");
-
-	if (buf_pool_get_curr_size()
-	    < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
-		+ FSP_EXTENT_SIZE / 2 + 100)
-	       * UNIV_PAGE_SIZE)) {
+	ib::info() << "Doublewrite buffer not found: creating new";
 
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Cannot create doublewrite buffer: you must "
-			"increase your buffer pool size. Cannot continue "
-			"operation.");
+	ulint min_doublewrite_size =
+		( ( 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+		  + FSP_EXTENT_SIZE / 2
+		  + 100)
+		* UNIV_PAGE_SIZE);
+	if (buf_pool_get_curr_size() <  min_doublewrite_size) {
+		ib::error() << "Cannot create doublewrite buffer: you must"
+			" increase your buffer pool size. Cannot continue"
+			" operation.";
 
-		exit(EXIT_FAILURE);
+		return(false);
 	}
 
 	block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
@@ -237,15 +231,14 @@ start_again:
 	buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
 
 	if (block2 == NULL) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Cannot create doublewrite buffer: you must "
-			"increase your tablespace size. "
-			"Cannot continue operation.");
+		ib::error() << "Cannot create doublewrite buffer: you must"
+			" increase your tablespace size."
+			" Cannot continue operation.";
 
 		/* We exit without committing the mtr to prevent
 		its modifications to the database getting to disk */
 
-		exit(EXIT_FAILURE);
+		return(false);
 	}
 
 	fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG;
@@ -256,12 +249,11 @@ start_again:
 		new_block = fseg_alloc_free_page(
 			fseg_header, prev_page_no + 1, FSP_UP, &mtr);
 		if (new_block == NULL) {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Cannot create doublewrite buffer: you must "
-				"increase your tablespace size. "
-				"Cannot continue operation.");
+			ib::error() << "Cannot create doublewrite buffer: "
+				" you must increase your tablespace size."
+				" Cannot continue operation.";
 
-			exit(EXIT_FAILURE);
+			return(false);
 		}
 
 		/* We read the allocated pages to the buffer pool;
@@ -274,7 +266,7 @@ start_again:
 		has not been written to in doublewrite. */
 
 		ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1);
-		page_no = buf_block_get_page_no(new_block);
+		page_no = new_block->page.id.page_no();
 
 		if (i == FSP_EXTENT_SIZE / 2) {
 			ut_a(page_no == FSP_EXTENT_SIZE);
@@ -341,48 +333,66 @@ start_again:
 	/* Remove doublewrite pages from LRU */
 	buf_pool_invalidate();
 
-	ib_logf(IB_LOG_LEVEL_INFO, "Doublewrite buffer created");
+	ib::info() <<  "Doublewrite buffer created";
 
 	goto start_again;
 }
 
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
+/**
+At database startup initializes the doublewrite buffer memory structure if
 we already have a doublewrite buffer created in the data files. If we are
 upgrading to an InnoDB version which supports multiple tablespaces, then this
 function performs the necessary update operations. If we are in a crash
-recovery, this function loads the pages from double write buffer into memory. */
-void
+recovery, this function loads the pages from the doublewrite buffer into memory.
+@param[in]	file		File handle
+@param[in]	path		Path name of file
+@return DB_SUCCESS or error code */
+dberr_t
 buf_dblwr_init_or_load_pages(
-/*=========================*/
 	os_file_t	file,
-	char*		path,
-	bool		load_corrupt_pages)
+	const char*	path)
 {
-	byte*	buf;
-	byte*	read_buf;
-	byte*	unaligned_read_buf;
-	ulint	block1;
-	ulint	block2;
-	byte*	page;
-	ibool	reset_space_ids = FALSE;
-	byte*	doublewrite;
-	ulint	space_id;
-	ulint	i;
-        ulint	block_bytes = 0;
-	recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
+	byte*		buf;
+	byte*		page;
+	ulint		block1;
+	ulint		block2;
+	ulint		space_id;
+	byte*		read_buf;
+	byte*		doublewrite;
+	byte*		unaligned_read_buf;
+	ibool		reset_space_ids = FALSE;
+	recv_dblwr_t&	recv_dblwr = recv_sys->dblwr;
 
 	/* We do the file i/o past the buffer pool */
 
-	unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
+	unaligned_read_buf = static_cast<byte*>(
+		ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
 
 	read_buf = static_cast<byte*>(
 		ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
 
 	/* Read the trx sys header to check if we are using the doublewrite
 	buffer */
-	off_t  trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE;
-	os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE);
+	dberr_t		err;
+
+	IORequest	read_request(IORequest::READ);
+
+	read_request.disable_compression();
+
+	err = os_file_read(
+		read_request,
+		file, read_buf, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE,
+		UNIV_PAGE_SIZE);
+
+	if (err != DB_SUCCESS) {
+
+		ib::error()
+			<< "Failed to read the system tablespace header page";
+
+		ut_free(unaligned_read_buf);
+
+		return(err);
+	}
 
 	doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
 
@@ -397,7 +407,8 @@ buf_dblwr_init_or_load_pages(
 
 		buf = buf_dblwr->write_buf;
 	} else {
-		goto leave_func;
+		ut_free(unaligned_read_buf);
+		return(DB_SUCCESS);
 	}
 
 	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
@@ -411,32 +422,56 @@ buf_dblwr_init_or_load_pages(
 
 		reset_space_ids = TRUE;
 
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Resetting space id's in the doublewrite buffer");
+		ib::info() << "Resetting space id's in the doublewrite buffer";
 	}
 
 	/* Read the pages from the doublewrite buffer to memory */
+	err = os_file_read(
+		read_request,
+		file, buf, block1 * UNIV_PAGE_SIZE,
+		TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE);
+
+	if (err != DB_SUCCESS) {
+
+		ib::error()
+			<< "Failed to read the first double write buffer "
+			"extent";
+
+		ut_free(unaligned_read_buf);
+
+		return(err);
+	}
+
+	err = os_file_read(
+		read_request,
+		file,
+		buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+		block2 * UNIV_PAGE_SIZE,
+		TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE);
 
-        block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+	if (err != DB_SUCCESS) {
 
-	os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes);
-	os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE,
-		     block_bytes);
+		ib::error()
+			<< "Failed to read the second double write buffer "
+			"extent";
+
+		ut_free(unaligned_read_buf);
+
+		return(err);
+	}
 
 	/* Check if any of these pages is half-written in data files, in the
 	intended position */
 
 	page = buf;
 
-	for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
-
-		ulint source_page_no;
-
+	for (ulint i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
 		if (reset_space_ids) {
+			ulint source_page_no;
 
 			space_id = 0;
-			mach_write_to_4(page
-					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
+			mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+					space_id);
 			/* We do not need to calculate new checksums for the
 			pages because the field .._SPACE_ID does not affect
 			them. Write the page back to where we read it from. */
@@ -448,157 +483,214 @@ buf_dblwr_init_or_load_pages(
 					+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
 			}
 
-			os_file_write(path, file, page,
-				      source_page_no * UNIV_PAGE_SIZE,
-				      UNIV_PAGE_SIZE);
+			IORequest	write_request(IORequest::WRITE);
+
+			/* Recovered data file pages are written out
+			as uncompressed. */
+
+			write_request.disable_compression();
+
+			err = os_file_write(
+				write_request, path, file, page,
+				source_page_no * UNIV_PAGE_SIZE,
+				UNIV_PAGE_SIZE);
+
+			if (err != DB_SUCCESS) {
+
+				ib::error()
+					<< "Failed to write to the double write"
+					" buffer";
+
+				ut_free(unaligned_read_buf);
+
+				return(err);
+			}
 
-		} else if (load_corrupt_pages) {
+		} else {
 
 			recv_dblwr.add(page);
 		}
 
-		page += UNIV_PAGE_SIZE;
+		page += univ_page_size.physical();
 	}
 
 	if (reset_space_ids) {
 		os_file_flush(file);
 	}
 
-leave_func:
 	ut_free(unaligned_read_buf);
+
+	return(DB_SUCCESS);
 }
 
-/****************************************************************//**
-Process the double write buffer pages. */
+/** Process and remove the double write buffer pages for all tablespaces. */
 void
-buf_dblwr_process()
-/*===============*/
+buf_dblwr_process(void)
 {
-	ulint	space_id;
-	ulint	page_no;
-	ulint	page_no_dblwr = 0;
-	byte*	page;
-	byte*	read_buf;
-	byte*	unaligned_read_buf;
-	recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
+	ulint		page_no_dblwr	= 0;
+	byte*		read_buf;
+	byte*		unaligned_read_buf;
+	recv_dblwr_t&	recv_dblwr	= recv_sys->dblwr;
 
-	unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
+	unaligned_read_buf = static_cast<byte*>(
+		ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
 
 	read_buf = static_cast<byte*>(
 		ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
 
-	for (std::list<byte*>::iterator i = recv_dblwr.pages.begin();
-	     i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) {
+	for (recv_dblwr_t::list::iterator i = recv_dblwr.pages.begin();
+	     i != recv_dblwr.pages.end();
+	     ++i, ++page_no_dblwr) {
+
+		const byte*	page		= *i;
+		ulint		page_no		= page_get_page_no(page);
+		ulint		space_id	= page_get_space_id(page);
 
-		page = *i;
-		page_no  = mach_read_from_4(page + FIL_PAGE_OFFSET);
-		space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID);
+		fil_space_t*	space = fil_space_get(space_id);
 
-		if (!fil_tablespace_exists_in_mem(space_id)) {
-			/* Maybe we have dropped the single-table tablespace
+		if (space == NULL) {
+			/* Maybe we have dropped the tablespace
 			and this page once belonged to it: do nothing */
+			continue;
+		}
+
+		fil_space_open_if_needed(space);
 
-		} else if (!fil_check_adress_in_tablespace(space_id,
-							   page_no)) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"A page in the doublewrite buffer is not "
-				"within space bounds; space id %lu "
-				"page number %lu, page %lu in "
-				"doublewrite buf.",
-				(ulong) space_id, (ulong) page_no,
-				page_no_dblwr);
+		if (page_no >= space->size) {
+
+			/* Do not report the warning if the tablespace is
+			scheduled for truncate or was truncated and we have
+			a live MLOG_TRUNCATE record in the redo log. */
+			bool	skip_warning =
+				srv_is_tablespace_truncated(space_id)
+				|| srv_was_tablespace_truncated(space_id);
+
+			if (!skip_warning) {
+				ib::warn() << "Page " << page_no_dblwr
+					<< " in the doublewrite buffer is"
+					" not within space bounds: page "
+					<< page_id_t(space_id, page_no);
+			}
 		} else {
-			ulint	zip_size = fil_space_get_zip_size(space_id);
+			const page_size_t	page_size(space->flags);
+			const page_id_t		page_id(space_id, page_no);
+
+			/* We want to ensure that for partial reads the
+			unread portion of the page is NUL. */
+			memset(read_buf, 0x0, page_size.physical());
+
+			IORequest	request;
+
+			request.dblwr_recover();
 
 			/* Read in the actual page from the file */
-			fil_io(OS_FILE_READ, true, space_id, zip_size,
-			       page_no, 0,
-			       zip_size ? zip_size : UNIV_PAGE_SIZE,
-			       read_buf, NULL);
+			dberr_t	err = fil_io(
+				request, true,
+				page_id, page_size,
+				0, page_size.physical(), read_buf, NULL);
+
+			if (err != DB_SUCCESS) {
+
+				ib::warn()
+					<< "Double write buffer recovery: "
+					<< page_id << " read failed with "
+					<< "error: " << ut_strerr(err);
+			}
 
 			/* Check if the page is corrupt */
+			if (buf_page_is_corrupted(
+				true, read_buf, page_size,
+				fsp_is_checksum_disabled(space_id))) {
 
-			if (buf_page_is_corrupted(true, read_buf, zip_size)) {
-
-				fprintf(stderr,
-					"InnoDB: Warning: database page"
-					" corruption or a failed\n"
-					"InnoDB: file read of"
-					" space %lu page %lu.\n"
-					"InnoDB: Trying to recover it from"
-					" the doublewrite buffer.\n",
-					(ulong) space_id, (ulong) page_no);
-
-				if (buf_page_is_corrupted(true,
-							  page, zip_size)) {
-					fprintf(stderr,
-						"InnoDB: Dump of the page:\n");
+				ib::warn() << "Database page corruption or"
+					<< " a failed file read of page "
+					<< page_id
+					<< ". Trying to recover it from the"
+					<< " doublewrite buffer.";
+
+				if (buf_page_is_corrupted(
+					true, page, page_size,
+					fsp_is_checksum_disabled(space_id))) {
+
+					ib::error() << "Dump of the page:";
 					buf_page_print(
-						read_buf, zip_size,
+						read_buf, page_size,
 						BUF_PAGE_PRINT_NO_CRASH);
-					fprintf(stderr,
-						"InnoDB: Dump of"
-						" corresponding page"
-						" in doublewrite buffer:\n");
+					ib::error() << "Dump of corresponding"
+						" page in doublewrite buffer:";
+
 					buf_page_print(
-						page, zip_size,
+						page, page_size,
 						BUF_PAGE_PRINT_NO_CRASH);
 
-					fprintf(stderr,
-						"InnoDB: Also the page in the"
-						" doublewrite buffer"
-						" is corrupt.\n"
-						"InnoDB: Cannot continue"
-						" operation.\n"
-						"InnoDB: You can try to"
-						" recover the database"
-						" with the my.cnf\n"
-						"InnoDB: option:\n"
-						"InnoDB:"
-						" innodb_force_recovery=6\n");
-					ut_error;
+					ib::fatal() << "The page in the"
+						" doublewrite buffer is"
+						" corrupt. Cannot continue"
+						" operation. You can try to"
+						" recover the database with"
+						" innodb_force_recovery=6";
 				}
+			} else if (buf_page_is_zeroes(read_buf, page_size)
+				   && !buf_page_is_zeroes(page, page_size)
+				   && !buf_page_is_corrupted(
+					true, page, page_size,
+					fsp_is_checksum_disabled(space_id))) {
 
-				/* Write the good page from the
-				doublewrite buffer to the intended
-				position */
+				/* Database page contained only zeroes, while
+				a valid copy is available in dblwr buffer. */
 
-				fil_io(OS_FILE_WRITE, true, space_id,
-				       zip_size, page_no, 0,
-				       zip_size ? zip_size : UNIV_PAGE_SIZE,
-				       page, NULL);
+			} else {
+
+				bool t1 = buf_page_is_zeroes(
+					read_buf, page_size);
 
-				ib_logf(IB_LOG_LEVEL_INFO,
-					"Recovered the page from"
-					" the doublewrite buffer.");
+				bool t2 = buf_page_is_zeroes(page, page_size);
 
-			} else if (buf_page_is_zeroes(read_buf, zip_size)) {
+				bool t3 = buf_page_is_corrupted(
+					true, page, page_size,
+					fsp_is_checksum_disabled(space_id));
 
-				if (!buf_page_is_zeroes(page, zip_size)
-				    && !buf_page_is_corrupted(true, page,
-							      zip_size)) {
+				if (t1 && !(t2 || t3)) {
 
 					/* Database page contained only
 					zeroes, while a valid copy is
 					available in dblwr buffer. */
 
-					fil_io(OS_FILE_WRITE, true, space_id,
-					       zip_size, page_no, 0,
-					       zip_size ? zip_size
-							: UNIV_PAGE_SIZE,
-					       page, NULL);
+				} else {
+					continue;
 				}
 			}
+
+			/* Recovered data file pages are written out
+			as uncompressed. */
+
+			IORequest	write_request(IORequest::WRITE);
+
+			write_request.disable_compression();
+
+			/* Write the good page from the doublewrite
+			buffer to the intended position. */
+
+			fil_io(write_request, true,
+			       page_id, page_size,
+			       0, page_size.physical(),
+			       const_cast<byte*>(page), NULL);
+
+			ib::info()
+				<< "Recovered page "
+				<< page_id
+				<< " from the doublewrite buffer.";
 		}
 	}
 
-	fil_flush_file_spaces(FIL_TABLESPACE);
+	recv_dblwr.pages.clear();
+
+	fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
 	ut_free(unaligned_read_buf);
 }
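Reviewer note: the per-page decision in buf_dblwr_process() is easier to
review flattened out. My reading of the effective behavior, as a
hypothetical helper (the predicates stand in for buf_page_is_corrupted()
and buf_page_is_zeroes()):

	/* SKIP: keep the on-disk copy; RESTORE: rewrite it from the
	doublewrite copy; ABORT: neither copy is usable. */
	enum dblwr_action_t { DBLWR_SKIP, DBLWR_RESTORE, DBLWR_ABORT };

	dblwr_action_t recover_page(bool disk_corrupt, bool dblwr_corrupt,
				    bool disk_all_zero, bool dblwr_all_zero)
	{
		if (disk_corrupt) {
			/* On-disk copy unusable: the doublewrite copy
			must be intact or recovery cannot continue. */
			return(dblwr_corrupt ? DBLWR_ABORT : DBLWR_RESTORE);
		}

		if (disk_all_zero && !dblwr_all_zero && !dblwr_corrupt) {
			/* Half-initialized page on disk, valid copy in
			the doublewrite buffer. */
			return(DBLWR_RESTORE);
		}

		return(DBLWR_SKIP);	/* the on-disk copy wins */
	}
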
 
 /****************************************************************//**
 Frees doublewrite buffer. */
-UNIV_INTERN
 void
 buf_dblwr_free(void)
 /*================*/
@@ -608,35 +700,38 @@ buf_dblwr_free(void)
 	ut_ad(buf_dblwr->s_reserved == 0);
 	ut_ad(buf_dblwr->b_reserved == 0);
 
-	os_event_free(buf_dblwr->b_event);
-	os_event_free(buf_dblwr->s_event);
+	os_event_destroy(buf_dblwr->b_event);
+	os_event_destroy(buf_dblwr->s_event);
 	ut_free(buf_dblwr->write_buf_unaligned);
 	buf_dblwr->write_buf_unaligned = NULL;
 
-	mem_free(buf_dblwr->buf_block_arr);
+	ut_free(buf_dblwr->buf_block_arr);
 	buf_dblwr->buf_block_arr = NULL;
 
-	mem_free(buf_dblwr->in_use);
+	ut_free(buf_dblwr->in_use);
 	buf_dblwr->in_use = NULL;
 
 	mutex_free(&buf_dblwr->mutex);
-	mem_free(buf_dblwr);
+	ut_free(buf_dblwr);
 	buf_dblwr = NULL;
 }
 
 /********************************************************************//**
 Updates the doublewrite buffer when an IO request is completed. */
-UNIV_INTERN
 void
 buf_dblwr_update(
 /*=============*/
 	const buf_page_t*	bpage,	/*!< in: buffer block descriptor */
 	buf_flush_t		flush_type)/*!< in: flush type */
 {
-	if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
+	if (!srv_use_doublewrite_buf
+	    || buf_dblwr == NULL
+	    || fsp_is_system_temporary(bpage->id.space())) {
 		return;
 	}
 
+	ut_ad(!srv_read_only_mode);
+
 	switch (flush_type) {
 	case BUF_FLUSH_LIST:
 	case BUF_FLUSH_LRU:
@@ -652,7 +747,7 @@ buf_dblwr_update(
 			mutex_exit(&buf_dblwr->mutex);
 			/* This will finish the batch. Sync data files
 			to the disk. */
-			fil_flush_file_spaces(FIL_TABLESPACE);
+			fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
 			mutex_enter(&buf_dblwr->mutex);
 
 			/* We can now reuse the doublewrite memory buffer: */
@@ -702,18 +797,16 @@ buf_dblwr_check_page_lsn(
 			   - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
 		   4)) {
 
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: ERROR: The page to be written"
-			" seems corrupt!\n"
-			"InnoDB: The low 4 bytes of LSN fields do not match "
-			"(" ULINTPF " != " ULINTPF ")!"
-			" Noticed in the buffer pool.\n",
-			mach_read_from_4(
-				page + FIL_PAGE_LSN + 4),
-			mach_read_from_4(
-				page + UNIV_PAGE_SIZE
-				- FIL_PAGE_END_LSN_OLD_CHKSUM + 4));
+		const ulint	lsn1 = mach_read_from_4(
+			page + FIL_PAGE_LSN + 4);
+		const ulint	lsn2 = mach_read_from_4(
+			page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
+			+ 4);
+
+		ib::error() << "The page to be written seems corrupt!"
+			" The low 4 bytes of LSN fields do not match"
+			" (" << lsn1 << " != " << lsn2 << ")!"
+			" Noticed in the buffer pool.";
 	}
 }
 
@@ -726,21 +819,13 @@ buf_dblwr_assert_on_corrupt_block(
 /*==============================*/
 	const buf_block_t*	block)	/*!< in: block to check */
 {
-	buf_page_print(block->frame, 0, BUF_PAGE_PRINT_NO_CRASH);
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		"  InnoDB: Apparent corruption of an"
-		" index page n:o %lu in space %lu\n"
-		"InnoDB: to be written to data file."
-		" We intentionally crash server\n"
-		"InnoDB: to prevent corrupt data"
-		" from ending up in data\n"
-		"InnoDB: files.\n",
-		(ulong) buf_block_get_page_no(block),
-		(ulong) buf_block_get_space(block));
-
-	ut_error;
+	buf_page_print(block->frame, univ_page_size, BUF_PAGE_PRINT_NO_CRASH);
+
+	ib::fatal() << "Apparent corruption of an index page "
+		<< block->page.id
+		<< " to be written to data file. We intentionally crash"
+		" the server to prevent corrupt data from ending up in"
+		" data files.";
 }
 
 /********************************************************************//**
@@ -752,26 +837,49 @@ buf_dblwr_check_block(
 /*==================*/
 	const buf_block_t*	block)	/*!< in: block to check */
 {
-	if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
-	    || block->page.zip.data) {
-		/* No simple validate for compressed pages exists. */
-		return;
-	}
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
-	buf_dblwr_check_page_lsn(block->frame);
-
-	if (!block->check_index_page_at_flush) {
+	if (block->skip_flush_check) {
 		return;
 	}
 
-	if (page_is_comp(block->frame)) {
-		if (!page_simple_validate_new(block->frame)) {
-			buf_dblwr_assert_on_corrupt_block(block);
+	switch (fil_page_get_type(block->frame)) {
+	case FIL_PAGE_INDEX:
+	case FIL_PAGE_RTREE:
+		if (page_is_comp(block->frame)) {
+			if (page_simple_validate_new(block->frame)) {
+				return;
+			}
+		} else if (page_simple_validate_old(block->frame)) {
+			return;
 		}
-	} else if (!page_simple_validate_old(block->frame)) {
-
-		buf_dblwr_assert_on_corrupt_block(block);
+		/* While it is possible that this is not an index page
+		but just happens to have wrongly set FIL_PAGE_TYPE,
+		such pages should never be modified without also
+		adjusting the page type during page allocation or
+		buf_flush_init_for_writing() or fil_page_reset_type(). */
+		break;
+	case FIL_PAGE_TYPE_FSP_HDR:
+	case FIL_PAGE_IBUF_BITMAP:
+	case FIL_PAGE_TYPE_UNKNOWN:
+		/* Do not complain again, we already reset this field. */
+	case FIL_PAGE_UNDO_LOG:
+	case FIL_PAGE_INODE:
+	case FIL_PAGE_IBUF_FREE_LIST:
+	case FIL_PAGE_TYPE_SYS:
+	case FIL_PAGE_TYPE_TRX_SYS:
+	case FIL_PAGE_TYPE_XDES:
+	case FIL_PAGE_TYPE_BLOB:
+	case FIL_PAGE_TYPE_ZBLOB:
+	case FIL_PAGE_TYPE_ZBLOB2:
+		/* TODO: validate also non-index pages */
+		return;
+	case FIL_PAGE_TYPE_ALLOCATED:
+		/* empty pages should never be flushed */
+		break;
 	}
+
+	buf_dblwr_assert_on_corrupt_block(block);
 }
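Reviewer note: the net effect of the rewritten check is that only index and
R-tree pages get a structural validation before flush, a fixed list of known
non-index types passes through (validating those is still a TODO), and a
FIL_PAGE_TYPE_ALLOCATED page reaching a flush is treated as corruption.
Roughly, with stand-in names:

	/* Flush-time dispatch; the enumerators mirror a few FIL_PAGE_*
	codes, but the values and names here are illustrative. */
	enum page_type_t { PT_INDEX, PT_RTREE, PT_ALLOCATED, PT_OTHER_KNOWN };

	/* Assumed to exist: a page_simple_validate_new()/_old() analogue. */
	bool validate_index(const unsigned char* frame);

	bool ok_to_flush(page_type_t type, const unsigned char* frame)
	{
		switch (type) {
		case PT_INDEX:
		case PT_RTREE:
			return(validate_index(frame));
		case PT_ALLOCATED:
			return(false);	/* empty pages are never flushed */
		case PT_OTHER_KNOWN:
			return(true);	/* TODO: validate non-index pages */
		}
		return(false);
	}
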
 
 /********************************************************************//**
@@ -785,33 +893,39 @@ buf_dblwr_write_block_to_datafile(
 	bool			sync)	/*!< in: true if sync IO
 					is requested */
 {
-	ut_a(bpage);
 	ut_a(buf_page_in_file(bpage));
 
-	const ulint flags = sync
-		? OS_FILE_WRITE
-		: OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER;
+	ulint	type = IORequest::WRITE;
+
+	if (sync) {
+		type |= IORequest::DO_NOT_WAKE;
+	}
+
+	IORequest	request(type);
+
+	if (bpage->zip.data != NULL) {
+		ut_ad(bpage->size.is_compressed());
 
-	if (bpage->zip.data) {
-		fil_io(flags, sync, buf_page_get_space(bpage),
-		       buf_page_get_zip_size(bpage),
-		       buf_page_get_page_no(bpage), 0,
-		       buf_page_get_zip_size(bpage),
+		fil_io(request, sync, bpage->id, bpage->size, 0,
+		       bpage->size.physical(),
 		       (void*) bpage->zip.data,
 		       (void*) bpage);
+	} else {
+		ut_ad(!bpage->size.is_compressed());
 
-		return;
-	}
-
+		/* Our IO API is common for both reads and writes and is
+		therefore geared towards a non-const parameter. */
 
-	const buf_block_t* block = (buf_block_t*) bpage;
-	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-	buf_dblwr_check_page_lsn(block->frame);
+		buf_block_t*	block = reinterpret_cast<buf_block_t*>(
+			const_cast<buf_page_t*>(bpage));
 
-	fil_io(flags, sync, buf_block_get_space(block), 0,
-	       buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
-	       (void*) block->frame, (void*) block);
+		ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+		buf_dblwr_check_page_lsn(block->frame);
 
+		fil_io(request,
+		       sync, bpage->id, bpage->size, 0, bpage->size.physical(),
+		       block->frame, block);
+	}
 }
 
 /********************************************************************//**
@@ -820,7 +934,6 @@ and also wakes up the aio thread if simulated aio is used. It is very
 important to call this function after a batch of writes has been posted,
 and also when we may have to wait for a page latch! Otherwise a deadlock
 of threads can occur. */
-UNIV_INTERN
 void
 buf_dblwr_flush_buffered_writes(void)
 /*=================================*/
@@ -835,6 +948,8 @@ buf_dblwr_flush_buffered_writes(void)
 		return;
 	}
 
+	ut_ad(!srv_read_only_mode);
+
 try_again:
 	mutex_enter(&buf_dblwr->mutex);
 
@@ -846,13 +961,19 @@ try_again:
 
 		mutex_exit(&buf_dblwr->mutex);
 
+		/* Wake possible simulated aio thread as there could be
+		system temporary tablespace pages active for flushing.
+		Note: system temporary tablespace pages are not scheduled
+		for doublewrite. */
+		os_aio_simulated_wake_handler_threads();
+
 		return;
 	}
 
 	if (buf_dblwr->batch_running) {
 		/* Another thread is running the batch right now. Wait
 		for it to finish. */
-		ib_int64_t	sig_count = os_event_reset(buf_dblwr->b_event);
+		int64_t	sig_count = os_event_reset(buf_dblwr->b_event);
 		mutex_exit(&buf_dblwr->mutex);
 
 		os_event_wait_low(buf_dblwr->b_event, sig_count);
@@ -903,9 +1024,9 @@ try_again:
 	len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
 		     buf_dblwr->first_free) * UNIV_PAGE_SIZE;
 
-	fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
-	       buf_dblwr->block1, 0, len,
-	       (void*) write_buf, NULL);
+	fil_io(IORequestWrite, true,
+	       page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size,
+	       0, len, (void*) write_buf, NULL);
 
 	if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 		/* No unwritten pages in the second block. */
@@ -919,9 +1040,9 @@ try_again:
 	write_buf = buf_dblwr->write_buf
 		    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
 
-	fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
-	       buf_dblwr->block2, 0, len,
-	       (void*) write_buf, NULL);
+	fil_io(IORequestWrite, true,
+	       page_id_t(TRX_SYS_SPACE, buf_dblwr->block2), univ_page_size,
+	       0, len, (void*) write_buf, NULL);
 
 flush:
 	/* increment the doublewrite flushed pages counter */
@@ -963,14 +1084,11 @@ flush:
 Posts a buffer page for writing. If the doublewrite memory buffer is
 full, calls buf_dblwr_flush_buffered_writes and waits for free
 space to appear. */
-UNIV_INTERN
 void
 buf_dblwr_add_to_batch(
 /*====================*/
 	buf_page_t*	bpage)	/*!< in: buffer block to write */
 {
-	ulint	zip_size;
-
 	ut_a(buf_page_in_file(bpage));
 
 try_again:
@@ -986,7 +1104,7 @@ try_again:
 		point. The only exception is when a user thread is
 		forced to do a flush batch because of a sync
 		checkpoint. */
-		ib_int64_t	sig_count = os_event_reset(buf_dblwr->b_event);
+		int64_t	sig_count = os_event_reset(buf_dblwr->b_event);
 		mutex_exit(&buf_dblwr->mutex);
 
 		os_event_wait_low(buf_dblwr->b_event, sig_count);
@@ -1001,25 +1119,24 @@ try_again:
 		goto try_again;
 	}
 
-	zip_size = buf_page_get_zip_size(bpage);
+	byte*	p = buf_dblwr->write_buf
+		+ univ_page_size.physical() * buf_dblwr->first_free;
 
-	if (zip_size) {
-		UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
+	if (bpage->size.is_compressed()) {
+		UNIV_MEM_ASSERT_RW(bpage->zip.data, bpage->size.physical());
 		/* Copy the compressed page and clear the rest. */
-		memcpy(buf_dblwr->write_buf
-		       + UNIV_PAGE_SIZE * buf_dblwr->first_free,
-		       bpage->zip.data, zip_size);
-		memset(buf_dblwr->write_buf
-		       + UNIV_PAGE_SIZE * buf_dblwr->first_free
-		       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
+
+		memcpy(p, bpage->zip.data, bpage->size.physical());
+
+		memset(p + bpage->size.physical(), 0x0,
+		       univ_page_size.physical() - bpage->size.physical());
 	} else {
 		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+
 		UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
-				   UNIV_PAGE_SIZE);
+				   bpage->size.logical());
 
-		memcpy(buf_dblwr->write_buf
-		       + UNIV_PAGE_SIZE * buf_dblwr->first_free,
-		       ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
+		memcpy(p, ((buf_block_t*) bpage)->frame, bpage->size.logical());
 	}
 
 	buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage;
@@ -1050,7 +1167,6 @@ flushes in the doublewrite buffer are in use we wait here for one to
 become free. We are guaranteed that a slot will become free because any
 thread that is using a slot must also release the slot before leaving
 this function. */
-UNIV_INTERN
 void
 buf_dblwr_write_single_page(
 /*========================*/
@@ -1059,7 +1175,6 @@ buf_dblwr_write_single_page(
 {
 	ulint		n_slots;
 	ulint		size;
-	ulint		zip_size;
 	ulint		offset;
 	ulint		i;
 
@@ -1093,8 +1208,7 @@ retry:
 	if (buf_dblwr->s_reserved == n_slots) {
 
 		/* All slots are reserved. */
-		ib_int64_t	sig_count =
-			os_event_reset(buf_dblwr->s_event);
+		int64_t	sig_count = os_event_reset(buf_dblwr->s_event);
 		mutex_exit(&buf_dblwr->mutex);
 		os_event_wait_low(buf_dblwr->s_event, sig_count);
 
@@ -1140,22 +1254,26 @@ retry:
 	write it. This is so because we want to pad the remaining
 	bytes in the doublewrite page with zeros. */
 
-	zip_size = buf_page_get_zip_size(bpage);
-	if (zip_size) {
-		memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i,
-		       bpage->zip.data, zip_size);
-		memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i
-		       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
+	if (bpage->size.is_compressed()) {
+		memcpy(buf_dblwr->write_buf + univ_page_size.physical() * i,
+		       bpage->zip.data, bpage->size.physical());
+
+		memset(buf_dblwr->write_buf + univ_page_size.physical() * i
+		       + bpage->size.physical(), 0x0,
+		       univ_page_size.physical() - bpage->size.physical());
 
-		fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
-		       offset, 0, UNIV_PAGE_SIZE,
+		fil_io(IORequestWrite, true,
+		       page_id_t(TRX_SYS_SPACE, offset), univ_page_size, 0,
+		       univ_page_size.physical(),
 		       (void*) (buf_dblwr->write_buf
-				+ UNIV_PAGE_SIZE * i), NULL);
+				+ univ_page_size.physical() * i),
+		       NULL);
 	} else {
 		/* It is a regular page. Write it directly to the
 		doublewrite buffer */
-		fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
-		       offset, 0, UNIV_PAGE_SIZE,
+		fil_io(IORequestWrite, true,
+		       page_id_t(TRX_SYS_SPACE, offset), univ_page_size, 0,
+		       univ_page_size.physical(),
 		       (void*) ((buf_block_t*) bpage)->frame,
 		       NULL);
 	}
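Reviewer note: this single-page path and buf_dblwr_add_to_batch() above
share the same copy-and-pad idiom for compressed pages, so a doublewrite
slot never carries stale bytes past the compressed payload. In isolation
(a minimal sketch, sizes illustrative):

	#include <cstddef>
	#include <cstring>

	/* Copy a compressed page into a full-size doublewrite slot and
	zero-fill the tail, e.g. an 8 KiB payload in a 16 KiB slot. */
	void copy_to_dblwr_slot(unsigned char* slot, std::size_t slot_size,
				const unsigned char* src, std::size_t src_size)
	{
		std::memcpy(slot, src, src_size);
		std::memset(slot + src_size, 0x0, slot_size - src_size);
	}
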
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index 467f817..2cd259d 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,32 +23,34 @@ Implements a buffer pool dump/load.
 Created April 08, 2011 Vasil Dimov
 *******************************************************/
 
-#include "univ.i"
+#include "my_global.h"
+#include "my_sys.h"
+#include "my_thread.h"
+
+#include "mysql/psi/mysql_stage.h"
+#include "mysql/psi/psi.h"
 
-#include <stdarg.h> /* va_* */
-#include <string.h> /* strerror() */
+#include "univ.i"
 
-#include "buf0buf.h" /* buf_pool_mutex_enter(), srv_buf_pool_instances */
+#include "buf0buf.h"
 #include "buf0dump.h"
-#include "db0err.h"
-#include "dict0dict.h" /* dict_operation_lock */
-#include "os0file.h" /* OS_FILE_MAX_PATH */
-#include "os0sync.h" /* os_event* */
-#include "os0thread.h" /* os_thread_* */
-#include "srv0srv.h" /* srv_fast_shutdown, srv_buf_dump* */
-#include "srv0start.h" /* srv_shutdown_state */
-#include "sync0rw.h" /* rw_lock_s_lock() */
-#include "ut0byte.h" /* ut_ull_create() */
-#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
+#include "dict0dict.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "sync0rw.h"
+#include "ut0byte.h"
+
+#include <algorithm>
 
 enum status_severity {
+	STATUS_VERBOSE,
 	STATUS_INFO,
-	STATUS_NOTICE,
 	STATUS_ERR
 };
 
-#define SHUTTING_DOWN()	(UNIV_UNLIKELY(srv_shutdown_state \
-				       != SRV_SHUTDOWN_NONE))
+#define SHUTTING_DOWN()	(srv_shutdown_state != SRV_SHUTDOWN_NONE)
 
 /* Flags that tell the buffer pool dump/load thread which action it should
 take after being woken up. */
@@ -73,7 +75,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start
 a dump. This function is called by MySQL code via buffer_pool_dump_now()
 and it should return immediately because the whole of MySQL is frozen during
 its execution. */
-UNIV_INTERN
 void
 buf_dump_start()
 /*============*/
@@ -87,7 +88,6 @@ Wakes up the buffer pool dump/load thread and instructs it to start
 a load. This function is called by MySQL code via buffer_pool_load_now()
 and it should return immediately because the whole of MySQL is frozen during
 its execution. */
-UNIV_INTERN
 void
 buf_load_start()
 /*============*/
@@ -123,10 +123,17 @@ buf_dump_status(
 		sizeof(export_vars.innodb_buffer_pool_dump_status),
 		fmt, ap);
 
-	if (severity == STATUS_NOTICE || severity == STATUS_ERR) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: %s\n",
-			export_vars.innodb_buffer_pool_dump_status);
+	switch (severity) {
+	case STATUS_INFO:
+		ib::info() << export_vars.innodb_buffer_pool_dump_status;
+		break;
+
+	case STATUS_ERR:
+		ib::error() << export_vars.innodb_buffer_pool_dump_status;
+		break;
+
+	case STATUS_VERBOSE:
+		break;
 	}
 
 	va_end(ap);
@@ -158,15 +165,72 @@ buf_load_status(
 		sizeof(export_vars.innodb_buffer_pool_load_status),
 		fmt, ap);
 
-	if (severity == STATUS_NOTICE || severity == STATUS_ERR) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: %s\n",
-			export_vars.innodb_buffer_pool_load_status);
+	switch (severity) {
+	case STATUS_INFO:
+		ib::info() << export_vars.innodb_buffer_pool_load_status;
+		break;
+
+	case STATUS_ERR:
+		ib::error() << export_vars.innodb_buffer_pool_load_status;
+		break;
+
+	case STATUS_VERBOSE:
+		break;
 	}
 
 	va_end(ap);
 }
 
+/** Generate the path to the buffer pool dump/load file.
+@param[out]	path		generated path
+@param[in]	path_size	size of 'path', used as in snprintf(3). */
+static
+void
+buf_dump_generate_path(
+	char*	path,
+	size_t	path_size)
+{
+	char	buf[FN_REFLEN];
+
+	ut_snprintf(buf, sizeof(buf), "%s%c%s", srv_data_home,
+		    OS_PATH_SEPARATOR, srv_buf_dump_filename);
+
+	os_file_type_t	type;
+	bool		exists = false;
+	bool		ret;
+
+	ret = os_file_status(buf, &exists, &type);
+
+	/* For realpath() to succeed the file must exist. */
+
+	if (ret && exists) {
+		/* my_realpath() assumes the destination buffer is big enough
+		to hold FN_REFLEN bytes. */
+		ut_a(path_size >= FN_REFLEN);
+
+		my_realpath(path, buf, 0);
+	} else {
+		/* If it does not exist, then resolve only srv_data_home
+		and append srv_buf_dump_filename to it. */
+		char	srv_data_home_full[FN_REFLEN];
+
+		my_realpath(srv_data_home_full, srv_data_home, 0);
+
+		if (srv_data_home_full[strlen(srv_data_home_full) - 1]
+		    == OS_PATH_SEPARATOR) {
+
+			ut_snprintf(path, path_size, "%s%s",
+				    srv_data_home_full,
+				    srv_buf_dump_filename);
+		} else {
+			ut_snprintf(path, path_size, "%s%c%s",
+				    srv_data_home_full,
+				    OS_PATH_SEPARATOR,
+				    srv_buf_dump_filename);
+		}
+	}
+}
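+
Why the two branches: realpath(3) (and my_realpath()) can only resolve a path that exists, so the function resolves the dump file directly when it is present and otherwise resolves just the data directory, appending the file name while avoiding a doubled separator. A rough standalone equivalent using POSIX realpath(); error handling is elided and the names are hypothetical:

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Build the dump file path: resolve the file itself if it exists,
otherwise resolve the directory and append the file name. The output
buffer is assumed to hold at least PATH_MAX bytes, mirroring the
ut_a(path_size >= FN_REFLEN) assertion above. */
void generate_dump_path(char* path, size_t path_size,
			const char* data_home, const char* filename)
{
	char	buf[PATH_MAX];

	snprintf(buf, sizeof(buf), "%s/%s", data_home, filename);

	if (access(buf, F_OK) == 0) {
		realpath(buf, path);
	} else {
		char	dir[PATH_MAX];

		realpath(data_home, dir);

		/* Avoid a doubled separator if the resolved directory
		already ends in one. */
		const char*	sep = dir[strlen(dir) - 1] == '/' ? "" : "/";

		snprintf(path, path_size, "%s%s%s", dir, sep, filename);
	}
}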
+
 /*****************************************************************//**
 Perform a buffer pool dump into the file specified by
 innodb_buffer_pool_filename. If any errors occur then the value of
@@ -189,14 +253,12 @@ buf_dump(
 	ulint	i;
 	int	ret;
 
-	ut_snprintf(full_filename, sizeof(full_filename),
-		    "%s%c%s", srv_data_home, SRV_PATH_SEPARATOR,
-		    srv_buf_dump_filename);
+	buf_dump_generate_path(full_filename, sizeof(full_filename));
 
 	ut_snprintf(tmp_filename, sizeof(tmp_filename),
 		    "%s.incomplete", full_filename);
 
-	buf_dump_status(STATUS_NOTICE, "Dumping buffer pool(s) to %s",
+	buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) to %s",
 			full_filename);
 
 	f = fopen(tmp_filename, "w");
@@ -230,8 +292,18 @@ buf_dump(
 			continue;
 		}
 
-		dump = static_cast<buf_dump_t*>(
-			ut_malloc(n_pages * sizeof(*dump))) ;
+		if (srv_buf_pool_dump_pct != 100) {
+			ut_ad(srv_buf_pool_dump_pct < 100);
+
+			n_pages = n_pages * srv_buf_pool_dump_pct / 100;
+
+			if (n_pages == 0) {
+				n_pages = 1;
+			}
+		}
+
+		dump = static_cast<buf_dump_t*>(ut_malloc_nokey(
+				n_pages * sizeof(*dump)));
 
 		if (dump == NULL) {
 			buf_pool_mutex_exit(buf_pool);
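
A note on the innodb_buffer_pool_dump_pct arithmetic introduced above: the page count is scaled by integer division and floored at one page, and a later hunk switches the LRU scan to start from the list head so that the trimmed dump keeps the hottest pages. A toy illustration of the clamping, assuming the same semantics:

#include <cassert>

/* n_pages scaled by the dump percentage, floored at one page,
mirroring the computation in the hunk above. */
unsigned long scaled_dump_pages(unsigned long n_pages, unsigned long pct)
{
	assert(pct >= 1 && pct <= 100);

	if (pct != 100) {
		n_pages = n_pages * pct / 100;

		if (n_pages == 0) {
			n_pages = 1;	/* always dump at least one page */
		}
	}

	return(n_pages);
}

/* e.g. scaled_dump_pages(1000, 25) == 250, scaled_dump_pages(3, 25) == 1 */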
@@ -244,14 +316,14 @@ buf_dump(
 			return;
 		}
 
-		for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), j = 0;
-		     bpage != NULL;
-		     bpage = UT_LIST_GET_PREV(LRU, bpage), j++) {
+		for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU), j = 0;
+		     bpage != NULL && j < n_pages;
+		     bpage = UT_LIST_GET_NEXT(LRU, bpage), j++) {
 
 			ut_a(buf_page_in_file(bpage));
 
-			dump[j] = BUF_DUMP_CREATE(buf_page_get_space(bpage),
-						  buf_page_get_page_no(bpage));
+			dump[j] = BUF_DUMP_CREATE(bpage->id.space(),
+						  bpage->id.page_no());
 		}
 
 		ut_a(j == n_pages);
@@ -274,10 +346,10 @@ buf_dump(
 
 			if (j % 128 == 0) {
 				buf_dump_status(
-					STATUS_INFO,
-					"Dumping buffer pool "
-					ULINTPF "/" ULINTPF ", "
-					"page " ULINTPF "/" ULINTPF,
+					STATUS_VERBOSE,
+					"Dumping buffer pool"
+					" " ULINTPF "/" ULINTPF ","
+					" page " ULINTPF "/" ULINTPF,
 					i + 1, srv_buf_pool_instances,
 					j + 1, n_pages);
 			}
@@ -320,44 +392,74 @@ buf_dump(
 
 	ut_sprintf_timestamp(now);
 
-	buf_dump_status(STATUS_NOTICE,
+	buf_dump_status(STATUS_INFO,
 			"Buffer pool(s) dump completed at %s", now);
 }
 
 /*****************************************************************//**
-Compare two buffer pool dump entries, used to sort the dump on
-space_no,page_no before loading in order to increase the chance for
-sequential IO.
-@return -1/0/1 if entry 1 is smaller/equal/bigger than entry 2 */
-static
-lint
-buf_dump_cmp(
-/*=========*/
-	const buf_dump_t	d1,	/*!< in: buffer pool dump entry 1 */
-	const buf_dump_t	d2)	/*!< in: buffer pool dump entry 2 */
+Artificially delay the buffer pool loading if necessary. The idea of
+this function is to prevent hogging the server with IO and slowing down
+normal client queries too much. */
+UNIV_INLINE
+void
+buf_load_throttle_if_needed(
+/*========================*/
+	ulint*	last_check_time,	/*!< in/out: milliseconds since epoch
+					of the last time we checked whether
+					throttling is needed; we check once
+					every srv_io_capacity IO ops. */
+	ulint*	last_activity_count,	/*!< in/out: server activity count
+					observed at the last check */
+	ulint	n_io)			/*!< in: number of IO ops done since
+					buffer pool load has started */
 {
-	if (d1 < d2) {
-		return(-1);
-	} else if (d1 == d2) {
-		return(0);
-	} else {
-		return(1);
+	if (n_io % srv_io_capacity < srv_io_capacity - 1) {
+		return;
 	}
-}
 
-/*****************************************************************//**
-Sort a buffer pool dump on space_no, page_no. */
-static
-void
-buf_dump_sort(
-/*==========*/
-	buf_dump_t*	dump,	/*!< in/out: buffer pool dump to sort */
-	buf_dump_t*	tmp,	/*!< in/out: temp storage */
-	ulint		low,	/*!< in: lowest index (inclusive) */
-	ulint		high)	/*!< in: highest index (non-inclusive) */
-{
-	UT_SORT_FUNCTION_BODY(buf_dump_sort, dump, tmp, low, high,
-			      buf_dump_cmp);
+	if (*last_check_time == 0 || *last_activity_count == 0) {
+		*last_check_time = ut_time_ms();
+		*last_activity_count = srv_get_activity_count();
+		return;
+	}
+
+	/* srv_io_capacity IO operations have been performed by buffer pool
+	load since the last time we were here. */
+
+	/* If no other activity, then keep going without any delay. */
+	if (srv_get_activity_count() == *last_activity_count) {
+		return;
+	}
+
+	/* There has been other activity, throttle. */
+
+	ulint	now = ut_time_ms();
+	ulint	elapsed_time = now - *last_check_time;
+
+	/* Notice that elapsed_time is not the time for the last
+	srv_io_capacity IO operations performed by BP load. It is the
+	time elapsed since the last time we detected that there has been
+	other activity. This has a small and acceptable deficiency, e.g.:
+	1. BP load runs and there is no other activity.
+	2. Other activity occurs, we run N IO operations after that and
+	   enter here (where 0 <= N < srv_io_capacity).
+	3. last_check_time is very old and we do not sleep at this time, but
+	   only update last_check_time and last_activity_count.
+	4. We run srv_io_capacity more IO operations and call this function
+	   again.
+	5. There has been more other activity and thus we enter here.
+	6. Now last_check_time is recent and we sleep if necessary to prevent
+	   more than srv_io_capacity IO operations per second.
+	The deficiency is that we could have slept at 3., but for this we
+	would have to update last_check_time before the
+	"cur_activity_count == *last_activity_count" check and calling
+	ut_time_ms() that often may turn out to be too expensive. */
+
+	if (elapsed_time < 1000 /* 1 sec (1000 ms) */) {
+		os_thread_sleep((1000 - elapsed_time) * 1000 /* microseconds */);
+	}
+
+	*last_check_time = ut_time_ms();
+	*last_activity_count = srv_get_activity_count();
 }
 
 /*****************************************************************//**
@@ -375,7 +477,6 @@ buf_load()
 	char		now[32];
 	FILE*		f;
 	buf_dump_t*	dump;
-	buf_dump_t*	dump_tmp;
 	ulint		dump_n;
 	ulint		total_buffer_pools_pages;
 	ulint		i;
@@ -386,11 +487,9 @@ buf_load()
 	/* Ignore any leftovers from before */
 	buf_load_abort_flag = FALSE;
 
-	ut_snprintf(full_filename, sizeof(full_filename),
-		    "%s%c%s", srv_data_home, SRV_PATH_SEPARATOR,
-		    srv_buf_dump_filename);
+	buf_dump_generate_path(full_filename, sizeof(full_filename));
 
-	buf_load_status(STATUS_NOTICE,
+	buf_load_status(STATUS_INFO,
 			"Loading buffer pool(s) from %s", full_filename);
 
 	f = fopen(full_filename, "r");
@@ -420,22 +519,23 @@ buf_load()
 			what = "parsing";
 		}
 		fclose(f);
-		buf_load_status(STATUS_ERR, "Error %s '%s', "
-				"unable to load buffer pool (stage 1)",
+		buf_load_status(STATUS_ERR, "Error %s '%s',"
+				" unable to load buffer pool (stage 1)",
 				what, full_filename);
 		return;
 	}
 
 	/* If dump is larger than the buffer pool(s), then we ignore the
 	extra trailing entries. This could happen if a dump is made, then buffer
-	pool is shrunk and then load it attempted. */
+	pool is shrunk and then load is attempted. */
 	total_buffer_pools_pages = buf_pool_get_n_pages()
 		* srv_buf_pool_instances;
 	if (dump_n > total_buffer_pools_pages) {
 		dump_n = total_buffer_pools_pages;
 	}
 
-	dump = static_cast<buf_dump_t*>(ut_malloc(dump_n * sizeof(*dump)));
+	dump = static_cast<buf_dump_t*>(ut_malloc_nokey(dump_n
+							* sizeof(*dump)));
 
 	if (dump == NULL) {
 		fclose(f);
@@ -446,19 +546,6 @@ buf_load()
 		return;
 	}
 
-	dump_tmp = static_cast<buf_dump_t*>(
-		ut_malloc(dump_n * sizeof(*dump_tmp)));
-
-	if (dump_tmp == NULL) {
-		ut_free(dump);
-		fclose(f);
-		buf_load_status(STATUS_ERR,
-				"Cannot allocate " ULINTPF " bytes: %s",
-				(ulint) (dump_n * sizeof(*dump_tmp)),
-				strerror(errno));
-		return;
-	}
-
 	rewind(f);
 
 	for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
@@ -472,24 +559,22 @@ buf_load()
 			/* else */
 
 			ut_free(dump);
-			ut_free(dump_tmp);
 			fclose(f);
 			buf_load_status(STATUS_ERR,
-					"Error parsing '%s', unable "
-					"to load buffer pool (stage 2)",
+					"Error parsing '%s', unable"
+					" to load buffer pool (stage 2)",
 					full_filename);
 			return;
 		}
 
 		if (space_id > ULINT32_MASK || page_no > ULINT32_MASK) {
 			ut_free(dump);
-			ut_free(dump_tmp);
 			fclose(f);
 			buf_load_status(STATUS_ERR,
-					"Error parsing '%s': bogus "
-					"space,page " ULINTPF "," ULINTPF
-					" at line " ULINTPF ", "
-					"unable to load buffer pool",
+					"Error parsing '%s': bogus"
+					" space,page " ULINTPF "," ULINTPF
+					" at line " ULINTPF ","
+					" unable to load buffer pool",
 					full_filename,
 					space_id, page_no,
 					i);
@@ -509,56 +594,126 @@ buf_load()
 	if (dump_n == 0) {
 		ut_free(dump);
 		ut_sprintf_timestamp(now);
-		buf_load_status(STATUS_NOTICE,
-				"Buffer pool(s) load completed at %s "
-				"(%s was empty)", now, full_filename);
+		buf_load_status(STATUS_INFO,
+				"Buffer pool(s) load completed at %s"
+				" (%s was empty)", now, full_filename);
 		return;
 	}
 
 	if (!SHUTTING_DOWN()) {
-		buf_dump_sort(dump, dump_tmp, 0, dump_n);
+		std::sort(dump, dump + dump_n);
 	}
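
std::sort can replace the removed hand-rolled merge sort because each dump entry is a single integer: BUF_DUMP_CREATE appears to pack the space id into the high half and the page number into the low half, so plain integer ordering is exactly (space, page). A sketch of that encoding, under the assumption of 32-bit halves:

#include <algorithm>
#include <cstdint>
#include <vector>

/* Assumed encoding: space id in the high 32 bits, page number in the
low 32 bits, so sorting the raw integers sorts by (space, page). */
static uint64_t dump_create(uint32_t space, uint32_t page)
{
	return((static_cast<uint64_t>(space) << 32) | page);
}

static uint32_t dump_space(uint64_t d) { return(d >> 32); }
static uint32_t dump_page(uint64_t d) { return(d & 0xffffffffU); }

void sort_dump(std::vector<uint64_t>& dump)
{
	/* One call replaces buf_dump_sort()/buf_dump_cmp() entirely. */
	std::sort(dump.begin(), dump.end());
}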
 
-	ut_free(dump_tmp);
+	ulint		last_check_time = 0;
+	ulint		last_activity_cnt = 0;
+
+	/* Avoid calling the expensive fil_space_acquire_silent() for each
+	page within the same tablespace. dump[] is sorted by (space, page),
+	so all pages from a given tablespace are consecutive. */
+	ulint		cur_space_id = BUF_DUMP_SPACE(dump[0]);
+	fil_space_t*	space = fil_space_acquire_silent(cur_space_id);
+	page_size_t	page_size(space ? space->flags : 0);
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+	PSI_stage_progress*	pfs_stage_progress
+		= mysql_set_stage(srv_stage_buffer_pool_load.m_key);
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
+	mysql_stage_set_work_estimated(pfs_stage_progress, dump_n);
+	mysql_stage_set_work_completed(pfs_stage_progress, 0);
 
 	for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
 
-		buf_read_page_async(BUF_DUMP_SPACE(dump[i]),
-				    BUF_DUMP_PAGE(dump[i]));
+		/* space_id for this iteration of the loop */
+		const ulint	this_space_id = BUF_DUMP_SPACE(dump[i]);
+
+		if (this_space_id != cur_space_id) {
+			if (space != NULL) {
+				fil_space_release(space);
+			}
+
+			cur_space_id = this_space_id;
+			space = fil_space_acquire_silent(cur_space_id);
+
+			if (space != NULL) {
+				const page_size_t	cur_page_size(
+					space->flags);
+				page_size.copy_from(cur_page_size);
+			}
+		}
+
+		if (space == NULL) {
+			continue;
+		}
+
+		buf_read_page_background(
+			page_id_t(this_space_id, BUF_DUMP_PAGE(dump[i])),
+			page_size, true);
 
 		if (i % 64 == 63) {
 			os_aio_simulated_wake_handler_threads();
 		}
 
-		if (i % 128 == 0) {
-			buf_load_status(STATUS_INFO,
+		/* Update the progress every 32 MiB, which is every Nth page,
+		where N = 32*1024^2 / page_size. */
+		static const ulint	update_status_every_n_mb = 32;
+		static const ulint	update_status_every_n_pages
+			= update_status_every_n_mb * 1024 * 1024
+			/ page_size.physical();
+
+		if (i % update_status_every_n_pages == 0) {
+			buf_load_status(STATUS_VERBOSE,
 					"Loaded " ULINTPF "/" ULINTPF " pages",
 					i + 1, dump_n);
+			mysql_stage_set_work_completed(pfs_stage_progress, i);
 		}
 
 		if (buf_load_abort_flag) {
+			if (space != NULL) {
+				fil_space_release(space);
+			}
 			buf_load_abort_flag = FALSE;
 			ut_free(dump);
 			buf_load_status(
-				STATUS_NOTICE,
+				STATUS_INFO,
 				"Buffer pool(s) load aborted on request");
+			/* Premature end, set estimated = completed = i and
+			end the current stage event. */
+			mysql_stage_set_work_estimated(pfs_stage_progress, i);
+			mysql_stage_set_work_completed(pfs_stage_progress, i);
+#ifdef HAVE_PSI_STAGE_INTERFACE
+			mysql_end_stage();
+#endif /* HAVE_PSI_STAGE_INTERFACE */
 			return;
 		}
+
+		buf_load_throttle_if_needed(
+			&last_check_time, &last_activity_cnt, i);
+	}
+
+	if (space != NULL) {
+		fil_space_release(space);
 	}
 
 	ut_free(dump);
 
 	ut_sprintf_timestamp(now);
 
-	buf_load_status(STATUS_NOTICE,
+	buf_load_status(STATUS_INFO,
 			"Buffer pool(s) load completed at %s", now);
+
+	/* Make sure that estimated = completed when we end. */
+	mysql_stage_set_work_completed(pfs_stage_progress, dump_n);
+	/* End the stage progress event. */
+#ifdef HAVE_PSI_STAGE_INTERFACE
+	mysql_end_stage();
+#endif /* HAVE_PSI_STAGE_INTERFACE */
 }
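
The progress interval above scales with the page size instead of being a fixed page count; with the default 16 KiB physical page size the constant works out to one status update every 2048 pages. The arithmetic, spelled out:

#include <cstdio>

int main()
{
	const unsigned long	page_size = 16384;	/* default size */
	const unsigned long	every_n_mb = 32;
	const unsigned long	every_n_pages
		= every_n_mb * 1024 * 1024 / page_size;	/* == 2048 */

	printf("update status every %lu pages\n", every_n_pages);

	return(0);
}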
 
 /*****************************************************************//**
 Aborts a currently running buffer pool load. This function is called by
 MySQL code via buffer_pool_load_abort() and it should return immediately
 because the whole of MySQL is frozen during its execution. */
-UNIV_INTERN
 void
 buf_load_abort()
 /*============*/
@@ -571,7 +726,7 @@ This is the main thread for buffer pool dump/load. It waits for an
 event and, when woken up, either performs a dump or a load and sleeps
 again.
 @return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
+extern "C"
 os_thread_ret_t
 DECLARE_THREAD(buf_dump_thread)(
 /*============================*/
@@ -580,10 +735,14 @@ DECLARE_THREAD(buf_dump_thread)(
 {
 	ut_ad(!srv_read_only_mode);
 
+#ifdef UNIV_PFS_THREAD
+	pfs_register_thread(buf_dump_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
 	srv_buf_dump_thread_active = TRUE;
 
-	buf_dump_status(STATUS_INFO, "not started");
-	buf_load_status(STATUS_INFO, "not started");
+	buf_dump_status(STATUS_VERBOSE, "not started");
+	buf_load_status(STATUS_VERBOSE, "not started");
 
 	if (srv_buffer_pool_load_at_startup) {
 		buf_load();
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 540d638..a35a145 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -23,6 +23,9 @@ The database buffer buf_pool flush algorithm
 Created 11/11/1995 Heikki Tuuri
 *******************************************************/
 
+#include "ha_prototypes.h"
+#include <mysql/service_thd_wait.h>
+
 #include "buf0flu.h"
 
 #ifdef UNIV_NONINL
@@ -36,7 +39,6 @@ Created 11/11/1995 Heikki Tuuri
 #include "page0zip.h"
 #ifndef UNIV_HOTBACKUP
 #include "ut0byte.h"
-#include "ut0lst.h"
 #include "page0page.h"
 #include "fil0fil.h"
 #include "buf0lru.h"
@@ -46,8 +48,21 @@ Created 11/11/1995 Heikki Tuuri
 #include "os0file.h"
 #include "trx0sys.h"
 #include "srv0mon.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
+#include "fsp0sysspace.h"
+#include "ut0stage.h"
+
+#ifdef UNIV_LINUX
+/* include defs for CPU time priority settings */
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+static const int buf_flush_page_cleaner_priority = -20;
+#endif /* UNIV_LINUX */
+
+/** Sleep time in microseconds for loop waiting for the oldest
+modification lsn */
+static const ulint buf_flush_wait_flushed_sleep_time = 10000;
 
 /** Number of pages flushed through non flush_list flushes. */
 static ulint buf_lru_flush_page_count = 0;
@@ -57,16 +72,116 @@ is set to TRUE by the page_cleaner thread when it is spawned and is set
 back to FALSE at shutdown by the page_cleaner as well. Therefore no
 need to protect it by a mutex. It is only ever read by the thread
 doing the shutdown */
-UNIV_INTERN ibool buf_page_cleaner_is_active = FALSE;
+bool buf_page_cleaner_is_active = false;
+
+/** Factor applied to the scan length when determining n_pages for the
+intended oldest-LSN progress */
+static ulint buf_flush_lsn_scan_factor = 3;
 
-/** LRU flush batch is further divided into this chunk size to
-reduce the wait time for the threads waiting for a clean block */
-#define PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE	100
+/** Average redo generation rate */
+static lsn_t lsn_avg_rate = 0;
+
+/** Target oldest LSN for the requested flush_sync */
+static lsn_t buf_flush_sync_lsn = 0;
 
 #ifdef UNIV_PFS_THREAD
-UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key;
+mysql_pfs_key_t page_cleaner_thread_key;
 #endif /* UNIV_PFS_THREAD */
 
+/** Event to synchronise with the flushing. */
+os_event_t	buf_flush_event;
+
+/** State for page cleaner array slot */
+enum page_cleaner_state_t {
+	/** Nothing requested yet.
+	Moved from FINISHED by the coordinator. */
+	PAGE_CLEANER_STATE_NONE = 0,
+	/** Requested but not started flushing.
+	Moved from NONE by the coordinator. */
+	PAGE_CLEANER_STATE_REQUESTED,
+	/** Flushing is ongoing.
+	Moved from REQUESTED by the worker. */
+	PAGE_CLEANER_STATE_FLUSHING,
+	/** Flushing was finished.
+	Moved from FLUSHING by the worker. */
+	PAGE_CLEANER_STATE_FINISHED
+};
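
Read as a state machine, a slot cycles NONE -> REQUESTED (coordinator) -> FLUSHING -> FINISHED (worker) -> NONE (coordinator). A small sketch of the legal transitions, split by owner, as the comments above describe them:

enum cleaner_state { NONE, REQUESTED, FLUSHING, FINISHED };

/* Transitions performed by the coordinator thread. */
bool coordinator_may_move(cleaner_state from, cleaner_state to)
{
	return((from == FINISHED && to == NONE)		/* collect result */
	       || (from == NONE && to == REQUESTED));	/* issue request */
}

/* Transitions performed by a worker thread. */
bool worker_may_move(cleaner_state from, cleaner_state to)
{
	return((from == REQUESTED && to == FLUSHING)	/* claim the slot */
	       || (from == FLUSHING && to == FINISHED));/* publish result */
}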
+
+/** Page cleaner request state for each buffer pool instance */
+struct page_cleaner_slot_t {
+	page_cleaner_state_t	state;	/*!< state of the request,
+					protected by page_cleaner_t::mutex.
+					Once the worker thread has taken the
+					slot and set it to
+					PAGE_CLEANER_STATE_FLUSHING,
+					n_flushed_lru and n_flushed_list can
+					be updated only by that worker */
+	/* This value is set during state==PAGE_CLEANER_STATE_NONE */
+	ulint			n_pages_requested;
+					/*!< number of requested pages
+					for the slot */
+	/* These values are updated during state==PAGE_CLEANER_STATE_FLUSHING,
+	and committed with state==PAGE_CLEANER_STATE_FINISHED.
+	Their consistency is protected by the 'state' */
+	ulint			n_flushed_lru;
+					/*!< number of flushed pages
+					by LRU scan flushing */
+	ulint			n_flushed_list;
+					/*!< number of flushed pages
+					by flush_list flushing */
+	bool			succeeded_list;
+					/*!< true if flush_list flushing
+					succeeded. */
+	ulint			flush_lru_time;
+					/*!< elapsed time for LRU flushing */
+	ulint			flush_list_time;
+					/*!< elapsed time for flush_list
+					flushing */
+	ulint			flush_lru_pass;
+					/*!< number of LRU flushing passes
+					attempted */
+	ulint			flush_list_pass;
+					/*!< number of flush_list flushing
+					passes attempted */
+};
+
+/** Page cleaner structure common for all threads */
+struct page_cleaner_t {
+	ib_mutex_t		mutex;		/*!< mutex protecting the whole
+						page_cleaner_t struct and the
+						page_cleaner_slot_t slots. */
+	os_event_t		is_requested;	/*!< event to activate worker
+						threads. */
+	os_event_t		is_finished;	/*!< event to signal that all
+						slots were finished. */
+	volatile ulint		n_workers;	/*!< number of worker threads
+						in existence */
+	bool			requested;	/*!< true if pages have been
+						requested to be flushed */
+	lsn_t			lsn_limit;	/*!< upper limit of LSN to be
+						flushed */
+	ulint			n_slots;	/*!< total number of slots */
+	ulint			n_slots_requested;
+						/*!< number of slots
+						in the state
+						PAGE_CLEANER_STATE_REQUESTED */
+	ulint			n_slots_flushing;
+						/*!< number of slots
+						in the state
+						PAGE_CLEANER_STATE_FLUSHING */
+	ulint			n_slots_finished;
+						/*!< number of slots
+						in the state
+						PAGE_CLEANER_STATE_FINISHED */
+	ulint			flush_time;	/*!< elapsed time to flush
+						requests for all slots */
+	ulint			flush_pass;	/*!< number of passes needed
+						to complete the flush requests
+						for all slots */
+	page_cleaner_slot_t*	slots;		/*!< pointer to the slots */
+	bool			is_running;	/*!< false if a shutdown has
+						been requested */
+};
+
+static page_cleaner_t*	page_cleaner = NULL;
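
The is_requested/is_finished pair implements a fan-out/fan-in handshake: the coordinator publishes one request per slot, wakes all workers, and waits until n_slots_finished equals n_slots. A portable sketch of the same shape using a condition variable; the real code uses os_event_t and ib_mutex_t, so this is an analogy, not the implementation:

#include <condition_variable>
#include <mutex>

struct cleaner {
	std::mutex		mtx;
	std::condition_variable	is_requested;	/* coordinator -> workers */
	std::condition_variable	is_finished;	/* workers -> coordinator */
	unsigned		n_slots = 0;
	unsigned		n_requested = 0;
	unsigned		n_finished = 0;
};

/* Coordinator: publish work for every slot, then wait for all of it. */
void coordinate(cleaner& c, unsigned slots)
{
	std::unique_lock<std::mutex> lk(c.mtx);

	c.n_slots = c.n_requested = slots;
	c.n_finished = 0;
	c.is_requested.notify_all();

	c.is_finished.wait(lk, [&] { return c.n_finished == c.n_slots; });
}

/* Worker: claim one slot, flush outside the mutex, publish the result. */
void work(cleaner& c)
{
	std::unique_lock<std::mutex> lk(c.mtx);

	c.is_requested.wait(lk, [&] { return c.n_requested > 0; });
	--c.n_requested;		/* REQUESTED -> FLUSHING */

	lk.unlock();
	/* ... flush one buffer pool instance ... */
	lk.lock();

	if (++c.n_finished == c.n_slots) {	/* FLUSHING -> FINISHED */
		c.is_finished.notify_one();
	}
}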
+
 /** If LRU list of a buf_pool is less than this size then LRU eviction
 should not happen. This is because when we do LRU flushing we also put
 the blocks on free list. If LRU list is very small then we can end up
@@ -76,8 +191,7 @@ in thrashing. */
 /* @} */
 
 /******************************************************************//**
-Increases flush_list size in bytes with zip_size for compressed page,
-UNIV_PAGE_SIZE for uncompressed page in inline function */
+Increases flush_list size in bytes with the page size in inline function */
 static inline
 void
 incr_flush_list_size_in_bytes(
@@ -86,15 +200,16 @@ incr_flush_list_size_in_bytes(
 	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
 {
 	ut_ad(buf_flush_list_mutex_own(buf_pool));
-	ulint zip_size = page_zip_get_size(&block->page.zip);
-	buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+
+	buf_pool->stat.flush_list_bytes += block->page.size.physical();
+
 	ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
 }
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /******************************************************************//**
 Validates the flush list.
-@return	TRUE if ok */
+@return TRUE if ok */
 static
 ibool
 buf_flush_validate_low(
@@ -103,7 +218,7 @@ buf_flush_validate_low(
 
 /******************************************************************//**
 Validates the flush list some of the time.
-@return	TRUE if ok or the check was skipped */
+@return TRUE if ok or the check was skipped */
 static
 ibool
 buf_flush_validate_skip(
@@ -130,65 +245,11 @@ buf_flush_validate_skip(
 }
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-/*******************************************************************//**
-Sets hazard pointer during flush_list iteration. */
-UNIV_INLINE
-void
-buf_flush_set_hp(
-/*=============*/
-	buf_pool_t*		buf_pool,/*!< in/out: buffer pool instance */
-	const buf_page_t*	bpage)	/*!< in: buffer control block */
-{
-	ut_ad(buf_flush_list_mutex_own(buf_pool));
-	ut_ad(buf_pool->flush_list_hp == NULL || bpage == NULL);
-	ut_ad(!bpage || buf_page_in_file(bpage));
-	ut_ad(!bpage || bpage->in_flush_list);
-	ut_ad(!bpage || buf_pool_from_bpage(bpage) == buf_pool);
-
-	buf_pool->flush_list_hp = bpage;
-}
-
-/*******************************************************************//**
-Checks if the given block is a hazard pointer
- at return true if bpage is hazard pointer */
-UNIV_INLINE
-bool
-buf_flush_is_hp(
-/*============*/
-	buf_pool_t*		buf_pool,/*!< in: buffer pool instance */
-	const buf_page_t*	bpage)	/*!< in: buffer control block */
-{
-	ut_ad(buf_flush_list_mutex_own(buf_pool));
-
-	return(buf_pool->flush_list_hp == bpage);
-}
-
-/*******************************************************************//**
-Whenever we move a block in flush_list (either to remove it or to
-relocate it) we check the hazard pointer set by some other thread
-doing the flush list scan. If the hazard pointer is the same as the
-one we are about going to move then we set it to NULL to force a rescan
-in the thread doing the batch. */
-UNIV_INLINE
-void
-buf_flush_update_hp(
-/*================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	buf_page_t*	bpage)		/*!< in: buffer control block */
-{
-	ut_ad(buf_flush_list_mutex_own(buf_pool));
-
-	if (buf_flush_is_hp(buf_pool, bpage)) {
-		buf_flush_set_hp(buf_pool, NULL);
-		MONITOR_INC(MONITOR_FLUSH_HP_RESCAN);
-	}
-}
-
 /******************************************************************//**
 Insert a block in the flush_rbt and returns a pointer to its
 predecessor or NULL if no predecessor. The ordering is maintained
 on the basis of the <oldest_modification, space, offset> key.
-@return	pointer to the predecessor or NULL if no predecessor. */
+@return pointer to the predecessor or NULL if no predecessor. */
 static
 buf_page_t*
 buf_flush_insert_in_flush_rbt(
@@ -251,7 +312,7 @@ buf_pool->flush_rbt.
 Note that for the purpose of flush_rbt, we only need to order blocks
 on the oldest_modification. The other two fields are used to uniquely
 identify the blocks.
-@return	 < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
+@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
 static
 int
 buf_flush_block_cmp(
@@ -262,13 +323,14 @@ buf_flush_block_cmp(
 	int			ret;
 	const buf_page_t*	b1 = *(const buf_page_t**) p1;
 	const buf_page_t*	b2 = *(const buf_page_t**) p2;
-#ifdef UNIV_DEBUG
-	buf_pool_t*		buf_pool = buf_pool_from_bpage(b1);
-#endif /* UNIV_DEBUG */
 
 	ut_ad(b1 != NULL);
 	ut_ad(b2 != NULL);
 
+#ifdef UNIV_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(b1);
+#endif /* UNIV_DEBUG */
+
 	ut_ad(buf_flush_list_mutex_own(buf_pool));
 
 	ut_ad(b1->in_flush_list);
@@ -281,17 +343,16 @@ buf_flush_block_cmp(
 	}
 
 	/* If oldest_modification is same then decide on the space. */
-	ret = (int)(b2->space - b1->space);
+	ret = (int)(b2->id.space() - b1->id.space());
 
-	/* Or else decide ordering on the offset field. */
-	return(ret ? ret : (int)(b2->offset - b1->offset));
+	/* Or else decide ordering on the page number. */
+	return(ret ? ret : (int) (b2->id.page_no() - b1->id.page_no()));
 }
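
The comparator's three levels are what make each flush_rbt key unique: order on oldest_modification first, then break ties on space id and page number. A plain-C++ comparator with the same lexicographic structure (note the InnoDB function computes b2 - b1, i.e. with the sign reversed relative to this sketch):

#include <cstdint>

struct page_key {
	uint64_t	oldest_modification;	/* primary: first-dirtied LSN */
	uint32_t	space;			/* tie-break 1: tablespace id */
	uint32_t	page_no;		/* tie-break 2: page number */
};

/* Compare on the LSN first; use (space, page_no) only to make
equal-LSN keys unique. Returns <0, 0 or >0 like memcmp(). */
int page_key_cmp(const page_key& a, const page_key& b)
{
	if (a.oldest_modification != b.oldest_modification) {
		return(a.oldest_modification < b.oldest_modification ? -1 : 1);
	}
	if (a.space != b.space) {
		return(a.space < b.space ? -1 : 1);
	}
	if (a.page_no != b.page_no) {
		return(a.page_no < b.page_no ? -1 : 1);
	}
	return(0);
}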
 
 /********************************************************************//**
 Initialize the red-black tree to speed up insertions into the flush_list
 during recovery process. Should be called at the start of recovery
 process before any page has been read/written. */
-UNIV_INTERN
 void
 buf_flush_init_flush_rbt(void)
 /*==========================*/
@@ -305,6 +366,8 @@ buf_flush_init_flush_rbt(void)
 
 		buf_flush_list_mutex_enter(buf_pool);
 
+		ut_ad(buf_pool->flush_rbt == NULL);
+
 		/* Create red black tree for speedy insertions in flush list. */
 		buf_pool->flush_rbt = rbt_create(
 			sizeof(buf_page_t*), buf_flush_block_cmp);
@@ -315,7 +378,6 @@ buf_flush_init_flush_rbt(void)
 
 /********************************************************************//**
 Frees up the red-black tree. */
-UNIV_INTERN
 void
 buf_flush_free_flush_rbt(void)
 /*==========================*/
@@ -342,7 +404,6 @@ buf_flush_free_flush_rbt(void)
 
 /********************************************************************//**
 Inserts a modified block into the flush list. */
-UNIV_INTERN
 void
 buf_flush_insert_into_flush_list(
 /*=============================*/
@@ -352,7 +413,7 @@ buf_flush_insert_into_flush_list(
 {
 	ut_ad(!buf_pool_mutex_own(buf_pool));
 	ut_ad(log_flush_order_mutex_own());
-	ut_ad(mutex_own(&block->mutex));
+	ut_ad(buf_page_mutex_own(block));
 
 	buf_flush_list_mutex_enter(buf_pool);
 
@@ -362,7 +423,7 @@ buf_flush_insert_into_flush_list(
 
 	/* If we are in the recovery then we need to update the flush
 	red-black tree as well. */
-	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+	if (buf_pool->flush_rbt != NULL) {
 		buf_flush_list_mutex_exit(buf_pool);
 		buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
 		return;
@@ -373,20 +434,23 @@ buf_flush_insert_into_flush_list(
 
 	ut_d(block->page.in_flush_list = TRUE);
 	block->page.oldest_modification = lsn;
-	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+
+	UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
+
 	incr_flush_list_size_in_bytes(block, buf_pool);
 
 #ifdef UNIV_DEBUG_VALGRIND
-	{
-		ulint	zip_size = buf_block_get_zip_size(block);
+	void*	p;
 
-		if (zip_size) {
-			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
-		} else {
-			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
-		}
+	if (block->page.size.is_compressed()) {
+		p = block->page.zip.data;
+	} else {
+		p = block->frame;
 	}
+
+	UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
 #endif /* UNIV_DEBUG_VALGRIND */
+
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_flush_validate_skip(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -398,7 +462,6 @@ buf_flush_insert_into_flush_list(
 Inserts a modified block into the flush list in the right sorted position.
 This function is used by recovery, because there the modifications do not
 necessarily come in the order of lsn's. */
-UNIV_INTERN
 void
 buf_flush_insert_sorted_into_flush_list(
 /*====================================*/
@@ -411,7 +474,7 @@ buf_flush_insert_sorted_into_flush_list(
 
 	ut_ad(!buf_pool_mutex_own(buf_pool));
 	ut_ad(log_flush_order_mutex_own());
-	ut_ad(mutex_own(&block->mutex));
+	ut_ad(buf_page_mutex_own(block));
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 
 	buf_flush_list_mutex_enter(buf_pool);
@@ -437,15 +500,15 @@ buf_flush_insert_sorted_into_flush_list(
 	block->page.oldest_modification = lsn;
 
 #ifdef UNIV_DEBUG_VALGRIND
-	{
-		ulint	zip_size = buf_block_get_zip_size(block);
+	void*	p;
 
-		if (zip_size) {
-			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
-		} else {
-			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
-		}
+	if (block->page.size.is_compressed()) {
+		p = block->page.zip.data;
+	} else {
+		p = block->frame;
 	}
+
+	UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
 #endif /* UNIV_DEBUG_VALGRIND */
 
 	prev_b = NULL;
@@ -454,9 +517,9 @@ buf_flush_insert_sorted_into_flush_list(
 	should not be NULL. In a very rare boundary case it is possible
 	that the flush_rbt has already been freed by the recovery thread
 	before the last page was hooked up in the flush_list by the
-	io-handler thread. In that case we'll  just do a simple
+	io-handler thread. In that case we'll just do a simple
 	linear search in the else block. */
-	if (buf_pool->flush_rbt) {
+	if (buf_pool->flush_rbt != NULL) {
 
 		prev_b = buf_flush_insert_in_flush_rbt(&block->page);
 
@@ -464,8 +527,9 @@ buf_flush_insert_sorted_into_flush_list(
 
 		b = UT_LIST_GET_FIRST(buf_pool->flush_list);
 
-		while (b && b->oldest_modification
+		while (b != NULL && b->oldest_modification
 		       > block->page.oldest_modification) {
+
 			ut_ad(b->in_flush_list);
 			prev_b = b;
 			b = UT_LIST_GET_NEXT(list, b);
@@ -473,10 +537,9 @@ buf_flush_insert_sorted_into_flush_list(
 	}
 
 	if (prev_b == NULL) {
-		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+		UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
 	} else {
-		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
-				     prev_b, &block->page);
+		UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
 	}
 
 	incr_flush_list_size_in_bytes(block, buf_pool);
@@ -491,8 +554,7 @@ buf_flush_insert_sorted_into_flush_list(
 /********************************************************************//**
 Returns TRUE if the file page block is immediately suitable for replacement,
 i.e., the transition FILE_PAGE => NOT_USED is allowed.
-@return	TRUE if can replace immediately */
-UNIV_INTERN
+@return TRUE if can replace immediately */
 ibool
 buf_flush_ready_for_replace(
 /*========================*/
@@ -513,21 +575,15 @@ buf_flush_ready_for_replace(
 		       && buf_page_get_io_fix(bpage) == BUF_IO_NONE);
 	}
 
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		"  InnoDB: Error: buffer block state %lu"
-		" in the LRU list!\n",
-		(ulong) buf_page_get_state(bpage));
-	ut_print_buf(stderr, bpage, sizeof(buf_page_t));
-	putc('\n', stderr);
+	ib::fatal() << "Buffer block " << bpage << " state " << bpage->state
+		<< " in the LRU list!";
 
 	return(FALSE);
 }
 
 /********************************************************************//**
 Returns true if the block is modified and ready for flushing.
-@return	true if can flush immediately */
-UNIV_INTERN
+@return true if can flush immediately */
 bool
 buf_flush_ready_for_flush(
 /*======================*/
@@ -567,14 +623,12 @@ buf_flush_ready_for_flush(
 
 /********************************************************************//**
 Remove a block from the flush list of modified blocks. */
-UNIV_INTERN
 void
 buf_flush_remove(
 /*=============*/
 	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
 {
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-	ulint		zip_size;
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@@ -582,6 +636,10 @@ buf_flush_remove(
 
 	buf_flush_list_mutex_enter(buf_pool);
 
+	/* Important that we adjust the hazard pointer before removing
+	the bpage from the flush list. */
+	buf_pool->flush_hp.adjust(bpage);
+
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_POOL_WATCH:
 	case BUF_BLOCK_ZIP_PAGE:
@@ -594,18 +652,18 @@ buf_flush_remove(
 		return;
 	case BUF_BLOCK_ZIP_DIRTY:
 		buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
-		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+		UT_LIST_REMOVE(buf_pool->flush_list, bpage);
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		buf_LRU_insert_zip_clean(bpage);
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 		break;
 	case BUF_BLOCK_FILE_PAGE:
-		UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+		UT_LIST_REMOVE(buf_pool->flush_list, bpage);
 		break;
 	}
 
 	/* If the flush_rbt is active then delete from there as well. */
-	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+	if (buf_pool->flush_rbt != NULL) {
 		buf_flush_delete_from_flush_rbt(bpage);
 	}
 
@@ -613,8 +671,7 @@ buf_flush_remove(
 	because we assert on in_flush_list in comparison function. */
 	ut_d(bpage->in_flush_list = FALSE);
 
-	zip_size = page_zip_get_size(&bpage->zip);
-	buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+	buf_pool->stat.flush_list_bytes -= bpage->size.physical();
 
 	bpage->oldest_modification = 0;
 
@@ -622,7 +679,14 @@ buf_flush_remove(
 	ut_a(buf_flush_validate_skip(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-	buf_flush_update_hp(buf_pool, bpage);
+	/* If there is an observer that wants to know when the asynchronous
+	flushing is done, notify it. */
+	if (bpage->flush_observer != NULL) {
+		bpage->flush_observer->notify_remove(buf_pool, bpage);
+
+		bpage->flush_observer = NULL;
+	}
+
 	buf_flush_list_mutex_exit(buf_pool);
 }
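
The flush_hp.adjust() call above replaces the removed set/is/update hazard-pointer trio: rather than forcing the scanning thread to restart when the node under its pointer is unlinked, the pointer is simply stepped over the doomed node first. A minimal intrusive-list sketch of the idea; the real HazardPointer class lives in buf0buf and is richer than this:

struct node {
	node*	next;
	node*	prev;
};

/* A scanner's saved position in the list. */
struct hazard_pointer {
	node*	hp = nullptr;

	/* If the node about to be removed is the one we point at,
	step over it instead of forcing the scan to restart. */
	void adjust(const node* removed) {
		if (hp == removed) {
			hp = removed->next;
		}
	}
};

/* Removal must adjust the hazard pointer *before* unlinking,
exactly as buf_flush_remove() does above. */
void list_remove(node*& head, node* n, hazard_pointer& h)
{
	h.adjust(n);

	if (n->prev != nullptr) {
		n->prev->next = n->next;
	} else {
		head = n->next;
	}

	if (n->next != nullptr) {
		n->next->prev = n->prev;
	}
}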
 
@@ -637,7 +701,6 @@ use the current list node (bpage) to do the list manipulation because
 the list pointers could have changed between the time that we copied
 the contents of bpage to the dpage and the flush list manipulation
 below. */
-UNIV_INTERN
 void
 buf_flush_relocate_on_flush_list(
 /*=============================*/
@@ -668,46 +731,42 @@ buf_flush_relocate_on_flush_list(
 
 	/* If recovery is active we must swap the control blocks in
 	the flush_rbt as well. */
-	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+	if (buf_pool->flush_rbt != NULL) {
 		buf_flush_delete_from_flush_rbt(bpage);
 		prev_b = buf_flush_insert_in_flush_rbt(dpage);
 	}
 
+	/* Important that we adjust the hazard pointer before removing
+	the bpage from the flush list. */
+	buf_pool->flush_hp.adjust(bpage);
+
 	/* Must be done after we have removed it from the flush_rbt
 	because we assert on in_flush_list in comparison function. */
 	ut_d(bpage->in_flush_list = FALSE);
 
 	prev = UT_LIST_GET_PREV(list, bpage);
-	UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+	UT_LIST_REMOVE(buf_pool->flush_list, bpage);
 
 	if (prev) {
 		ut_ad(prev->in_flush_list);
-		UT_LIST_INSERT_AFTER(
-			list,
-			buf_pool->flush_list,
-			prev, dpage);
+		UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev, dpage);
 	} else {
-		UT_LIST_ADD_FIRST(
-			list,
-			buf_pool->flush_list,
-			dpage);
+		UT_LIST_ADD_FIRST(buf_pool->flush_list, dpage);
 	}
 
 	/* Just an extra check. Previous in flush_list
 	should be the same control block as in flush_rbt. */
-	ut_a(!buf_pool->flush_rbt || prev_b == prev);
+	ut_a(buf_pool->flush_rbt == NULL || prev_b == prev);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	ut_a(buf_flush_validate_low(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
-	buf_flush_update_hp(buf_pool, bpage);
 	buf_flush_list_mutex_exit(buf_pool);
 }
 
 /********************************************************************//**
 Updates the flush system data structures when a write is completed. */
-UNIV_INTERN
 void
 buf_flush_write_complete(
 /*=====================*/
@@ -723,9 +782,6 @@ buf_flush_write_complete(
 	flush_type = buf_page_get_flush_type(bpage);
 	buf_pool->n_flush[flush_type]--;
 
-	/* fprintf(stderr, "n pending flush %lu\n",
-	buf_pool->n_flush[flush_type]); */
-
 	if (buf_pool->n_flush[flush_type] == 0
 	    && buf_pool->init_flush[flush_type] == FALSE) {
 
@@ -738,80 +794,84 @@ buf_flush_write_complete(
 }
 #endif /* !UNIV_HOTBACKUP */
 
-/********************************************************************//**
-Calculate the checksum of a page from compressed table and update the page. */
-UNIV_INTERN
+/** Calculate the checksum of a page from compressed table and update
+the page.
+@param[in,out]	page	page to update
+@param[in]	size	compressed page size
+@param[in]	lsn	LSN to stamp on the page */
 void
 buf_flush_update_zip_checksum(
-/*==========================*/
-	buf_frame_t*	page,		/*!< in/out: Page to update */
-	ulint		zip_size,	/*!< in: Compressed page size */
-	lsn_t		lsn)		/*!< in: Lsn to stamp on the page */
+	buf_frame_t*	page,
+	ulint		size,
+	lsn_t		lsn)
 {
-	ut_a(zip_size > 0);
+	ut_a(size > 0);
 
-	ib_uint32_t	checksum = static_cast<ib_uint32_t>(
-		page_zip_calc_checksum(
-			page, zip_size,
-			static_cast<srv_checksum_algorithm_t>(
-				srv_checksum_algorithm)));
+	const uint32_t	checksum = page_zip_calc_checksum(
+		page, size,
+		static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm));
 
 	mach_write_to_8(page + FIL_PAGE_LSN, lsn);
-	memset(page + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
 	mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
 }
 
-/********************************************************************//**
-Initializes a page for writing to the tablespace. */
-UNIV_INTERN
+/** Initialize a page for writing to the tablespace.
+@param[in]	block		buffer block; NULL if bypassing the buffer pool
+@param[in,out]	page		page frame
+@param[in,out]	page_zip_	compressed page, or NULL if uncompressed
+@param[in]	newest_lsn	newest modification LSN to the page
+@param[in]	skip_checksum	whether to disable the page checksum */
 void
 buf_flush_init_for_writing(
-/*=======================*/
-	byte*	page,		/*!< in/out: page */
-	void*	page_zip_,	/*!< in/out: compressed page, or NULL */
-	lsn_t	newest_lsn)	/*!< in: newest modification lsn
-				to the page */
+	const buf_block_t*	block,
+	byte*			page,
+	void*			page_zip_,
+	lsn_t			newest_lsn,
+	bool			skip_checksum)
 {
-	ib_uint32_t	checksum = 0 /* silence bogus gcc warning */;
+	ib_uint32_t	checksum = BUF_NO_CHECKSUM_MAGIC;
 
+	ut_ad(block == NULL || block->frame == page);
+	ut_ad(block == NULL || page_zip_ == NULL
+	      || &block->page.zip == page_zip_);
 	ut_ad(page);
 
 	if (page_zip_) {
 		page_zip_des_t*	page_zip;
-		ulint		zip_size;
+		ulint		size;
 
 		page_zip = static_cast<page_zip_des_t*>(page_zip_);
-		zip_size = page_zip_get_size(page_zip);
+		size = page_zip_get_size(page_zip);
 
-		ut_ad(zip_size);
-		ut_ad(ut_is_2pow(zip_size));
-		ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+		ut_ad(size);
+		ut_ad(ut_is_2pow(size));
+		ut_ad(size <= UNIV_ZIP_SIZE_MAX);
 
-		switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
+		switch (fil_page_get_type(page)) {
 		case FIL_PAGE_TYPE_ALLOCATED:
 		case FIL_PAGE_INODE:
 		case FIL_PAGE_IBUF_BITMAP:
 		case FIL_PAGE_TYPE_FSP_HDR:
 		case FIL_PAGE_TYPE_XDES:
 			/* These are essentially uncompressed pages. */
-			memcpy(page_zip->data, page, zip_size);
+			memcpy(page_zip->data, page, size);
 			/* fall through */
 		case FIL_PAGE_TYPE_ZBLOB:
 		case FIL_PAGE_TYPE_ZBLOB2:
 		case FIL_PAGE_INDEX:
+		case FIL_PAGE_RTREE:
 
 			buf_flush_update_zip_checksum(
-				page_zip->data, zip_size, newest_lsn);
+				page_zip->data, size, newest_lsn);
 
 			return;
 		}
 
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: ERROR: The compressed page to be written"
-		      " seems corrupt:", stderr);
-		ut_print_buf(stderr, page, zip_size);
+		ib::error() << "The compressed page to be written"
+			" seems corrupt:";
+		ut_print_buf(stderr, page, size);
 		fputs("\nInnoDB: Possibly older version of the page:", stderr);
-		ut_print_buf(stderr, page_zip->data, zip_size);
+		ut_print_buf(stderr, page_zip->data, size);
 		putc('\n', stderr);
 		ut_error;
 	}
@@ -822,27 +882,85 @@ buf_flush_init_for_writing(
 	mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
 			newest_lsn);
 
-	/* Store the new formula checksum */
-
-	switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
-	case SRV_CHECKSUM_ALGORITHM_CRC32:
-	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
-		checksum = buf_calc_page_crc32(page);
-		mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
-		break;
-	case SRV_CHECKSUM_ALGORITHM_INNODB:
-	case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
-		checksum = (ib_uint32_t) buf_calc_page_new_checksum(page);
-		mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
-		checksum = (ib_uint32_t) buf_calc_page_old_checksum(page);
-		break;
-	case SRV_CHECKSUM_ALGORITHM_NONE:
-	case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
-		checksum = BUF_NO_CHECKSUM_MAGIC;
+	if (skip_checksum) {
 		mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
-		break;
-	/* no default so the compiler will emit a warning if new enum
-	is added and not handled here */
+	} else {
+		if (block != NULL && UNIV_PAGE_SIZE == 16384) {
+			/* The page type could be garbage in old files
+			created before MySQL 5.5. Such files always
+			had a page size of 16 kilobytes. */
+			ulint	page_type = fil_page_get_type(page);
+			ulint	reset_type = page_type;
+
+			switch (block->page.id.page_no() % 16384) {
+			case 0:
+				reset_type = block->page.id.page_no() == 0
+					? FIL_PAGE_TYPE_FSP_HDR
+					: FIL_PAGE_TYPE_XDES;
+				break;
+			case 1:
+				reset_type = FIL_PAGE_IBUF_BITMAP;
+				break;
+			default:
+				switch (page_type) {
+				case FIL_PAGE_INDEX:
+				case FIL_PAGE_RTREE:
+				case FIL_PAGE_UNDO_LOG:
+				case FIL_PAGE_INODE:
+				case FIL_PAGE_IBUF_FREE_LIST:
+				case FIL_PAGE_TYPE_ALLOCATED:
+				case FIL_PAGE_TYPE_SYS:
+				case FIL_PAGE_TYPE_TRX_SYS:
+				case FIL_PAGE_TYPE_BLOB:
+				case FIL_PAGE_TYPE_ZBLOB:
+				case FIL_PAGE_TYPE_ZBLOB2:
+					break;
+				case FIL_PAGE_TYPE_FSP_HDR:
+				case FIL_PAGE_TYPE_XDES:
+				case FIL_PAGE_IBUF_BITMAP:
+					/* These pages should have
+					predetermined page numbers
+					(see above). */
+				default:
+					reset_type = FIL_PAGE_TYPE_UNKNOWN;
+					break;
+				}
+			}
+
+			if (UNIV_UNLIKELY(page_type != reset_type)) {
+				ib::info()
+					<< "Resetting invalid page "
+					<< block->page.id << " type "
+					<< page_type << " to "
+					<< reset_type << " when flushing.";
+				fil_page_set_type(page, reset_type);
+			}
+		}
+
+		switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
+		case SRV_CHECKSUM_ALGORITHM_CRC32:
+		case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+			checksum = buf_calc_page_crc32(page);
+			mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+					checksum);
+			break;
+		case SRV_CHECKSUM_ALGORITHM_INNODB:
+		case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+			checksum = (ib_uint32_t) buf_calc_page_new_checksum(
+				page);
+			mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+					checksum);
+			checksum = (ib_uint32_t) buf_calc_page_old_checksum(
+				page);
+			break;
+		case SRV_CHECKSUM_ALGORITHM_NONE:
+		case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+			mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+					checksum);
+			break;
+			/* no default so the compiler will emit a warning if
+			new enum is added and not handled here */
+		}
 	}
 
 	/* With the InnoDB checksum, we overwrite the first 4 bytes of
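
The page-type reset above is driven purely by position: in a file with 16 KiB pages, page 0 of every 16384-page group must be the FSP header (page 0 of the file) or an extent descriptor page, page 1 of each group must be the change buffer bitmap, and those three types may not appear anywhere else. A condensed decision function with the same structure; in the original, any unrecognized stored value also falls through to FIL_PAGE_TYPE_UNKNOWN:

enum page_type { FSP_HDR, XDES, IBUF_BITMAP, UNKNOWN, OTHER };

/* What the page type must be, given only the page number, in a file
with 16384 pages per descriptor group (16 KiB page size). */
page_type expected_type(unsigned long page_no, page_type stored)
{
	switch (page_no % 16384) {
	case 0:
		return(page_no == 0 ? FSP_HDR : XDES);
	case 1:
		return(IBUF_BITMAP);
	default:
		/* The three special types must not appear at other
		positions; anything else stored there is kept. */
		if (stored == FSP_HDR || stored == XDES
		    || stored == IBUF_BITMAP) {
			return(UNKNOWN);
		}
		return(stored);
	}
}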
@@ -876,17 +994,16 @@ buf_flush_write_block_low(
 	buf_flush_t	flush_type,	/*!< in: type of flush */
 	bool		sync)		/*!< in: true if sync IO request */
 {
-	ulint	zip_size	= buf_page_get_zip_size(bpage);
-	page_t*	frame		= NULL;
+	page_t*	frame = NULL;
 
 #ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
 	ut_ad(!buf_pool_mutex_own(buf_pool));
-#endif
+#endif /* UNIV_DEBUG */
 
-#ifdef UNIV_LOG_DEBUG
-	static ibool	univ_log_debug_warned;
-#endif /* UNIV_LOG_DEBUG */
+	DBUG_PRINT("ib_buf", ("flush %s %u page " UINT32PF ":" UINT32PF,
+			      sync ? "sync" : "async", (unsigned) flush_type,
+			      bpage->id.space(), bpage->id.page_no()));
 
 	ut_ad(buf_page_in_file(bpage));
 
@@ -897,27 +1014,21 @@ buf_flush_write_block_low(
 	LRU_list. */
 	ut_ad(!buf_pool_mutex_own(buf_pool));
 	ut_ad(!buf_flush_list_mutex_own(buf_pool));
-	ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(!buf_page_get_mutex(bpage)->is_owned());
 	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
 	ut_ad(bpage->oldest_modification != 0);
 
 #ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
-#endif
+	ut_a(ibuf_count_get(bpage->id) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+
 	ut_ad(bpage->newest_modification != 0);
 
-#ifdef UNIV_LOG_DEBUG
-	if (!univ_log_debug_warned) {
-		univ_log_debug_warned = TRUE;
-		fputs("Warning: cannot force log to disk if"
-		      " UNIV_LOG_DEBUG is defined!\n"
-		      "Crash recovery will not work!\n",
-		      stderr);
-	}
-#else
 	/* Force the log to the disk before writing the modified block */
-	log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
-#endif
+	if (!srv_read_only_mode) {
+		log_write_up_to(bpage->newest_modification, true);
+	}
+
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_POOL_WATCH:
 	case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
@@ -929,12 +1040,11 @@ buf_flush_write_block_low(
 		break;
 	case BUF_BLOCK_ZIP_DIRTY:
 		frame = bpage->zip.data;
+
 		mach_write_to_8(frame + FIL_PAGE_LSN,
 				bpage->newest_modification);
 
-		ut_a(page_zip_verify_checksum(frame, zip_size));
-
-		memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+		ut_a(page_zip_verify_checksum(frame, bpage->size.physical()));
 		break;
 	case BUF_BLOCK_FILE_PAGE:
 		frame = bpage->zip.data;
@@ -942,19 +1052,35 @@ buf_flush_write_block_low(
 			frame = ((buf_block_t*) bpage)->frame;
 		}
 
-		buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
-					   bpage->zip.data
-					   ? &bpage->zip : NULL,
-					   bpage->newest_modification);
+		buf_flush_init_for_writing(
+			reinterpret_cast<const buf_block_t*>(bpage),
+			reinterpret_cast<const buf_block_t*>(bpage)->frame,
+			bpage->zip.data ? &bpage->zip : NULL,
+			bpage->newest_modification,
+			fsp_is_checksum_disabled(bpage->id.space()));
 		break;
 	}
 
-	if (!srv_use_doublewrite_buf || !buf_dblwr) {
-		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
-		       sync, buf_page_get_space(bpage), zip_size,
-		       buf_page_get_page_no(bpage), 0,
-		       zip_size ? zip_size : UNIV_PAGE_SIZE,
+	/* Disable use of the doublewrite buffer for the temporary
+	tablespace. Given the nature and load of the temporary tablespace,
+	the doublewrite buffer only adds overhead during flushing. */
+
+	if (!srv_use_doublewrite_buf
+	    || buf_dblwr == NULL
+	    || srv_read_only_mode
+	    || fsp_is_system_temporary(bpage->id.space())) {
+
+		ut_ad(!srv_read_only_mode
+		      || fsp_is_system_temporary(bpage->id.space()));
+
+		ulint	type = IORequest::WRITE | IORequest::DO_NOT_WAKE;
+
+		IORequest	request(type);
+
+		fil_io(request,
+		       sync, bpage->id, bpage->size, 0, bpage->size.physical(),
 		       frame, bpage);
+
 	} else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
 		buf_dblwr_write_single_page(bpage, sync);
 	} else {
@@ -967,8 +1093,11 @@ buf_flush_write_block_low(
 	are working on. */
 	if (sync) {
 		ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE);
-		fil_flush(buf_page_get_space(bpage));
-		buf_page_io_complete(bpage);
+		fil_flush(bpage->id.space());
+
+		/* true means we want to evict this page from the
+		LRU list as well. */
+		buf_page_io_complete(bpage, true);
 	}
 
 	/* Increment the counter of I/O operations used
@@ -984,8 +1113,7 @@ writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
 held upon entering this function, and they will be released by this
 function if it returns true.
 @return TRUE if the page was flushed */
-UNIV_INTERN
-bool
+ibool
 buf_flush_page(
 /*===========*/
 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
@@ -993,47 +1121,50 @@ buf_flush_page(
 	buf_flush_t	flush_type,	/*!< in: type of flush */
 	bool		sync)		/*!< in: true if sync IO request */
 {
+	BPageMutex*	block_mutex;
+
 	ut_ad(flush_type < BUF_FLUSH_N_TYPES);
 	ut_ad(buf_pool_mutex_own(buf_pool));
 	ut_ad(buf_page_in_file(bpage));
 	ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE);
 
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
-
+	block_mutex = buf_page_get_mutex(bpage);
 	ut_ad(mutex_own(block_mutex));
 
 	ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
 
-        bool            is_uncompressed;
+	bool	is_uncompressed;
 
-        is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-        ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
+	is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
 
-        ibool           flush;
-        rw_lock_t*	rw_lock;
-        bool            no_fix_count = bpage->buf_fix_count == 0;
+	ibool		flush;
+	rw_lock_t*	rw_lock;
+	bool		no_fix_count = bpage->buf_fix_count == 0;
 
-        if (!is_uncompressed) {
-                flush = TRUE;
+	if (!is_uncompressed) {
+		flush = TRUE;
 		rw_lock = NULL;
-
-	} else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)) {
-		/* This is a heuristic, to avoid expensive S attempts. */
+	} else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)
+		   || (!no_fix_count
+		       && srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP
+		       && fsp_is_system_temporary(bpage->id.space()))) {
+		/* This is a heuristic, to avoid expensive SX attempts. */
+		/* For tables residing in the temporary tablespace, sync is
+		done using IO_FIX; so before scheduling a flush, ensure
+		that the page is not fixed. */
 		flush = FALSE;
 	} else {
-
 		rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock;
-
 		if (flush_type != BUF_FLUSH_LIST) {
-			flush = rw_lock_s_lock_gen_nowait(
-				rw_lock, BUF_IO_WRITE);
+			flush = rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE);
 		} else {
-			/* Will S lock later */
+			/* Will SX lock later */
 			flush = TRUE;
 		}
 	}
 
-        if (flush) {
+	if (flush) {
 
 		/* We are committed to flushing by the time we get here */
 
@@ -1042,33 +1173,51 @@ buf_flush_page(
 		buf_page_set_flush_type(bpage, flush_type);
 
 		if (buf_pool->n_flush[flush_type] == 0) {
-
 			os_event_reset(buf_pool->no_flush[flush_type]);
 		}
 
 		++buf_pool->n_flush[flush_type];
 
 		mutex_exit(block_mutex);
+
 		buf_pool_mutex_exit(buf_pool);
 
 		if (flush_type == BUF_FLUSH_LIST
 		    && is_uncompressed
-		    && !rw_lock_s_lock_gen_nowait(rw_lock, BUF_IO_WRITE)) {
-			/* avoiding deadlock possibility involves doublewrite
-			buffer, should flush it, because it might hold the
-			another block->lock. */
-			buf_dblwr_flush_buffered_writes();
+		    && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
+
+			if (!fsp_is_system_temporary(bpage->id.space())) {
+				/* To avoid a possible deadlock involving
+				the doublewrite buffer, flush it, because
+				it might hold another block->lock. */
+				buf_dblwr_flush_buffered_writes();
+			} else {
+				buf_dblwr_sync_datafiles();
+			}
+
+			rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE);
+		}
+
+		/* If there is an observer that wants to know when the
+		asynchronous flushing was submitted, notify it.
+		Note: we set the flush observer on a page under its x-latch,
+		so we can guarantee that notify_flush and notify_remove are
+		called as a pair with the s-latch held on an uncompressed
+		page. */
+		if (bpage->flush_observer != NULL) {
+			buf_pool_mutex_enter(buf_pool);
+
+			bpage->flush_observer->notify_flush(buf_pool, bpage);
 
-			rw_lock_s_lock_gen(rw_lock, BUF_IO_WRITE);
-                }
+			buf_pool_mutex_exit(buf_pool);
+		}
 
-                /* Even though bpage is not protected by any mutex at this
-                point, it is safe to access bpage, because it is io_fixed and
-                oldest_modification != 0.  Thus, it cannot be relocated in the
-                buffer pool or removed from flush_list or LRU_list. */
+		/* Even though bpage is not protected by any mutex at this
+		point, it is safe to access bpage, because it is io_fixed and
+		oldest_modification != 0.  Thus, it cannot be relocated in the
+		buffer pool or removed from flush_list or LRU_list. */
 
-                buf_flush_write_block_low(bpage, flush_type, sync);
-        }
+		buf_flush_write_block_low(bpage, flush_type, sync);
+	}
 
 	return(flush);
 }
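
The locking order in buf_flush_page() deserves a gloss: the page is IO-fixed first, then for flush-list flushes an SX latch is attempted without waiting; if that fails, the doublewrite buffer is drained (a queued write may be what still holds the latch) before blocking on the latch for real. A schematic of that try-then-drain-then-block pattern; this is shape only, with a plain mutex standing in for the page latch:

#include <mutex>

std::mutex	dblwr_mutex;	/* stands in for the doublewrite machinery */

/* Placeholder for buf_dblwr_flush_buffered_writes(): completing the
queued writes releases any page latches they still hold. */
void flush_buffered_doublewrites()
{
	std::lock_guard<std::mutex> g(dblwr_mutex);
}

/* Shape of the BUF_FLUSH_LIST path above: try the latch without
waiting; on failure, drain the doublewrite buffer so that a latch
held by a queued write cannot deadlock us, then block. */
void lock_page_for_write(std::timed_mutex& page_latch)
{
	if (!page_latch.try_lock()) {
		flush_buffered_doublewrites();
		page_latch.lock();
	}
}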
@@ -1080,7 +1229,6 @@ NOTE: buf_pool->mutex and block->mutex must be held upon entering this
 function, and they will be released by this function after flushing.
 This is loosely based on buf_flush_batch() and buf_flush_page().
 @return TRUE if the page was flushed and the mutexes released */
-UNIV_INTERN
 ibool
 buf_flush_page_try(
 /*===============*/
@@ -1089,7 +1237,7 @@ buf_flush_page_try(
 {
 	ut_ad(buf_pool_mutex_own(buf_pool));
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-	ut_ad(mutex_own(&block->mutex));
+	ut_ad(buf_page_mutex_own(block));
 
 	if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) {
 		return(FALSE);
@@ -1098,23 +1246,23 @@ buf_flush_page_try(
 	/* The following call will release the buffer pool and
 	block mutex. */
 	return(buf_flush_page(
-			buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true));
+			buf_pool, &block->page,
+			BUF_FLUSH_SINGLE_PAGE, true));
 }
 # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/***********************************************************//**
-Check the page is in buffer pool and can be flushed.
-@return	true if the page can be flushed. */
+
+/** Check whether the page is in the buffer pool and can be flushed.
+@param[in]	page_id		page id
+@param[in]	flush_type	BUF_FLUSH_LRU or BUF_FLUSH_LIST
+@return true if the page can be flushed. */
 static
 bool
 buf_flush_check_neighbor(
-/*=====================*/
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page offset */
-	buf_flush_t	flush_type)	/*!< in: BUF_FLUSH_LRU or
-					BUF_FLUSH_LIST */
+	const page_id_t&	page_id,
+	buf_flush_t		flush_type)
 {
 	buf_page_t*	bpage;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 	bool		ret;
 
 	ut_ad(flush_type == BUF_FLUSH_LRU
@@ -1123,7 +1271,7 @@ buf_flush_check_neighbor(
 	buf_pool_mutex_enter(buf_pool);
 
 	/* We only want to flush pages from this buffer pool. */
-	bpage = buf_page_hash_get(buf_pool, space, offset);
+	bpage = buf_page_hash_get(buf_pool, page_id);
 
 	if (!bpage) {
 
@@ -1138,7 +1286,7 @@ buf_flush_check_neighbor(
 
 	ret = false;
 	if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) {
-		ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+		BPageMutex* block_mutex = buf_page_get_mutex(bpage);
 
 		mutex_enter(block_mutex);
 		if (buf_flush_ready_for_flush(bpage, flush_type)) {
@@ -1151,26 +1299,25 @@ buf_flush_check_neighbor(
 	return(ret);
 }
 
-/***********************************************************//**
-Flushes to disk all flushable pages within the flush area.
-@return	number of pages flushed */
+/** Flushes to disk all flushable pages within the flush area.
+@param[in]	page_id		page id
+@param[in]	flush_type	BUF_FLUSH_LRU or BUF_FLUSH_LIST
+@param[in]	n_flushed	number of pages flushed so far in this batch
+@param[in]	n_to_flush	maximum number of pages we are allowed to flush
+@return number of pages flushed */
 static
 ulint
 buf_flush_try_neighbors(
-/*====================*/
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page offset */
-	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU or
-					BUF_FLUSH_LIST */
-	ulint		n_flushed,	/*!< in: number of pages
-					flushed so far in this batch */
-	ulint		n_to_flush)	/*!< in: maximum number of pages
-					we are allowed to flush */
+	const page_id_t&	page_id,
+	buf_flush_t		flush_type,
+	ulint			n_flushed,
+	ulint			n_to_flush)
 {
 	ulint		i;
 	ulint		low;
 	ulint		high;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
+	ulint		count = 0;
+	buf_pool_t*	buf_pool = buf_pool_get(page_id);
 
 	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 
@@ -1178,8 +1325,8 @@ buf_flush_try_neighbors(
 	    || srv_flush_neighbors == 0) {
 		/* If there is little space or neighbor flushing is
 		not enabled then just flush the victim. */
-		low = offset;
-		high = offset + 1;
+		low = page_id.page_no();
+		high = page_id.page_no() + 1;
 	} else {
 		/* When flushed, dirty blocks are searched in
 		neighborhoods of this size, and flushed along with the
@@ -1191,27 +1338,38 @@ buf_flush_try_neighbors(
 			BUF_READ_AHEAD_AREA(buf_pool),
 			buf_pool->curr_size / 16);
 
-		low = (offset / buf_flush_area) * buf_flush_area;
-		high = (offset / buf_flush_area + 1) * buf_flush_area;
+		low = (page_id.page_no() / buf_flush_area) * buf_flush_area;
+		high = (page_id.page_no() / buf_flush_area + 1) * buf_flush_area;
 
 		if (srv_flush_neighbors == 1) {
 			/* adjust 'low' and 'high' to limit
 			   for contiguous dirty area */
-			if (offset > low) {
-				for (i = offset - 1;
-				     i >= low
-				     && buf_flush_check_neighbor(
-						space, i, flush_type);
-				     i--) {
-					/* do nothing */
+			if (page_id.page_no() > low) {
+				for (i = page_id.page_no() - 1; i >= low; i--) {
+					if (!buf_flush_check_neighbor(
+						page_id_t(page_id.space(), i),
+						flush_type)) {
+
+						break;
+					}
+
+					if (i == low) {
+						/* Avoid wrap-around when
+						low == 0: otherwise we
+						would call
+						buf_flush_check_neighbor()
+						with i == (ulint) -1 */
+						i--;
+						break;
+					}
 				}
 				low = i + 1;
 			}
 
-			for (i = offset + 1;
+			for (i = page_id.page_no() + 1;
 			     i < high
 			     && buf_flush_check_neighbor(
-						space, i, flush_type);
+				     page_id_t(page_id.space(), i),
+				     flush_type);
 			     i++) {
 				/* do nothing */
 			}
@@ -1219,15 +1377,17 @@ buf_flush_try_neighbors(
 		}
 	}
 
-	/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
-
-	if (high > fil_space_get_size(space)) {
-		high = fil_space_get_size(space);
+	const ulint	space_size = fil_space_get_size(page_id.space());
+	if (high > space_size) {
+		high = space_size;
 	}
 
-	ulint	count = 0;
+	DBUG_PRINT("ib_buf", ("flush " UINT32PF ":%u..%u",
+			      page_id.space(),
+			      (unsigned) low, (unsigned) high));
 
-	for (i = low; i < high; i++) {
+	for (ulint i = low; i < high; i++) {
+		buf_page_t*	bpage;
 
 		if ((count + n_flushed) >= n_to_flush) {
 
@@ -1237,19 +1397,21 @@ buf_flush_try_neighbors(
 			are flushing has not been flushed yet then
 			we'll try to flush the victim that we
 			selected originally. */
-			if (i <= offset) {
-				i = offset;
+			if (i <= page_id.page_no()) {
+				i = page_id.page_no();
 			} else {
 				break;
 			}
 		}
 
-		buf_pool = buf_pool_get(space, i);
+		const page_id_t	cur_page_id(page_id.space(), i);
+
+		buf_pool = buf_pool_get(cur_page_id);
 
 		buf_pool_mutex_enter(buf_pool);
 
 		/* We only want to flush pages from this buffer pool. */
-		buf_page_t*	bpage = buf_page_hash_get(buf_pool, space, i);
+		bpage = buf_page_hash_get(buf_pool, cur_page_id);
 
 		if (bpage == NULL) {
 
@@ -1263,70 +1425,76 @@ buf_flush_try_neighbors(
 		because the flushed blocks are soon freed */
 
 		if (flush_type != BUF_FLUSH_LRU
-		    || i == offset
+		    || i == page_id.page_no()
 		    || buf_page_is_old(bpage)) {
 
-			ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+			BPageMutex* block_mutex = buf_page_get_mutex(bpage);
 
 			mutex_enter(block_mutex);
 
 			if (buf_flush_ready_for_flush(bpage, flush_type)
-			    && (i == offset || bpage->buf_fix_count == 0)
-			    && buf_flush_page(
+			    && (i == page_id.page_no()
+				|| bpage->buf_fix_count == 0)) {
+
+				/* We also try to flush those
+				neighbors != offset */
+
+				if (buf_flush_page(
 					buf_pool, bpage, flush_type, false)) {
 
-				++count;
+					++count;
+				} else {
+					mutex_exit(block_mutex);
+					buf_pool_mutex_exit(buf_pool);
+				}
 
 				continue;
+			} else {
+				mutex_exit(block_mutex);
 			}
-
-			mutex_exit(block_mutex);
 		}
-
 		buf_pool_mutex_exit(buf_pool);
 	}
 
-	if (count > 0) {
+	if (count > 1) {
 		MONITOR_INC_VALUE_CUMULATIVE(
-					MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
-					MONITOR_FLUSH_NEIGHBOR_COUNT,
-					MONITOR_FLUSH_NEIGHBOR_PAGES,
-					(count - 1));
+			MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
+			MONITOR_FLUSH_NEIGHBOR_COUNT,
+			MONITOR_FLUSH_NEIGHBOR_PAGES,
+			(count - 1));
 	}
 
 	return(count);
 }
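
The low/high window used above is plain alignment arithmetic. A
self-contained sketch, where read_ahead_area and pool_size stand in
for BUF_READ_AHEAD_AREA(buf_pool) and buf_pool->curr_size:

	#include <algorithm>
	#include <cstdint>

	// Compute the neighbor-flush window around a victim page.
	void flush_window(uint32_t page_no, uint32_t read_ahead_area,
			  uint32_t pool_size,
			  uint32_t& low, uint32_t& high)
	{
		const uint32_t area =
			std::min(read_ahead_area, pool_size / 16);

		low  = (page_no / area) * area;	// round down to boundary
		high = low + area;		// one area past it
	}

With srv_flush_neighbors == 1 the window is then shrunk to the
contiguous dirty run around the victim, which is what the wrap-around
guard in the loop above protects.
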
 
-/********************************************************************//**
-Check if the block is modified and ready for flushing. If the the block
-is ready to flush then flush the page and try o flush its neighbors.
-
-@return	TRUE if buf_pool mutex was released during this function.
+/** Check if the block is modified and ready for flushing.
+If the block is ready to flush then flush the page and try to flush
+its neighbors.
+@param[in]	bpage		buffer control block,
+must be buf_page_in_file(bpage)
+@param[in]	flush_type	BUF_FLUSH_LRU or BUF_FLUSH_LIST
+@param[in]	n_to_flush	number of pages to flush
+@param[in,out]	count		number of pages flushed
+@return TRUE if buf_pool mutex was released during this function.
 This does not guarantee that any pages were actually written.
 The number of pages written is added to the count. */
 static
-ibool
+bool
 buf_flush_page_and_try_neighbors(
-/*=============================*/
-	buf_page_t*	bpage,		/*!< in: buffer control block,
-					must be
-					buf_page_in_file(bpage) */
-	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU
-					or BUF_FLUSH_LIST */
-	ulint		n_to_flush,	/*!< in: number of pages to
-					flush */
-	ulint*		count)		/*!< in/out: number of pages
-					flushed */
+	buf_page_t*		bpage,
+	buf_flush_t		flush_type,
+	ulint			n_to_flush,
+	ulint*			count)
 {
-	ibool		flushed;
-	ib_mutex_t*	block_mutex;
 #ifdef UNIV_DEBUG
 	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-#endif /* UNIV_DEBUG */
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
+#endif /* UNIV_DEBUG */
+
+	bool		flushed;
+	BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
 
-	block_mutex = buf_page_get_mutex(bpage);
 	mutex_enter(block_mutex);
 
 	ut_a(buf_page_in_file(bpage));
@@ -1336,26 +1504,22 @@ buf_flush_page_and_try_neighbors(
 
 		buf_pool = buf_pool_from_bpage(bpage);
 
-		buf_pool_mutex_exit(buf_pool);
-
-		/* These fields are protected by both the
-		buffer pool mutex and block mutex. */
-		ulint	space = buf_page_get_space(bpage);
-		ulint	offset = buf_page_get_page_no(bpage);
+		const page_id_t	page_id = bpage->id;
 
 		mutex_exit(block_mutex);
 
+		buf_pool_mutex_exit(buf_pool);
+
 		/* Also try to flush all the neighbors */
 		*count += buf_flush_try_neighbors(
-			space, offset, flush_type, *count, n_to_flush);
+			page_id, flush_type, *count, n_to_flush);
 
 		buf_pool_mutex_enter(buf_pool);
-
 		flushed = TRUE;
-
 	} else {
 		mutex_exit(block_mutex);
-		flushed = FALSE;
+
+		flushed = false;
 	}
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
@@ -1380,7 +1544,6 @@ buf_free_from_unzip_LRU_list_batch(
 	ulint		max)		/*!< in: desired number of
 					blocks in the free_list */
 {
-	buf_block_t*	block;
 	ulint		scanned = 0;
 	ulint		count = 0;
 	ulint		free_len = UT_LIST_GET_LEN(buf_pool->free);
@@ -1388,8 +1551,10 @@ buf_free_from_unzip_LRU_list_batch(
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
 
-	block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
-	while (block != NULL && count < max
+	buf_block_t*	block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+
+	while (block != NULL
+	       && count < max
 	       && free_len < srv_LRU_scan_depth
 	       && lru_len > UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
 
@@ -1438,90 +1603,65 @@ buf_flush_LRU_list_batch(
 					blocks in the free_list */
 {
 	buf_page_t*	bpage;
-	ulint		count = 0;
 	ulint		scanned = 0;
+	ulint		evict_count = 0;
+	ulint		count = 0;
 	ulint		free_len = UT_LIST_GET_LEN(buf_pool->free);
 	ulint		lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
+	ulint		withdraw_depth = 0;
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
 
-	bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-	while (bpage != NULL && count < max
-	       && free_len < srv_LRU_scan_depth
-	       && lru_len > BUF_LRU_MIN_LEN) {
+	if (buf_pool->curr_size < buf_pool->old_size
+	    && buf_pool->withdraw_target > 0) {
+		withdraw_depth = buf_pool->withdraw_target
+				 - UT_LIST_GET_LEN(buf_pool->withdraw);
+	}
 
-		ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-		ibool	 evict;
+	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+	     bpage != NULL && count + evict_count < max
+	     && free_len < srv_LRU_scan_depth + withdraw_depth
+	     && lru_len > BUF_LRU_MIN_LEN;
+	     ++scanned,
+	     bpage = buf_pool->lru_hp.get()) {
 
-		mutex_enter(block_mutex);
-		evict = buf_flush_ready_for_replace(bpage);
-		mutex_exit(block_mutex);
+		buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
+		buf_pool->lru_hp.set(prev);
 
-		++scanned;
+		BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
 
-		/* If the block is ready to be replaced we try to
-		free it i.e.: put it on the free list.
-		Otherwise we try to flush the block and its
-		neighbors. In this case we'll put it on the
-		free list in the next pass. We do this extra work
-		of putting blocks to the free list instead of
-		just flushing them because after every flush
-		we have to restart the scan from the tail of
-		the LRU list and if we don't clear the tail
-		of the flushed pages then the scan becomes
-		O(n*n). */
-		if (evict) {
+		mutex_enter(block_mutex);
+
+		if (buf_flush_ready_for_replace(bpage)) {
+			/* block is ready for eviction i.e., it is
+			clean and is not IO-fixed or buffer fixed. */
+			mutex_exit(block_mutex);
 			if (buf_LRU_free_page(bpage, true)) {
-				/* buf_pool->mutex was potentially
-				released and reacquired. */
-				bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-			} else {
-				bpage = UT_LIST_GET_PREV(LRU, bpage);
+				++evict_count;
 			}
+		} else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_LRU)) {
+			/* Block is ready for flush. Dispatch an IO
+			request. The IO helper thread will put it on
+			the free list in its IO completion routine. */
+			mutex_exit(block_mutex);
+			buf_flush_page_and_try_neighbors(
+				bpage, BUF_FLUSH_LRU, max, &count);
 		} else {
-			ulint		space;
-			ulint		offset;
-			buf_page_t*	prev_bpage;
-
-			prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
-			/* Save the previous bpage */
-
-			if (prev_bpage != NULL) {
-				space = prev_bpage->space;
-				offset = prev_bpage->offset;
-			} else {
-				space = ULINT_UNDEFINED;
-				offset = ULINT_UNDEFINED;
-			}
-
-			if (!buf_flush_page_and_try_neighbors(
-				bpage, BUF_FLUSH_LRU, max, &count)) {
-
-				bpage = prev_bpage;
-			} else {
-				/* buf_pool->mutex was released.
-				reposition the iterator. Note: the
-				prev block could have been repositioned
-				too but that should be rare. */
-
-				if (prev_bpage != NULL) {
-
-					ut_ad(space != ULINT_UNDEFINED);
-					ut_ad(offset != ULINT_UNDEFINED);
-
-					prev_bpage = buf_page_hash_get(
-						buf_pool, space, offset);
-				}
-
-				bpage = prev_bpage;
-			}
+			/* Can't evict or dispatch this block. Go to
+			previous. */
+			ut_ad(buf_pool->lru_hp.is_hp(prev));
+			mutex_exit(block_mutex);
 		}
 
+		ut_ad(!mutex_own(block_mutex));
+		ut_ad(buf_pool_mutex_own(buf_pool));
+
 		free_len = UT_LIST_GET_LEN(buf_pool->free);
 		lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
 	}
 
+	buf_pool->lru_hp.set(NULL);
+
 	/* We keep track of all flushes happening as part of LRU
 	flush. When estimating the desired rate at which flush_list
 	should be flushed, we factor in this value. */
@@ -1529,6 +1669,14 @@ buf_flush_LRU_list_batch(
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
 
+	if (evict_count) {
+		MONITOR_INC_VALUE_CUMULATIVE(
+			MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
+			MONITOR_LRU_BATCH_EVICT_COUNT,
+			MONITOR_LRU_BATCH_EVICT_PAGES,
+			evict_count);
+	}
+
 	if (scanned) {
 		MONITOR_INC_VALUE_CUMULATIVE(
 			MONITOR_LRU_BATCH_SCANNED,
@@ -1567,26 +1715,22 @@ buf_do_LRU_batch(
 	return(count);
 }
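
The rewritten LRU batch replaces the old save-space/offset-and-relookup
dance with buf_pool->lru_hp, a hazard pointer: the scan parks its next
victim there before the mutex can be released, and any thread that
unlinks that page must move the pointer back one node. A minimal
illustrative class (the real one also asserts mutex ownership):

	struct Page {
		Page*	prev;	// towards the LRU head
	};

	class HazardPointer {
	public:
		void	set(Page* p) { m_hp = p; }
		Page*	get() const { return m_hp; }
		bool	is_hp(const Page* p) const { return m_hp == p; }

		// Called under the list mutex by whoever unlinks 'p'
		// while a scan may be parked on it.
		void	adjust(const Page* p)
		{
			if (m_hp == p) {
				m_hp = m_hp->prev;
			}
		}

	private:
		Page*	m_hp = nullptr;
	};

This is what keeps the batch O(n): the removed code either re-looked
up the predecessor via buf_page_hash_get() or restarted from the LRU
tail, which could degrade to O(n*n) under a busy tail.
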
 
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush_list.
-the calling thread is not allowed to own any latches on pages!
+/** This utility flushes dirty blocks from the end of the flush_list.
+The calling thread is not allowed to own any latches on pages!
+@param[in]	buf_pool	buffer pool instance
+@param[in]	min_n		wished minimum number of blocks flushed (it is
+not guaranteed that the actual number is that big, though)
+@param[in]	lsn_limit	all blocks whose oldest_modification is smaller
+than this should be flushed (if their number does not exceed min_n)
 @return number of blocks for which the write request was queued;
 ULINT_UNDEFINED if there was a flush of the same type already
 running */
 static
 ulint
 buf_do_flush_list_batch(
-/*====================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	ulint		min_n,		/*!< in: wished minimum mumber
-					of blocks flushed (it is not
-					guaranteed that the actual
-					number is that big, though) */
-	lsn_t		lsn_limit)	/*!< all blocks whose
-					oldest_modification is smaller
-					than this should be flushed (if
-					their number does not exceed
-					min_n) */
+	buf_pool_t*		buf_pool,
+	ulint			min_n,
+	lsn_t			lsn_limit)
 {
 	ulint		count = 0;
 	ulint		scanned = 0;
@@ -1606,6 +1750,7 @@ buf_do_flush_list_batch(
 	for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
 	     count < min_n && bpage != NULL && len > 0
 	     && bpage->oldest_modification < lsn_limit;
+	     bpage = buf_pool->flush_hp.get(),
 	     ++scanned) {
 
 		buf_page_t*	prev;
@@ -1614,8 +1759,7 @@ buf_do_flush_list_batch(
 		ut_ad(bpage->in_flush_list);
 
 		prev = UT_LIST_GET_PREV(list, bpage);
-		buf_flush_set_hp(buf_pool, prev);
-
+		buf_pool->flush_hp.set(prev);
 		buf_flush_list_mutex_exit(buf_pool);
 
 #ifdef UNIV_DEBUG
@@ -1626,70 +1770,73 @@ buf_do_flush_list_batch(
 
 		buf_flush_list_mutex_enter(buf_pool);
 
-		ut_ad(flushed || buf_flush_is_hp(buf_pool, prev));
-
-		if (!buf_flush_is_hp(buf_pool, prev)) {
-			/* The hazard pointer was reset by some other
-			thread. Restart the scan. */
-			ut_ad(buf_flush_is_hp(buf_pool, NULL));
-			bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-			len = UT_LIST_GET_LEN(buf_pool->flush_list);
-		} else {
-			bpage = prev;
-			--len;
-			buf_flush_set_hp(buf_pool, NULL);
-		}
+		ut_ad(flushed || buf_pool->flush_hp.is_hp(prev));
 
-		ut_ad(!bpage || bpage->in_flush_list);
+		--len;
 	}
 
+	buf_pool->flush_hp.set(NULL);
 	buf_flush_list_mutex_exit(buf_pool);
 
-	MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_SCANNED,
-				     MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
-				     MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
-				     scanned);
+	if (scanned) {
+		MONITOR_INC_VALUE_CUMULATIVE(
+			MONITOR_FLUSH_BATCH_SCANNED,
+			MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
+			MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
+			scanned);
+	}
+
+	if (count) {
+		MONITOR_INC_VALUE_CUMULATIVE(
+			MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+			MONITOR_FLUSH_BATCH_COUNT,
+			MONITOR_FLUSH_BATCH_PAGES,
+			count);
+	}
 
 	ut_ad(buf_pool_mutex_own(buf_pool));
 
 	return(count);
 }
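
The flush-list batch follows the same protocol through
buf_pool->flush_hp: save the predecessor, drop the list mutex for the
I/O, and resume from wherever the hazard pointer ends up. A toy model
of the loop shape (std::list stands in for the flush list and no real
locking is done):

	#include <iterator>
	#include <list>

	struct DirtyPage {
		unsigned long long	oldest_modification;
	};

	// Scan from the tail, remember the predecessor before
	// "flushing", then continue from that saved position.
	unsigned flush_list_batch(std::list<DirtyPage>& flush_list,
				  unsigned min_n,
				  unsigned long long lsn_limit)
	{
		unsigned	count = 0;

		for (auto it = flush_list.rbegin();
		     it != flush_list.rend()
		     && count < min_n
		     && it->oldest_modification < lsn_limit; ) {

			auto	prev = std::next(it);	// "hazard pointer"

			/* ...drop the list mutex, write the page out,
			then retake the mutex... */

			++count;
			it = prev;	// resume at the saved spot
		}

		return count;
	}
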
 
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
+/** This utility flushes dirty blocks from the end of the LRU list or
+flush_list.
 NOTE 1: in the case of an LRU flush the calling thread may own latches to
 pages: to avoid deadlocks, this function must be written so that it cannot
 end up waiting for these latches! NOTE 2: in the case of a flush list flush,
 the calling thread is not allowed to own any latches on pages!
+@param[in]	buf_pool	buffer pool instance
+@param[in]	flush_type	BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
+BUF_FLUSH_LIST, then the caller must not own any latches on pages
+@param[in]	min_n		wished minimum number of blocks flushed (it is
+not guaranteed that the actual number is that big, though)
+@param[in]	lsn_limit	in the case of BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
 @return number of blocks for which the write request was queued */
 static
 ulint
 buf_flush_batch(
-/*============*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU or
-					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
-					then the caller must not own any
-					latches on pages */
-	ulint		min_n,		/*!< in: wished minimum mumber of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	lsn_t		lsn_limit)	/*!< in: in the case of BUF_FLUSH_LIST
-					all blocks whose oldest_modification is
-					smaller than this should be flushed
-					(if their number does not exceed
-					min_n), otherwise ignored */
+	buf_pool_t*		buf_pool,
+	buf_flush_t		flush_type,
+	ulint			min_n,
+	lsn_t			lsn_limit)
 {
-	ulint		count	= 0;
-
 	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad((flush_type != BUF_FLUSH_LIST)
-	      || sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
+
+#ifdef UNIV_DEBUG
+	{
+		dict_sync_check	check(true);
+
+		ut_ad(flush_type != BUF_FLUSH_LIST
+		      || !sync_check_iterate(check));
+	}
+#endif /* UNIV_DEBUG */
 
 	buf_pool_mutex_enter(buf_pool);
 
+	ulint	count = 0;
+
 	/* Note: The buffer pool mutex is released and reacquired within
 	the flush functions. */
 	switch (flush_type) {
@@ -1705,41 +1852,30 @@ buf_flush_batch(
 
 	buf_pool_mutex_exit(buf_pool);
 
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints && count > 0) {
-		fprintf(stderr, flush_type == BUF_FLUSH_LRU
-			? "Flushed %lu pages in LRU flush\n"
-			: "Flushed %lu pages in flush list flush\n",
-			(ulong) count);
-	}
-#endif /* UNIV_DEBUG */
+	DBUG_PRINT("ib_buf", ("flush %u completed, %u pages",
+			      unsigned(flush_type), unsigned(count)));
 
 	return(count);
 }
 
 /******************************************************************//**
-Gather the aggregated stats for both flush list and LRU list flushing */
+Gather the aggregated stats for both flush list and LRU list flushing.
+@param page_count_flush	number of pages flushed from the end of the flush_list
+@param page_count_LRU	number of pages flushed from the end of the LRU list
+*/
 static
 void
-buf_flush_common(
-/*=============*/
-	buf_flush_t	flush_type,	/*!< in: type of flush */
-	ulint		page_count)	/*!< in: number of pages flushed */
+buf_flush_stats(
+/*============*/
+	ulint		page_count_flush,
+	ulint		page_count_LRU)
 {
-	buf_dblwr_flush_buffered_writes();
-
-	ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+	DBUG_PRINT("ib_buf", ("flush completed, from flush_list %u pages, "
+			      "from LRU_list %u pages",
+			      unsigned(page_count_flush),
+			      unsigned(page_count_LRU)));
 
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints && page_count > 0) {
-		fprintf(stderr, flush_type == BUF_FLUSH_LRU
-			? "Flushed %lu pages in LRU flush\n"
-			: "Flushed %lu pages in flush list flush\n",
-			(ulong) page_count);
-	}
-#endif /* UNIV_DEBUG */
-
-	srv_stats.buf_pool_flushed.add(page_count);
+	srv_stats.buf_pool_flushed.add(page_count_flush + page_count_LRU);
 }
 
 /******************************************************************//**
@@ -1752,6 +1888,8 @@ buf_flush_start(
 	buf_flush_t	flush_type)	/*!< in: BUF_FLUSH_LRU
 					or BUF_FLUSH_LIST */
 {
+	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+
 	buf_pool_mutex_enter(buf_pool);
 
 	if (buf_pool->n_flush[flush_type] > 0
@@ -1766,6 +1904,8 @@ buf_flush_start(
 
 	buf_pool->init_flush[flush_type] = TRUE;
 
+	os_event_reset(buf_pool->no_flush[flush_type]);
+
 	buf_pool_mutex_exit(buf_pool);
 
 	return(TRUE);
@@ -1795,11 +1935,16 @@ buf_flush_end(
 	}
 
 	buf_pool_mutex_exit(buf_pool);
+
+	if (!srv_read_only_mode) {
+		buf_dblwr_flush_buffered_writes();
+	} else {
+		os_aio_simulated_wake_handler_threads();
+	}
 }
 
 /******************************************************************//**
 Waits until a flush batch of the given type ends */
-UNIV_INTERN
 void
 buf_flush_wait_batch_end(
 /*=====================*/
@@ -1828,76 +1973,124 @@ buf_flush_wait_batch_end(
 	}
 }
 
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list and also
-puts replaceable clean pages from the end of the LRU list to the free
-list.
+/** Do a flushing batch of a given type.
 NOTE: The calling thread is not allowed to own any latches on pages!
-@return true if a batch was queued successfully. false if another batch
-of same type was already running. */
-static
+@param[in,out]	buf_pool	buffer pool instance
+@param[in]	type		flush type
+@param[in]	min_n		wished minimum number of blocks flushed
+(it is not guaranteed that the actual number is that big, though)
+@param[in]	lsn_limit	in the case of BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
+@param[out]	n_processed	the number of pages which were processed is
+passed back to caller. Ignored if NULL
+@retval true	if a batch was queued successfully.
+@retval false	if another batch of the same type was already running. */
 bool
-buf_flush_LRU(
-/*==========*/
-	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
-	ulint		min_n,		/*!< in: wished minimum mumber of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	ulint*		n_processed)	/*!< out: the number of pages
-					which were processed is passed
-					back to caller. Ignored if NULL */
+buf_flush_do_batch(
+	buf_pool_t*		buf_pool,
+	buf_flush_t		type,
+	ulint			min_n,
+	lsn_t			lsn_limit,
+	ulint*			n_processed)
 {
-	ulint		page_count;
+	ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
 
-	if (n_processed) {
+	if (n_processed != NULL) {
 		*n_processed = 0;
 	}
 
-	if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
+	if (!buf_flush_start(buf_pool, type)) {
 		return(false);
 	}
 
-	page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
-
-	buf_flush_end(buf_pool, BUF_FLUSH_LRU);
+	ulint	page_count = buf_flush_batch(buf_pool, type, min_n, lsn_limit);
 
-	buf_flush_common(BUF_FLUSH_LRU, page_count);
+	buf_flush_end(buf_pool, type);
 
-	if (n_processed) {
+	if (n_processed != NULL) {
 		*n_processed = page_count;
 	}
 
 	return(true);
 }
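
Both batch types now funnel through this one entry point. A
hypothetical call site, as a fragment only (these symbols exist in
the server, not standalone):

	ulint	n_flushed = 0;

	/* Queue an LRU batch of up to scan_depth pages on one
	instance; lsn_limit is ignored for BUF_FLUSH_LRU. */
	if (buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU,
			       scan_depth, 0, &n_flushed)) {
		/* batch was queued; n_flushed pages dispatched */
	}
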
 
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush list of
-all buffer pool instances.
-NOTE: The calling thread is not allowed to own any latches on pages!
- at return true if a batch was queued successfully for each buffer pool
-instance. false if another batch of same type was already running in
-at least one of the buffer pool instance */
-UNIV_INTERN
-bool
-buf_flush_list(
-/*===========*/
-	ulint		min_n,		/*!< in: wished minimum mumber of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	lsn_t		lsn_limit,	/*!< in the case BUF_FLUSH_LIST all
-					blocks whose oldest_modification is
-					smaller than this should be flushed
-					(if their number does not exceed
-					min_n), otherwise ignored */
-	ulint*		n_processed)	/*!< out: the number of pages
-					which were processed is passed
-					back to caller. Ignored if NULL */
+/**
+Waits until a flush batch of the given lsn ends.
+@param[in]	new_oldest	target oldest_modified_lsn to wait for */
 
+void
+buf_flush_wait_flushed(
+	lsn_t		new_oldest)
 {
-	ulint		i;
-	bool		success = true;
+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool;
+		lsn_t		oldest;
 
-	if (n_processed) {
+		buf_pool = buf_pool_from_array(i);
+
+		for (;;) {
+			/* We don't need to wait for fsync of the flushed
+			blocks, because we will need an fsync to make the
+			checkpoint anyway. So we don't need to wait for
+			the batch end here. */
+
+			buf_flush_list_mutex_enter(buf_pool);
+
+			buf_page_t*	bpage;
+
+			/* We don't need to wait for system temporary pages */
+			for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+			     bpage != NULL
+				&& fsp_is_system_temporary(bpage->id.space());
+			     bpage = UT_LIST_GET_PREV(list, bpage)) {
+				/* Do nothing. */
+			}
+
+			if (bpage != NULL) {
+				ut_ad(bpage->in_flush_list);
+				oldest = bpage->oldest_modification;
+			} else {
+				oldest = 0;
+			}
+
+			buf_flush_list_mutex_exit(buf_pool);
+
+			if (oldest == 0 || oldest >= new_oldest) {
+				break;
+			}
+
+			/* sleep and retry */
+			os_thread_sleep(buf_flush_wait_flushed_sleep_time);
+
+			MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
+		}
+	}
+}
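
Stripped of the buffer-pool plumbing, the wait above is a plain
poll-and-sleep loop. A generic sketch, where oldest_lsn() stands in
for scanning the flush_list tail past temporary-tablespace pages and
the sleep interval is illustrative:

	#include <chrono>
	#include <functional>
	#include <thread>

	using lsn_t = unsigned long long;

	// Block until every page older than 'new_oldest' has been
	// flushed; a probe returning 0 means the list is empty.
	void wait_flushed(lsn_t new_oldest,
			  const std::function<lsn_t()>& oldest_lsn)
	{
		for (;;) {
			const lsn_t	oldest = oldest_lsn();

			if (oldest == 0 || oldest >= new_oldest) {
				break;
			}

			std::this_thread::sleep_for(
				std::chrono::microseconds(10000));
		}
	}
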
+
+/** This utility flushes dirty blocks from the end of the flush list of all
+buffer pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages!
+@param[in]	min_n		wished minimum number of blocks flushed (it is
+not guaranteed that the actual number is that big, though)
+@param[in]	lsn_limit	in the case of BUF_FLUSH_LIST all blocks whose
+oldest_modification is smaller than this should be flushed (if their number
+does not exceed min_n), otherwise ignored
+@param[out]	n_processed	the number of pages which were processed is
+passed back to caller. Ignored if NULL.
+ at return true if a batch was queued successfully for each buffer pool
+instance. false if another batch of the same type was already running in
+at least one of the buffer pool instances */
+bool
+buf_flush_lists(
+	ulint			min_n,
+	lsn_t			lsn_limit,
+	ulint*			n_processed)
+{
+	ulint		i;
+	ulint		n_flushed = 0;
+	bool		success = true;
+
+	if (n_processed) {
 		*n_processed = 0;
 	}
 
@@ -1917,7 +2110,11 @@ buf_flush_list(
 
 		buf_pool = buf_pool_from_array(i);
 
-		if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
+		if (!buf_flush_do_batch(buf_pool,
+					BUF_FLUSH_LIST,
+					min_n,
+					lsn_limit,
+					&page_count)) {
 			/* We have two choices here. If lsn_limit was
 			specified then skipping an instance of buffer
 			pool means we cannot guarantee that all pages
@@ -1933,124 +2130,158 @@ buf_flush_list(
 			continue;
 		}
 
-		page_count = buf_flush_batch(
-			buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
-
-		buf_flush_end(buf_pool, BUF_FLUSH_LIST);
-
-		buf_flush_common(BUF_FLUSH_LIST, page_count);
+		n_flushed += page_count;
+	}
 
-		if (n_processed) {
-			*n_processed += page_count;
-		}
+	if (n_flushed) {
+		buf_flush_stats(n_flushed, 0);
+	}
 
-		if (page_count) {
-			MONITOR_INC_VALUE_CUMULATIVE(
-				MONITOR_FLUSH_BATCH_TOTAL_PAGE,
-				MONITOR_FLUSH_BATCH_COUNT,
-				MONITOR_FLUSH_BATCH_PAGES,
-				page_count);
-		}
+	if (n_processed) {
+		*n_processed = n_flushed;
 	}
 
 	return(success);
 }
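
A checkpoint-style caller would chain the two functions above: ask
every instance to flush up to a target LSN, then wait until nothing
older remains dirty. A hypothetical fragment using the server's own
symbols:

	ulint	n_pages = 0;

	buf_flush_lists(ULINT_MAX, limit_lsn, &n_pages);
	buf_flush_wait_flushed(limit_lsn);
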
 
 /******************************************************************//**
-This function picks up a single dirty page from the tail of the LRU
-list, flushes it, removes it from page_hash and LRU list and puts
-it on the free list. It is called from user threads when they are
-unable to find a replaceable page at the tail of the LRU list i.e.:
-when the background LRU flushing in the page_cleaner thread is not
-fast enough to keep pace with the workload.
-@return TRUE if success. */
-UNIV_INTERN
-ibool
+This function picks up a single page from the tail of the LRU
+list, flushes it (if it is dirty), removes it from page_hash and LRU
+list and puts it on the free list. It is called from user threads when
+they are unable to find a replaceable page at the tail of the LRU
+list, i.e. when the background LRU flushing in the page_cleaner thread
+is not fast enough to keep pace with the workload.
+@return true if success. */
+bool
 buf_flush_single_page_from_LRU(
 /*===========================*/
 	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance */
 {
 	ulint		scanned;
 	buf_page_t*	bpage;
+	ibool		freed;
 
 	buf_pool_mutex_enter(buf_pool);
 
-	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), scanned = 1;
+	for (bpage = buf_pool->single_scan_itr.start(), scanned = 0,
+	     freed = false;
 	     bpage != NULL;
-	     bpage = UT_LIST_GET_PREV(LRU, bpage), ++scanned) {
+	     ++scanned, bpage = buf_pool->single_scan_itr.get()) {
+
+		ut_ad(buf_pool_mutex_own(buf_pool));
+
+		buf_page_t*	prev = UT_LIST_GET_PREV(LRU, bpage);
+
+		buf_pool->single_scan_itr.set(prev);
 
-		ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
+		BPageMutex*	block_mutex;
+
+		block_mutex = buf_page_get_mutex(bpage);
 
 		mutex_enter(block_mutex);
 
-		if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) {
+		if (buf_flush_ready_for_replace(bpage)) {
+			/* block is ready for eviction i.e., it is
+			clean and is not IO-fixed or buffer fixed. */
+			mutex_exit(block_mutex);
 
-			/* The following call will release the buffer pool
-			and block mutex. */
+			if (buf_LRU_free_page(bpage, true)) {
+				buf_pool_mutex_exit(buf_pool);
+				freed = true;
+				break;
+			}
+
+		} else if (buf_flush_ready_for_flush(
+				   bpage, BUF_FLUSH_SINGLE_PAGE)) {
+
+			/* Block is ready for flush. Try and dispatch an IO
+			request. We'll put it on the free list in the IO
+			completion routine if it is not buffer fixed. The
+			following call will release the buffer pool and
+			block mutex.
 
-			ibool	flushed = buf_flush_page(
+			Note: There is no guarantee that this page has actually
+			been freed, only that it has been flushed to disk */
+
+			freed = buf_flush_page(
 				buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true);
 
-			if (flushed) {
-				/* buf_flush_page() will release the
-				block mutex */
+			if (freed) {
 				break;
 			}
+
+			mutex_exit(block_mutex);
+		} else {
+			mutex_exit(block_mutex);
 		}
 
-		mutex_exit(block_mutex);
+		ut_ad(!mutex_own(block_mutex));
 	}
 
-	MONITOR_INC_VALUE_CUMULATIVE(
-		MONITOR_LRU_SINGLE_FLUSH_SCANNED,
-		MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
-		MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL,
-		scanned);
-
-	if (bpage == NULL) {
+	if (!freed) {
 		/* Can't find a single flushable page. */
+		ut_ad(!bpage);
 		buf_pool_mutex_exit(buf_pool);
-		return(FALSE);
 	}
 
+	if (scanned) {
+		MONITOR_INC_VALUE_CUMULATIVE(
+			MONITOR_LRU_SINGLE_FLUSH_SCANNED,
+			MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
+			MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL,
+			scanned);
+	}
 
-	ibool	freed = FALSE;
-
-	/* At this point the page has been written to the disk.
-	As we are not holding buffer pool or block mutex therefore
-	we cannot use the bpage safely. It may have been plucked out
-	of the LRU list by some other thread or it may even have
-	relocated in case of a compressed page. We need to start
-	the scan of LRU list again to remove the block from the LRU
-	list and put it on the free list. */
-	buf_pool_mutex_enter(buf_pool);
-
-	for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-	     bpage != NULL;
-	     bpage = UT_LIST_GET_PREV(LRU, bpage)) {
-
-		ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
-
-		mutex_enter(block_mutex);
-
-		ibool	ready = buf_flush_ready_for_replace(bpage);
+	ut_ad(!buf_pool_mutex_own(buf_pool));
 
-		mutex_exit(block_mutex);
+	return(freed);
+}
 
-		if (ready) {
-			bool	evict_zip;
+/**
+Clears up tail of the LRU list of a given buffer pool instance:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+@param buf_pool buffer pool instance
+@return total pages flushed */
+static
+ulint
+buf_flush_LRU_list(
+	buf_pool_t*	buf_pool)
+{
+	ulint	scan_depth, withdraw_depth;
+	ulint	n_flushed = 0;
 
-			evict_zip = !buf_LRU_evict_from_unzip_LRU(buf_pool);;
+	ut_ad(buf_pool);
 
-			freed = buf_LRU_free_page(bpage, evict_zip);
+	/* srv_LRU_scan_depth can be an arbitrarily large value.
+	We cap it with the current LRU size. */
+	buf_pool_mutex_enter(buf_pool);
+	scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
+	if (buf_pool->curr_size < buf_pool->old_size
+	    && buf_pool->withdraw_target > 0) {
+		withdraw_depth = buf_pool->withdraw_target
+				 - UT_LIST_GET_LEN(buf_pool->withdraw);
+	} else {
+		withdraw_depth = 0;
+	}
+	buf_pool_mutex_exit(buf_pool);
 
-			break;
-		}
+	if (withdraw_depth > srv_LRU_scan_depth) {
+		scan_depth = ut_min(withdraw_depth, scan_depth);
+	} else {
+		scan_depth = ut_min(static_cast<ulint>(srv_LRU_scan_depth),
+				    scan_depth);
 	}
 
-	buf_pool_mutex_exit(buf_pool);
+	/* Currently, only one of the page_cleaner threads can
+	trigger an LRU flush on a given instance at a time.
+	So it is not possible that a batch triggered during the
+	last iteration is still running. */
+	buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU, scan_depth,
+			   0, &n_flushed);
 
-	return(freed);
+	return(n_flushed);
 }
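
The depth selection above reduces to: never scan past the LRU length,
but let a pending buffer-pool shrink win over innodb_LRU_scan_depth
when it needs more pages withdrawn. As a self-contained sketch:

	#include <algorithm>

	typedef unsigned long	ulint;

	// withdraw_depth: pages still to withdraw for an in-progress
	// buffer pool resize; 0 when no shrink is pending.
	ulint pick_scan_depth(ulint lru_len, ulint lru_scan_depth,
			      ulint withdraw_depth)
	{
		const ulint	wanted =
			(withdraw_depth > lru_scan_depth)
			? withdraw_depth : lru_scan_depth;

		return std::min(wanted, lru_len);
	}
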
 
 /*********************************************************************//**
@@ -2060,72 +2291,26 @@ Clears up tail of the LRU lists:
 The depth to which we scan each buffer pool is controlled by dynamic
 config parameter innodb_LRU_scan_depth.
 @return total pages flushed */
-UNIV_INTERN
 ulint
-buf_flush_LRU_tail(void)
-/*====================*/
+buf_flush_LRU_lists(void)
+/*=====================*/
 {
-	ulint	total_flushed = 0;
+	ulint	n_flushed = 0;
 
 	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
 
-		buf_pool_t*	buf_pool = buf_pool_from_array(i);
-		ulint		scan_depth;
-
-		/* srv_LRU_scan_depth can be arbitrarily large value.
-		We cap it with current LRU size. */
-		buf_pool_mutex_enter(buf_pool);
-		scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
-		buf_pool_mutex_exit(buf_pool);
-
-		scan_depth = ut_min(srv_LRU_scan_depth, scan_depth);
-
-		/* We divide LRU flush into smaller chunks because
-		there may be user threads waiting for the flush to
-		end in buf_LRU_get_free_block(). */
-		for (ulint j = 0;
-		     j < scan_depth;
-		     j += PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE) {
-
-			ulint	n_flushed = 0;
-
-			/* Currently page_cleaner is the only thread
-			that can trigger an LRU flush. It is possible
-			that a batch triggered during last iteration is
-			still running, */
-			if (buf_flush_LRU(buf_pool,
-					  PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE,
-					  &n_flushed)) {
-
-				/* Allowed only one batch per
-				buffer pool instance. */
-				buf_flush_wait_batch_end(
-					buf_pool, BUF_FLUSH_LRU);
-			}
-
-			if (n_flushed) {
-				total_flushed += n_flushed;
-			} else {
-				/* Nothing to flush */
-				break;
-			}
-		}
+		n_flushed += buf_flush_LRU_list(buf_pool_from_array(i));
 	}
 
-	if (total_flushed) {
-		MONITOR_INC_VALUE_CUMULATIVE(
-			MONITOR_LRU_BATCH_TOTAL_PAGE,
-			MONITOR_LRU_BATCH_COUNT,
-			MONITOR_LRU_BATCH_PAGES,
-			total_flushed);
+	if (n_flushed) {
+		buf_flush_stats(0, n_flushed);
 	}
 
-	return(total_flushed);
+	return(n_flushed);
 }
 
 /*********************************************************************//**
 Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INTERN
 void
 buf_flush_wait_LRU_batch_end(void)
 /*==============================*/
@@ -2149,26 +2334,6 @@ buf_flush_wait_LRU_batch_end(void)
 }
 
 /*********************************************************************//**
-Flush a batch of dirty pages from the flush list
- at return number of pages flushed, 0 if no page is flushed or if another
-flush_list type batch is running */
-static
-ulint
-page_cleaner_do_flush_batch(
-/*========================*/
-	ulint		n_to_flush,	/*!< in: number of pages that
-					we should attempt to flush. */
-	lsn_t		lsn_limit)	/*!< in: LSN up to which flushing
-					must happen */
-{
-	ulint n_flushed;
-
-	buf_flush_list(n_to_flush, lsn_limit, &n_flushed);
-
-	return(n_flushed);
-}
-
-/*********************************************************************//**
 Calculates if flushing is required based on number of dirty pages in
 the buffer pool.
 @return percent of io_capacity to flush to manage dirty page ratio */
@@ -2177,10 +2342,11 @@ ulint
 af_get_pct_for_dirty()
 /*==================*/
 {
-	ulint dirty_pct = buf_get_modified_ratio_pct();
+	double	dirty_pct = buf_get_modified_ratio_pct();
 
-	if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) {
-		return(100);
+	if (dirty_pct == 0.0) {
+		/* No pages modified */
+		return(0);
 	}
 
 	ut_a(srv_max_dirty_pages_pct_lwm
@@ -2189,16 +2355,16 @@ af_get_pct_for_dirty()
 	if (srv_max_dirty_pages_pct_lwm == 0) {
 		/* The user has not set the option to preflush dirty
 		pages as we approach the high water mark. */
-		if (dirty_pct > srv_max_buf_pool_modified_pct) {
+		if (dirty_pct >= srv_max_buf_pool_modified_pct) {
 			/* We have crossed the high water mark of dirty
 			pages In this case we start flushing at 100% of
 			innodb_io_capacity. */
 			return(100);
 		}
-	} else if (dirty_pct > srv_max_dirty_pages_pct_lwm) {
+	} else if (dirty_pct >= srv_max_dirty_pages_pct_lwm) {
 		/* We should start flushing pages gradually. */
-		return((dirty_pct * 100)
-		       / (srv_max_buf_pool_modified_pct + 1));
+		return(static_cast<ulint>((dirty_pct * 100)
+		       / (srv_max_buf_pool_modified_pct + 1)));
 	}
 
 	return(0);
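
The reworked heuristic reads most clearly as a single function of the
dirty-page ratio. A sketch of the decision, double-based as in the
new code, with parameters mirroring srv_max_buf_pool_modified_pct and
srv_max_dirty_pages_pct_lwm:

	typedef unsigned long	ulint;

	// Returns the percentage of innodb_io_capacity to flush at.
	ulint pct_for_dirty(double dirty_pct, double hard_pct,
			    double lwm_pct)
	{
		if (dirty_pct == 0.0) {
			return 0;	/* nothing modified */
		}

		if (lwm_pct == 0.0) {
			/* no gradual preflushing: all or nothing */
			return dirty_pct >= hard_pct ? 100 : 0;
		}

		if (dirty_pct >= lwm_pct) {
			/* ramp up past the low-water mark */
			return static_cast<ulint>(
				dirty_pct * 100 / (hard_pct + 1));
		}

		return 0;
	}
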
@@ -2247,22 +2413,23 @@ af_get_pct_for_lsn(
 /*********************************************************************//**
 This function is called approximately once every second by the
 page_cleaner thread. Based on various factors it decides if there is a
-need to do flushing. If flushing is needed it is performed and the
-number of pages flushed is returned.
- at return number of pages flushed */
+need to do flushing.
+@param lsn_limit	pointer to return LSN up to which flushing must happen
+@param last_pages_in	the number of pages flushed by the last flush_list
+			flushing
+@return number of pages recommended to be flushed */
 static
 ulint
-page_cleaner_flush_pages_if_needed(void)
+page_cleaner_flush_pages_recommendation(
 /*====================================*/
+	lsn_t*	lsn_limit,
+	ulint	last_pages_in)
 {
-	static	lsn_t		lsn_avg_rate = 0;
 	static	lsn_t		prev_lsn = 0;
-	static	lsn_t		last_lsn = 0;
 	static	ulint		sum_pages = 0;
-	static	ulint		last_pages = 0;
-	static	ulint		prev_pages = 0;
 	static	ulint		avg_page_rate = 0;
 	static	ulint		n_iterations = 0;
+	static	time_t		prev_time;
 	lsn_t			oldest_lsn;
 	lsn_t			cur_lsn;
 	lsn_t			age;
@@ -2271,13 +2438,13 @@ page_cleaner_flush_pages_if_needed(void)
 	ulint			pct_for_dirty = 0;
 	ulint			pct_for_l