[Commits] 86167e908fe: MDEV-20611: MRR scan over partitioned InnoDB table produces "Out of memory" error

psergey sergey at mariadb.com
Fri Nov 15 22:37:28 EET 2019


revision-id: 86167e908fe5de6f6e9f5076b4ea8041514d0820 (mariadb-10.3.20-9-g86167e908fe)
parent(s): 3d4a80153345209bad736235d4f66dcaa51a9d51
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-15 23:37:28 +0300
message:

MDEV-20611: MRR scan over partitioned InnoDB table produces "Out of memory" error

Fix partitioning and DS-MRR to work together

- In ha_partition::index_end(): take into account that ha_innobase (and
  other engines using DS-MRR) will have inited=RND when initialized for
  DS-MRR scan.
- In ha_partition::multi_range_read_next(): if the MRR scan is using
  HA_MRR_NO_ASSOCIATION mode, it is not guaranteed that the partition's
  handler will store anything into *range_info.
- In DsMrr_impl::choose_mrr_impl(): ha_partition will inquire partitions
  about how much memory their MRR implementation needs by passing
  *buffer_size=0. DS-MRR code didn't know about this (actually it used
  uint for buffer size calculation and would have an under-flow).
  Returning *buffer_size=0 made ha_partition assume that partitions do
  not need MRR memory and pass the same buffer to each of them.

  Now, this is fixed. If DS-MRR gets *buffer_size=0, it will return
  the amount of buffer space needed, but not more than about
  @@mrr_buffer_size.

* Fix ha_{innobase,maria,myisam}::clone. If ha_partition uses MRR on its
  partitions, and partition use DS-MRR, the code will call handler->clone
  with TABLE (*NOT partition*) name as an argument.
  DS-MRR has no way of knowing the partition name, so the solution was
  to have the ::clone() function for the affected storage engine to ignore
  the name argument and get it elsewhere.

---
 mysql-test/include/partition_mrr.inc        | 46 +++++++++++++++++
 mysql-test/main/partition_mrr_aria.result   | 79 +++++++++++++++++++++++++++++
 mysql-test/main/partition_mrr_aria.test     |  2 +
 mysql-test/main/partition_mrr_innodb.result | 79 +++++++++++++++++++++++++++++
 mysql-test/main/partition_mrr_innodb.test   |  4 ++
 mysql-test/main/partition_mrr_myisam.result | 79 +++++++++++++++++++++++++++++
 mysql-test/main/partition_mrr_myisam.test   |  3 ++
 sql/ha_partition.cc                         | 14 ++++-
 sql/multi_range_read.cc                     | 33 +++++++++---
 sql/multi_range_read.h                      |  5 +-
 storage/innobase/handler/ha_innodb.cc       |  2 +-
 storage/maria/ha_maria.cc                   |  8 +--
 storage/myisam/ha_myisam.cc                 |  7 +--
 13 files changed, 343 insertions(+), 18 deletions(-)

diff --git a/mysql-test/include/partition_mrr.inc b/mysql-test/include/partition_mrr.inc
new file mode 100644
index 00000000000..4c285791ec7
--- /dev/null
+++ b/mysql-test/include/partition_mrr.inc
@@ -0,0 +1,46 @@
+--source include/have_partition.inc
+
+--disable_warnings
+drop table if exists t1,t3;
+--enable_warnings
+
+--echo #
+--echo # MDEV-20611: MRR scan over partitioned InnoDB table produces "Out of memory" error
+--echo #
+create table t1(a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+set @tmp=@@storage_engine;
+eval set storage_engine=$engine_type;
+
+create table t3 (
+  ID bigint(20) NOT NULL AUTO_INCREMENT,
+  part_id int,
+  key_col int,
+  col2 int,
+  key(key_col),
+  PRIMARY KEY (ID,part_id)
+) PARTITION BY RANGE (part_id)
+(PARTITION p1 VALUES LESS THAN (3),
+ PARTITION p2 VALUES LESS THAN (7),
+ PARTITION p3 VALUES LESS THAN (10)
+);
+
+show create table t3;
+set storage_engine= @tmp;
+
+insert into t3 select 
+  A.a+10*B.a,
+  A.a,
+  B.a,
+  123456
+from t1 A, t1 B;
+
+set optimizer_switch='mrr=on';
+--replace_column 9 #
+explain 
+select * from t3 force index (key_col) where key_col < 3;
+select * from t3 force index (key_col) where key_col < 3;
+
+drop table t1,t3;
+
diff --git a/mysql-test/main/partition_mrr_aria.result b/mysql-test/main/partition_mrr_aria.result
new file mode 100644
index 00000000000..7a0c35a309e
--- /dev/null
+++ b/mysql-test/main/partition_mrr_aria.result
@@ -0,0 +1,79 @@
+drop table if exists t1,t3;
+#
+# MDEV-20611: MRR scan over partitioned InnoDB table produces "Out of memory" error
+#
+create table t1(a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+set @tmp=@@storage_engine;
+set storage_engine=Aria;
+create table t3 (
+ID bigint(20) NOT NULL AUTO_INCREMENT,
+part_id int,
+key_col int,
+col2 int,
+key(key_col),
+PRIMARY KEY (ID,part_id)
+) PARTITION BY RANGE (part_id)
+(PARTITION p1 VALUES LESS THAN (3),
+PARTITION p2 VALUES LESS THAN (7),
+PARTITION p3 VALUES LESS THAN (10)
+);
+show create table t3;
+Table	Create Table
+t3	CREATE TABLE `t3` (
+  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
+  `part_id` int(11) NOT NULL,
+  `key_col` int(11) DEFAULT NULL,
+  `col2` int(11) DEFAULT NULL,
+  PRIMARY KEY (`ID`,`part_id`),
+  KEY `key_col` (`key_col`)
+) ENGINE=Aria DEFAULT CHARSET=latin1
+ PARTITION BY RANGE (`part_id`)
+(PARTITION `p1` VALUES LESS THAN (3) ENGINE = Aria,
+ PARTITION `p2` VALUES LESS THAN (7) ENGINE = Aria,
+ PARTITION `p3` VALUES LESS THAN (10) ENGINE = Aria)
+set storage_engine= @tmp;
+insert into t3 select 
+A.a+10*B.a,
+A.a,
+B.a,
+123456
+from t1 A, t1 B;
+set optimizer_switch='mrr=on';
+explain 
+select * from t3 force index (key_col) where key_col < 3;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t3	range	key_col	key_col	5	NULL	#	Using where; Rowid-ordered scan
+select * from t3 force index (key_col) where key_col < 3;
+ID	part_id	key_col	col2
+1	0	0	123456
+1	1	0	123456
+2	2	0	123456
+10	0	1	123456
+11	1	1	123456
+12	2	1	123456
+20	0	2	123456
+21	1	2	123456
+22	2	2	123456
+3	3	0	123456
+4	4	0	123456
+5	5	0	123456
+6	6	0	123456
+13	3	1	123456
+14	4	1	123456
+15	5	1	123456
+16	6	1	123456
+23	3	2	123456
+24	4	2	123456
+25	5	2	123456
+26	6	2	123456
+7	7	0	123456
+8	8	0	123456
+9	9	0	123456
+17	7	1	123456
+18	8	1	123456
+19	9	1	123456
+27	7	2	123456
+28	8	2	123456
+29	9	2	123456
+drop table t1,t3;
diff --git a/mysql-test/main/partition_mrr_aria.test b/mysql-test/main/partition_mrr_aria.test
new file mode 100644
index 00000000000..e3dfe8cd9b5
--- /dev/null
+++ b/mysql-test/main/partition_mrr_aria.test
@@ -0,0 +1,2 @@
+let $engine_type= Aria;
+--source include/partition_mrr.inc
diff --git a/mysql-test/main/partition_mrr_innodb.result b/mysql-test/main/partition_mrr_innodb.result
new file mode 100644
index 00000000000..c188f7e9929
--- /dev/null
+++ b/mysql-test/main/partition_mrr_innodb.result
@@ -0,0 +1,79 @@
+drop table if exists t1,t3;
+#
+# MDEV-20611: MRR scan over partitioned InnoDB table produces "Out of memory" error
+#
+create table t1(a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+set @tmp=@@storage_engine;
+set storage_engine=InnoDB;
+create table t3 (
+ID bigint(20) NOT NULL AUTO_INCREMENT,
+part_id int,
+key_col int,
+col2 int,
+key(key_col),
+PRIMARY KEY (ID,part_id)
+) PARTITION BY RANGE (part_id)
+(PARTITION p1 VALUES LESS THAN (3),
+PARTITION p2 VALUES LESS THAN (7),
+PARTITION p3 VALUES LESS THAN (10)
+);
+show create table t3;
+Table	Create Table
+t3	CREATE TABLE `t3` (
+  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
+  `part_id` int(11) NOT NULL,
+  `key_col` int(11) DEFAULT NULL,
+  `col2` int(11) DEFAULT NULL,
+  PRIMARY KEY (`ID`,`part_id`),
+  KEY `key_col` (`key_col`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+ PARTITION BY RANGE (`part_id`)
+(PARTITION `p1` VALUES LESS THAN (3) ENGINE = InnoDB,
+ PARTITION `p2` VALUES LESS THAN (7) ENGINE = InnoDB,
+ PARTITION `p3` VALUES LESS THAN (10) ENGINE = InnoDB)
+set storage_engine= @tmp;
+insert into t3 select 
+A.a+10*B.a,
+A.a,
+B.a,
+123456
+from t1 A, t1 B;
+set optimizer_switch='mrr=on';
+explain 
+select * from t3 force index (key_col) where key_col < 3;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t3	range	key_col	key_col	5	NULL	#	Using where; Rowid-ordered scan
+select * from t3 force index (key_col) where key_col < 3;
+ID	part_id	key_col	col2
+1	0	0	123456
+1	1	0	123456
+2	2	0	123456
+10	0	1	123456
+11	1	1	123456
+12	2	1	123456
+20	0	2	123456
+21	1	2	123456
+22	2	2	123456
+3	3	0	123456
+4	4	0	123456
+5	5	0	123456
+6	6	0	123456
+13	3	1	123456
+14	4	1	123456
+15	5	1	123456
+16	6	1	123456
+23	3	2	123456
+24	4	2	123456
+25	5	2	123456
+26	6	2	123456
+7	7	0	123456
+8	8	0	123456
+9	9	0	123456
+17	7	1	123456
+18	8	1	123456
+19	9	1	123456
+27	7	2	123456
+28	8	2	123456
+29	9	2	123456
+drop table t1,t3;
diff --git a/mysql-test/main/partition_mrr_innodb.test b/mysql-test/main/partition_mrr_innodb.test
new file mode 100644
index 00000000000..1eccf070e5c
--- /dev/null
+++ b/mysql-test/main/partition_mrr_innodb.test
@@ -0,0 +1,4 @@
+--source include/have_innodb.inc
+let $engine_type= InnoDB;
+
+--source include/partition_mrr.inc
diff --git a/mysql-test/main/partition_mrr_myisam.result b/mysql-test/main/partition_mrr_myisam.result
new file mode 100644
index 00000000000..1f1cea8e9d6
--- /dev/null
+++ b/mysql-test/main/partition_mrr_myisam.result
@@ -0,0 +1,79 @@
+drop table if exists t1,t3;
+#
+# MDEV-20611: MRR scan over partitioned InnoDB table produces "Out of memory" error
+#
+create table t1(a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+set @tmp=@@storage_engine;
+set storage_engine=myisam;
+create table t3 (
+ID bigint(20) NOT NULL AUTO_INCREMENT,
+part_id int,
+key_col int,
+col2 int,
+key(key_col),
+PRIMARY KEY (ID,part_id)
+) PARTITION BY RANGE (part_id)
+(PARTITION p1 VALUES LESS THAN (3),
+PARTITION p2 VALUES LESS THAN (7),
+PARTITION p3 VALUES LESS THAN (10)
+);
+show create table t3;
+Table	Create Table
+t3	CREATE TABLE `t3` (
+  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
+  `part_id` int(11) NOT NULL,
+  `key_col` int(11) DEFAULT NULL,
+  `col2` int(11) DEFAULT NULL,
+  PRIMARY KEY (`ID`,`part_id`),
+  KEY `key_col` (`key_col`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+ PARTITION BY RANGE (`part_id`)
+(PARTITION `p1` VALUES LESS THAN (3) ENGINE = MyISAM,
+ PARTITION `p2` VALUES LESS THAN (7) ENGINE = MyISAM,
+ PARTITION `p3` VALUES LESS THAN (10) ENGINE = MyISAM)
+set storage_engine= @tmp;
+insert into t3 select 
+A.a+10*B.a,
+A.a,
+B.a,
+123456
+from t1 A, t1 B;
+set optimizer_switch='mrr=on';
+explain 
+select * from t3 force index (key_col) where key_col < 3;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t3	range	key_col	key_col	5	NULL	#	Using where; Rowid-ordered scan
+select * from t3 force index (key_col) where key_col < 3;
+ID	part_id	key_col	col2
+1	0	0	123456
+1	1	0	123456
+2	2	0	123456
+10	0	1	123456
+11	1	1	123456
+12	2	1	123456
+20	0	2	123456
+21	1	2	123456
+22	2	2	123456
+3	3	0	123456
+4	4	0	123456
+5	5	0	123456
+6	6	0	123456
+13	3	1	123456
+14	4	1	123456
+15	5	1	123456
+16	6	1	123456
+23	3	2	123456
+24	4	2	123456
+25	5	2	123456
+26	6	2	123456
+7	7	0	123456
+8	8	0	123456
+9	9	0	123456
+17	7	1	123456
+18	8	1	123456
+19	9	1	123456
+27	7	2	123456
+28	8	2	123456
+29	9	2	123456
+drop table t1,t3;
diff --git a/mysql-test/main/partition_mrr_myisam.test b/mysql-test/main/partition_mrr_myisam.test
new file mode 100644
index 00000000000..d67a37ab3d2
--- /dev/null
+++ b/mysql-test/main/partition_mrr_myisam.test
@@ -0,0 +1,3 @@
+let $engine_type= myisam;
+
+--source include/partition_mrr.inc
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index ccda01de6b7..09664deb458 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -5478,6 +5478,13 @@ int ha_partition::index_end()
       if ((tmp= (*file)->ha_index_end()))
         error= tmp;
     }
+    else if ((*file)->inited == RND)
+    {
+      // Possible due to MRR
+      int tmp;
+      if ((tmp= (*file)->ha_rnd_end()))
+        error= tmp;
+    }
   } while (*(++file));
   destroy_record_priority_queue();
   DBUG_RETURN(error);
@@ -6519,8 +6526,11 @@ int ha_partition::multi_range_read_next(range_id_t *range_info)
     else if (unlikely((error= handle_unordered_next(table->record[0], FALSE))))
       DBUG_RETURN(error);
 
-    *range_info=
-      ((PARTITION_KEY_MULTI_RANGE *) m_range_info[m_last_part])->ptr;
+    if (!(m_mrr_mode & HA_MRR_NO_ASSOCIATION))
+    {
+      *range_info=
+        ((PARTITION_KEY_MULTI_RANGE *) m_range_info[m_last_part])->ptr;
+    }
   }
   DBUG_RETURN(0);
 }
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index f40c8d0fbd8..6d62ea07dfa 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -1589,11 +1589,10 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
   }
 
   uint add_len= share->key_info[keyno].key_length + primary_file->ref_length; 
-  *bufsz -= add_len;
-  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
+  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, add_len,
+                              &dsmrr_cost))
     return TRUE;
-  *bufsz += add_len;
-  
+
   bool force_dsmrr;
   /* 
     If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum of
@@ -1682,6 +1681,11 @@ static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *
   @param rows               E(Number of rows to be scanned)
   @param flags              Scan parameters (HA_MRR_* flags)
   @param buffer_size INOUT  Buffer size
+                            IN: Buffer of size 0 means the function
+                            will determine the best size and return it.
+  @param extra_mem_overhead Extra memory overhead of the MRR implementation
+                            (the function assumes this many bytes of buffer
+                             space will not be usable by DS-MRR)
   @param cost        OUT    The cost
 
   @retval FALSE  OK
@@ -1690,7 +1694,9 @@ static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *
 */
 
 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
-                                         uint *buffer_size, Cost_estimate *cost)
+                                         uint *buffer_size,
+                                         uint extra_mem_overhead,
+                                         Cost_estimate *cost)
 {
   ulong max_buff_entries, elem_size;
   ha_rows rows_in_full_step;
@@ -1700,11 +1706,24 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
 
   elem_size= primary_file->ref_length + 
              sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
-  max_buff_entries = *buffer_size / elem_size;
 
-  if (!max_buff_entries)
+  if (!*buffer_size)
+  {
+    /*
+      We are requested to determine how much memory we need.
+      Request memory to finish the scan in one pass but do not request
+      more than @@mrr_buff_size.
+    */
+    *buffer_size = MY_MIN(extra_mem_overhead + rows*elem_size,
+                          MY_MAX(table->in_use->variables.mrr_buff_size,
+                                 extra_mem_overhead));
+  }
+
+  if (elem_size + extra_mem_overhead > *buffer_size)
     return TRUE; /* Buffer has not enough space for even 1 rowid */
 
+  max_buff_entries = (*buffer_size - extra_mem_overhead) / elem_size;
+
   /* Number of iterations we'll make with full buffer */
   n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
   
diff --git a/sql/multi_range_read.h b/sql/multi_range_read.h
index 85578aa312c..0473fef04ae 100644
--- a/sql/multi_range_read.h
+++ b/sql/multi_range_read.h
@@ -631,8 +631,9 @@ class DsMrr_impl
   
   bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, 
                        Cost_estimate *cost);
-  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, 
-                               uint *buffer_size, Cost_estimate *cost);
+  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
+                               uint *buffer_size, uint extra_mem_overhead,
+                               Cost_estimate *cost);
   bool check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno, uint mrr_flags);
 
   bool setup_buffer_sharing(uint key_size_in_keybuf, key_part_map key_tuple_map);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 29f29544f29..eecc72ad1f6 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -6466,7 +6466,7 @@ ha_innobase::clone(
 	DBUG_ENTER("ha_innobase::clone");
 
 	ha_innobase*	new_handler = static_cast<ha_innobase*>(
-		handler::clone(name, mem_root));
+		handler::clone(m_prebuilt->table->name.m_name, mem_root));
 
 	if (new_handler != NULL) {
 		DBUG_ASSERT(new_handler->m_prebuilt != NULL);
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 57537a69082..71456666fbe 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -1000,10 +1000,12 @@ can_enable_indexes(1), bulk_insert_single_undo(BULK_INSERT_NONE)
 {}
 
 
-handler *ha_maria::clone(const char *name, MEM_ROOT *mem_root)
+handler *ha_maria::clone(const char *name __attribute__((unused)),
+                         MEM_ROOT *mem_root)
 {
-  ha_maria *new_handler= static_cast <ha_maria *>(handler::clone(name,
-                                                                 mem_root));
+  ha_maria *new_handler=
+    static_cast <ha_maria *>(handler::clone(file->s->open_file_name.str,
+                                            mem_root));
   if (new_handler)
   {
     new_handler->file->state= file->state;
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 40c8ea61ddc..9b4dff68683 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -702,10 +702,11 @@ ha_myisam::ha_myisam(handlerton *hton, TABLE_SHARE *table_arg)
    can_enable_indexes(1)
 {}
 
-handler *ha_myisam::clone(const char *name, MEM_ROOT *mem_root)
+handler *ha_myisam::clone(const char *name __attribute__((unused)),
+                          MEM_ROOT *mem_root)
 {
-  ha_myisam *new_handler= static_cast <ha_myisam *>(handler::clone(name,
-                                                                   mem_root));
+  ha_myisam *new_handler=
+    static_cast <ha_myisam *>(handler::clone(file->filename, mem_root));
   if (new_handler)
     new_handler->file->state= file->state;
   return new_handler;


More information about the commits mailing list