[Commits] Rev 4270: MDEV-6384: It seems like OPTIMIZER take into account the order of indexes... in file:///home/psergey/dev2/10.0/

Sergey Petrunya psergey at askmonty.org
Tue Jul 8 00:00:33 EEST 2014


At file:///home/psergey/dev2/10.0/

------------------------------------------------------------
revno: 4270
revision-id: psergey at askmonty.org-20140707210031-p8d3kwrhaidq6ow4
parent: holyfoot at askmonty.org-20140630193024-56wpcvwwy24kuzt4
committer: Sergey Petrunya <psergey at askmonty.org>
branch nick: 10.0
timestamp: Tue 2014-07-08 01:00:31 +0400
message:
  MDEV-6384: It seems like OPTIMIZER take into account the order of indexes...
  When the ORDER BY optimizer considers whether to use an index
  to satisfy ORDER BY ... LIMIT, it should take into account
  that the index may also have a potential range (or ref(const)) access.
  
  That access may allow reading fewer records than specified by LIMIT, and
  we should also reuse the range (or ref) cost formula to get an
  "apples-to-apples" comparison with other plans.
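
To see the scaling concretely: a minimal standalone C++ sketch (not part of
the patch; the row, cost and limit numbers below are made up) of the
proportional cost adjustment that get_range_limit_read_cost() applies:

    #include <cstdio>

    int main()
    {
      // Hypothetical estimates: range access on the ORDER BY index is
      // expected to return quick_rows rows at a total cost of quick_cost.
      double quick_rows= 300.0;   // estimated rows from range access
      double quick_cost= 120.0;   // estimated total cost of range access
      double rows_limit=  10.0;   // ORDER BY ... LIMIT 10

      double cost= quick_cost;
      if (quick_rows > rows_limit)
      {
        // Same assumption as the patch: cost is proportional to the
        // fraction of the estimated rows that LIMIT actually needs.
        cost *= rows_limit / quick_rows;
      }
      printf("scaled cost: %g\n", cost);   // 120 * 10/300 = 4
      return 0;
    }
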
=== modified file 'mysql-test/r/innodb_ext_key.result'
--- a/mysql-test/r/innodb_ext_key.result	2014-03-05 22:20:10 +0000
+++ b/mysql-test/r/innodb_ext_key.result	2014-07-07 21:00:31 +0000
@@ -1002,7 +1002,7 @@ insert into t2 (b) values (null), (null)
 set optimizer_switch='extended_keys=on';
 explain select a from t1 where b is null order by a desc limit 2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	ref	b	b	9	const	2	Using where; Using filesort
+1	SIMPLE	t1	index	b	PRIMARY	8	NULL	3	Using where
 select a from t1 where b is null order by a desc limit 2;
 a
 3

=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc	2014-06-05 22:07:27 +0000
+++ b/sql/opt_range.cc	2014-07-07 21:00:31 +0000
@@ -10610,6 +10610,7 @@ ha_rows check_quick_select(PARAM *param,
       param->table->quick_condition_rows=
         MY_MIN(param->table->quick_condition_rows, rows);
       param->table->quick_rows[keynr]= rows;
+      param->table->quick_costs[keynr]= cost->total_cost();
     }
   }
   /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2014-06-09 18:18:53 +0000
+++ b/sql/sql_select.cc	2014-07-07 21:00:31 +0000
@@ -24570,6 +24570,70 @@ void JOIN::cache_const_exprs()
 }
 
 
+/*
+  Get the cost of reading rows_limit rows through index keynr.
+
+  If there is a quick select on the index, reuse its cost estimate; if that
+  quick select is really ref(const), reuse the ref cost formula instead.
+
+*/
+
+static bool get_range_limit_read_cost(const JOIN_TAB *tab, 
+                                      const TABLE *table, 
+                                      uint keynr, 
+                                      ha_rows rows_limit,
+                                      double *read_time)
+{
+  bool res= false;
+  /* 
+    We need to adjust the estimates if we had a quick select (or ref(const))
+    on index keynr.
+    (the check on tab handles single-table UPDATE/DELETE, where tab == NULL)
+  */
+  if (table->quick_keys.is_set(keynr))
+  {
+    double quick_rows= table->quick_rows[keynr];
+    double tmp;
+    if (tab && table->quick_n_ranges[keynr] == 1)
+    {
+      /*
+        - We've had a join optimizer (tab != NULL); we don't have one for
+          single-table UPDATE/DELETE,
+        - AND range access is actually ref(const). Ref access uses different
+          cost formulas than range (this is awful, yes). To compare
+          apples-to-apples, we reuse the cost formula from best_access_path:
+      */
+
+      /* Limit the number of matched rows */
+      tmp= quick_rows;
+      set_if_smaller(tmp, (double) tab->join->thd->variables.max_seeks_for_key);
+      if (table->covering_keys.is_set(keynr))
+        tmp= table->file->keyread_time(keynr, 1, (ha_rows) tmp);
+      else
+        tmp= table->file->read_time(keynr, 1,
+                                    (ha_rows) MY_MIN(tmp,tab->worst_seeks));
+    }
+    else
+      tmp= table->quick_costs[keynr];
+
+    if (quick_rows > rows_limit)
+    {
+      /*
+        The LIMIT clause specifies that we need to read fewer records than
+        the quick select will return. Assume that the quick select's cost is
+        proportional to the number of records we need to return (e.g. if we
+        only need 1/3rd of the records, it will cost us 1/3rd of the quick
+        select's read time).
+      */
+      tmp *= rows_limit / quick_rows;
+    }
+    *read_time= tmp;
+    res= true;
+  }
+  return res;
+}
+
+
 /**
   Find a cheaper access key than a given @a key
 
@@ -24662,6 +24726,11 @@ test_if_cheaper_ordering(const JOIN_TAB
   }
   else
     read_time= table->file->scan_time();
+  
+  /*
+    TODO: add cost of sorting here.
+  */
+  read_time += COST_EPS;
 
   /*
     Calculate the selectivity of the ref_key for REF_ACCESS. For
@@ -24819,8 +24888,12 @@ test_if_cheaper_ordering(const JOIN_TAB
           to calculate the cost of accessing data rows for one 
           index entry.
         */
-        index_scan_time= select_limit/rec_per_key *
-                         MY_MIN(rec_per_key, table->file->scan_time());
+        if (!get_range_limit_read_cost(tab, table, nr, select_limit, 
+                                       &index_scan_time))
+        {
+          index_scan_time= select_limit/rec_per_key *
+                           MY_MIN(rec_per_key, table->file->scan_time());
+        }
         if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
             index_scan_time < read_time)
         {
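
For the ref(const) branch in get_range_limit_read_cost() above, a rough
standalone sketch: keyread_time() and read_time() here are made-up linear
stand-ins for the handler methods (real implementations are engine-specific),
and all numbers are hypothetical; only the capping and branching mirror the
patch:

    #include <algorithm>
    #include <cstdio>

    // Stand-ins for handler::keyread_time()/read_time(); invented models.
    static double keyread_time(double rows) { return 0.5 + rows * 0.01; }
    static double read_time(double rows)    { return 1.0 + rows; }

    int main()
    {
      double quick_rows=        50.0;   // table->quick_rows[keynr]
      double max_seeks_for_key= 4096.0; // thd->variables.max_seeks_for_key
      double worst_seeks=       10.0;   // tab->worst_seeks
      bool   covering=          false;  // table->covering_keys.is_set(keynr)

      // Cap the number of matched rows, then price the lookups, as in
      // the ref(const) formula from best_access_path.
      double tmp= std::min(quick_rows, max_seeks_for_key);
      tmp= covering ? keyread_time(tmp)
                    : read_time(std::min(tmp, worst_seeks));

      printf("ref(const) cost estimate: %g\n", tmp);  // 1 + 10 = 11
      return 0;
    }
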

=== modified file 'sql/table.h'
--- a/sql/table.h	2014-06-05 22:07:27 +0000
+++ b/sql/table.h	2014-07-07 21:00:31 +0000
@@ -1113,6 +1113,7 @@ struct TABLE
     and max #key parts that range access would use.
   */
   ha_rows	quick_rows[MAX_KEY];
+  double 	quick_costs[MAX_KEY];
 
   /* 
     Bitmaps of key parts that =const for the duration of join execution. If
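
The new quick_costs[] is a parallel array to quick_rows[]: both are indexed
by key number and filled during range analysis in check_quick_select(). A toy
illustration of the layout (the MAX_KEY value and field types are simplified
here, not the server's exact definitions):

    #include <cstdio>

    enum { MAX_KEY= 64 };  // illustrative; the real limit is server-defined

    // Simplified parallel arrays: per-index row estimate and total cost,
    // as cached by check_quick_select() in the patch.
    struct QuickEstimates
    {
      unsigned long long quick_rows[MAX_KEY];
      double             quick_costs[MAX_KEY];
    };

    int main()
    {
      QuickEstimates est= {};
      unsigned keynr= 3;             // some index number
      est.quick_rows[keynr]=  2;     // rows the range access would return
      est.quick_costs[keynr]= 3.0;   // cost->total_cost() for that access

      printf("key %u: %llu rows, cost %g\n",
             keynr, est.quick_rows[keynr], est.quick_costs[keynr]);
      return 0;
    }
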


