[Commits] Rev 4145: MDEV-6081: ORDER BY+ref(const): selectivity is very incorrect (MySQL Bug#14338686) in file:///home/psergey/dev2/10.0/

Sergey Petrunya psergey at askmonty.org
Sat Apr 12 00:01:35 EEST 2014


At file:///home/psergey/dev2/10.0/

------------------------------------------------------------
revno: 4145
revision-id: psergey at askmonty.org-20140411210132-o00xo02lqdh29hcy
parent: psergey at askmonty.org-20140407094948-yuhs1nte94zhieew
committer: Sergey Petrunya <psergey at askmonty.org>
branch nick: 10.0
timestamp: Sat 2014-04-12 01:01:32 +0400
message:
  MDEV-6081: ORDER BY+ref(const): selectivity is very incorrect (MySQL Bug#14338686)
  Add a testcase and backport this fix:
  
  Bug#14338686: MYSQL IS GENERATING DIFFERENT AND SLOWER
                (IN NEWER VERSIONS) EXECUTION PLAN
  PROBLEM:
  While checking for an index to sort for the order by clause
  in this query
  "SELECT datestamp FROM contractStatusHistory WHERE
  contract_id = contracts.id ORDER BY datestamp asc limit 1;"
  
  we do not calculate the number of rows to be examined correctly.
  As a result we choose index 'idx_contractStatusHistory_datestamp'
  defined on the 'datestamp' field, rather than choosing index
  'contract_id'. And hence the lower performance.
  
  ANALYSIS:
  While checking if an index is present to give the records in
  sorted order (datestamp), we consider the selectivity of the
  'ref_key'(contract_id here) using 'table->quick_condition_rows'.
  'ref_key' here can be an index from 'REF_ACCESS' or from 'RANGE'.
  
  As this is a 'REF_ACCESS', 'table->quick_condition_rows' is not
  set to the actual value which is 2. Instead, it is set to the number
  of tuples present in the table indicating that every row that
  is selected would be satisfying the condition present in the query.
  
  Hence, the selectivity becomes 1 even when we choose the index
  on the order by column instead of the join_condition.
  
  But, in reality as only 2 rows satisfy the condition, we need to
  examine half of the entire data set to get one tuple when we
  choose index on the order by column.
  Had we chosen the 'REF_ACCESS' we would have examined only 2 tuples.
  Hence the delay in executing the query specified.
    
  FIX:
  While calculating the selectivity of the ref_key:
  For REF_ACCESS consider quick_rows[ref_key] if range 
  optimizer has an estimate for this key. Else consider 
  'rec_per_key' statistic.
  For RANGE ACCESS consider 'table->quick_condition_rows'.
=== modified file 'mysql-test/r/subselect_innodb.result'
--- a/mysql-test/r/subselect_innodb.result	2014-04-07 09:49:48 +0000
+++ b/mysql-test/r/subselect_innodb.result	2014-04-11 21:01:32 +0000
@@ -526,4 +526,26 @@ t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	10	
 2	DEPENDENT SUBQUERY	t2	ref	key1	key1	5	test.t1.a	1	Using where
-drop table t1, t2;
+#
+# MDEV-6081: ORDER BY+ref(const): selectivity is very incorrect (MySQL Bug#14338686)
+#
+alter table t2 add key2 int;
+update t2 set key2=key1;
+alter table t2 add key(key2);
+analyze table t2;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	OK
+flush tables;
+# Table tsubq must use 'ref' + Using filesort (not 'index' w/o filesort)
+explain select 
+(SELECT 
+concat(id, '-', key1, '-', col1)
+FROM t2
+WHERE t2.key1 = t1.a
+ORDER BY t2.key2 ASC LIMIT 1)
+from 
+t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	#	
+2	DEPENDENT SUBQUERY	t2	ref	key1	key1	5	test.t1.a	#	Using where; Using filesort
+drop table t1,t2;

=== modified file 'mysql-test/t/subselect_innodb.test'
--- a/mysql-test/t/subselect_innodb.test	2014-04-07 09:49:48 +0000
+++ b/mysql-test/t/subselect_innodb.test	2014-04-11 21:01:32 +0000
@@ -513,5 +513,26 @@ explain select
     ORDER BY t2.id ASC LIMIT 1)
 from 
   t1;
-drop table t1, t2;
+
+--echo #
+--echo # MDEV-6081: ORDER BY+ref(const): selectivity is very incorrect (MySQL Bug#14338686)
+--echo #
+
+alter table t2 add key2 int;
+update t2 set key2=key1;
+alter table t2 add key(key2);
+analyze table t2;
+flush tables;
+--echo # Table tsubq must use 'ref' + Using filesort (not 'index' w/o filesort)
+--replace_column 9 #
+explain select 
+   (SELECT 
+      concat(id, '-', key1, '-', col1)
+    FROM t2
+    WHERE t2.key1 = t1.a
+    ORDER BY t2.key2 ASC LIMIT 1)
+from 
+  t1;
+
+drop table t1,t2;
 

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2014-04-07 09:49:48 +0000
+++ b/sql/sql_select.cc	2014-04-11 21:01:32 +0000
@@ -24527,7 +24527,7 @@ test_if_cheaper_ordering(const JOIN_TAB
   double fanout= 1;
   ha_rows table_records= table->stat_records();
   bool group= join && join->group && order == join->group_list;
-  ha_rows ref_key_quick_rows= HA_POS_ERROR;
+  ha_rows refkey_rows_estimate= table->quick_condition_rows;
   const bool has_limit= (select_limit_arg != HA_POS_ERROR);
 
   /*
@@ -24553,10 +24553,6 @@ test_if_cheaper_ordering(const JOIN_TAB
   else
     keys= usable_keys;
 
-  if (ref_key >= 0 && ref_key != MAX_KEY &&
-      table->covering_keys.is_set(ref_key))
-    ref_key_quick_rows= table->quick_rows[ref_key];
-
   if (join)
   {
     uint tablenr= tab - join->join_tab;
@@ -24567,6 +24563,22 @@ test_if_cheaper_ordering(const JOIN_TAB
   else
     read_time= table->file->scan_time();
 
+  /*
+    Calculate the selectivity of the ref_key for REF_ACCESS. For
+    RANGE_ACCESS we use table->quick_condition_rows.
+  */
+  if (ref_key >= 0 && tab->type == JT_REF)
+  {
+    if (table->quick_keys.is_set(ref_key))
+      refkey_rows_estimate= table->quick_rows[ref_key];
+    else
+    {
+      const KEY *ref_keyinfo= table->key_info + ref_key;
+      refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1];
+    }
+    set_if_bigger(refkey_rows_estimate, 1);
+  }
+
   for (nr=0; nr < table->s->keys ; nr++)
   {
     int direction;
@@ -24683,17 +24695,17 @@ test_if_cheaper_ordering(const JOIN_TAB
           with ref_key. Thus, to select first N records we have to scan
           N/selectivity(ref_key) index entries. 
           selectivity(ref_key) = #scanned_records/#table_records =
-          table->quick_condition_rows/table_records.
+          refkey_rows_estimate/table_records.
           In any case we can't select more than #table_records.
-          N/(table->quick_condition_rows/table_records) > table_records 
-          <=> N > table->quick_condition_rows.
-        */ 
-        if (select_limit > table->quick_condition_rows)
+          N/(refkey_rows_estimate/table_records) > table_records
+          <=> N > refkey_rows_estimate.
+         */
+        if (select_limit > refkey_rows_estimate)
           select_limit= table_records;
         else
           select_limit= (ha_rows) (select_limit *
                                    (double) table_records /
-                                    table->quick_condition_rows);
+                                    refkey_rows_estimate);
         rec_per_key= keyinfo->actual_rec_per_key(keyinfo->user_defined_key_parts-1);
         set_if_bigger(rec_per_key, 1);
         /*
@@ -24713,8 +24725,12 @@ test_if_cheaper_ordering(const JOIN_TAB
             index_scan_time < read_time)
         {
           ha_rows quick_records= table_records;
+          ha_rows refkey_select_limit= (ref_key >= 0 &&
+                                        table->covering_keys.is_set(ref_key)) ?
+                                        refkey_rows_estimate :
+                                        HA_POS_ERROR;
           if ((is_best_covering && !is_covering) ||
-              (is_covering && ref_key_quick_rows < select_limit))
+              (is_covering && refkey_select_limit < select_limit))
             continue;
           if (table->quick_keys.is_set(nr))
             quick_records= table->quick_rows[nr];



More information about the commits mailing list