[Commits] 5f407a2c542: MDEV-13275:Query optimizer not picking optimal ORDER BY PRIMARY in case of INNER JOIN

Varun varunraiko1803 at gmail.com
Tue Apr 3 19:48:47 EEST 2018


revision-id: 5f407a2c5425241bd3d045ce2ff9bdb0562ff355 (mariadb-10.2.5-571-g5f407a2c542)
parent(s): bc2501453c3ab9a2cf3516bc3557de8665bc2776
author: Varun Gupta
committer: Varun Gupta
timestamp: 2018-04-03 22:07:17 +0530
message:

MDEV-13275:Query optimizer not picking optimal ORDER BY PRIMARY in case of INNER JOIN
           on PRIMARY like it does for similar WHERE condition

Currently the code for the optimization orderby_uses_equalities takes into account full substitution so this
blocks cases where we have [VAR]char with insensitive comparisions('A'='a') and padding('A'='A ')
The solutuion would be to allow substitution for the purpose of comparison for order by
because order by only needs the sorting columns for comparision.

---
 mysql-test/r/order_by.result | 97 ++++++++++++++++++++++++++++++++++++++++++++
 mysql-test/t/order_by.test   | 15 +++++++
 sql/item.cc                  |  7 ++++
 sql/item.h                   |  1 +
 sql/sql_select.cc            | 18 +++++---
 5 files changed, 133 insertions(+), 5 deletions(-)

diff --git a/mysql-test/r/order_by.result b/mysql-test/r/order_by.result
index 1ca3034c610..157655184f4 100644
--- a/mysql-test/r/order_by.result
+++ b/mysql-test/r/order_by.result
@@ -3253,3 +3253,100 @@ Warnings:
 Note	1003	select `test`.`wings`.`id` AS `wing_id`,`test`.`wings`.`department_id` AS `department_id` from `test`.`wings` semi join (`test`.`books`) where `test`.`books`.`library_id` = 8663 and `test`.`books`.`scheduled_for_removal` = 0 and `test`.`wings`.`id` = `test`.`books`.`wings_id` order by `test`.`wings`.`id`
 set optimizer_switch= @save_optimizer_switch;
 DROP TABLE books, wings;
+#
+# MDEV-13275:Query optimizer not picking optimal ORDER BY PRIMARY in case of INNER JOIN on PRIMARY
+# like it does for similar WHERE condition
+#
+create table t1 (id char(32) NOT NULL primary key);
+insert into t1 values (0), (1), (2), (3), (4), (5), (6), (7), (8), (9);
+create table t2 (id char(32) NOT NULL primary key);
+insert into t2 values (0), (1), (2), (3);
+explain select t1.id from t1 INNER JOIN t2 on t1.id=t2.id order by t1.id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	PRIMARY	PRIMARY	32	NULL	4	Using index
+1	SIMPLE	t1	eq_ref	PRIMARY	PRIMARY	32	test.t2.id	1	Using index
+analyze format=json select t1.id from t1 INNER JOIN t2 on t1.id=t2.id order by t1.id;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_loops": 1,
+    "r_total_time_ms": 0.0555,
+    "table": {
+      "table_name": "t2",
+      "access_type": "index",
+      "possible_keys": ["PRIMARY"],
+      "key": "PRIMARY",
+      "key_length": "32",
+      "used_key_parts": ["id"],
+      "r_loops": 1,
+      "rows": 4,
+      "r_rows": 4,
+      "r_total_time_ms": 0.0159,
+      "filtered": 100,
+      "r_filtered": 100,
+      "using_index": true
+    },
+    "table": {
+      "table_name": "t1",
+      "access_type": "eq_ref",
+      "possible_keys": ["PRIMARY"],
+      "key": "PRIMARY",
+      "key_length": "32",
+      "used_key_parts": ["id"],
+      "ref": ["test.t2.id"],
+      "r_loops": 4,
+      "rows": 1,
+      "r_rows": 1,
+      "r_total_time_ms": 0.0155,
+      "filtered": 100,
+      "r_filtered": 100,
+      "using_index": true
+    }
+  }
+}
+explain select t1.id from t1 INNER JOIN t2 on t1.id=t2.id order by t2.id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	PRIMARY	PRIMARY	32	NULL	4	Using index
+1	SIMPLE	t1	eq_ref	PRIMARY	PRIMARY	32	test.t2.id	1	Using index
+analyze format=json select t1.id from t1 INNER JOIN t2 on t1.id=t2.id order by t2.id;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_loops": 1,
+    "r_total_time_ms": 0.0533,
+    "table": {
+      "table_name": "t2",
+      "access_type": "index",
+      "possible_keys": ["PRIMARY"],
+      "key": "PRIMARY",
+      "key_length": "32",
+      "used_key_parts": ["id"],
+      "r_loops": 1,
+      "rows": 4,
+      "r_rows": 4,
+      "r_total_time_ms": 0.0149,
+      "filtered": 100,
+      "r_filtered": 100,
+      "using_index": true
+    },
+    "table": {
+      "table_name": "t1",
+      "access_type": "eq_ref",
+      "possible_keys": ["PRIMARY"],
+      "key": "PRIMARY",
+      "key_length": "32",
+      "used_key_parts": ["id"],
+      "ref": ["test.t2.id"],
+      "r_loops": 4,
+      "rows": 1,
+      "r_rows": 1,
+      "r_total_time_ms": 0.0152,
+      "filtered": 100,
+      "r_filtered": 100,
+      "using_index": true
+    }
+  }
+}
+drop table t1,t2;
diff --git a/mysql-test/t/order_by.test b/mysql-test/t/order_by.test
index b047a31c863..62605a4b221 100644
--- a/mysql-test/t/order_by.test
+++ b/mysql-test/t/order_by.test
@@ -2187,3 +2187,18 @@ eval explain extended $q;
 set optimizer_switch= @save_optimizer_switch;
 
 DROP TABLE books, wings;
+
+--echo #
+--echo # MDEV-13275:Query optimizer not picking optimal ORDER BY PRIMARY in case of INNER JOIN on PRIMARY
+--echo # like it does for similar WHERE condition
+--echo #
+
+create table t1 (id char(32) NOT NULL primary key);
+insert into t1 values (0), (1), (2), (3), (4), (5), (6), (7), (8), (9);
+create table t2 (id char(32) NOT NULL primary key);
+insert into t2 values (0), (1), (2), (3);
+explain select t1.id from t1 INNER JOIN t2 on t1.id=t2.id order by t1.id;
+analyze format=json select t1.id from t1 INNER JOIN t2 on t1.id=t2.id order by t1.id;
+explain select t1.id from t1 INNER JOIN t2 on t1.id=t2.id order by t2.id;
+analyze format=json select t1.id from t1 INNER JOIN t2 on t1.id=t2.id order by t2.id;
+drop table t1,t2;
diff --git a/sql/item.cc b/sql/item.cc
index 007b4f4bd54..dd9e5bfaa1a 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -5898,6 +5898,13 @@ Item *Item_field::propagate_equal_fields(THD *thd,
   return item;
 }
 
+Item *Item_field::propagate_equal_fields_helper(THD *thd,
+                                         COND_EQUAL *arg)
+{
+  return propagate_equal_fields(thd,Context(ANY_SUBST,
+                                result_type(),
+                                collation.collation), arg);
+}
 
 /**
   Replace an Item_field for an equal Item_field that evaluated earlier
diff --git a/sql/item.h b/sql/item.h
index 8921ee76f6a..4f9d20f4f49 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -2671,6 +2671,7 @@ class Item_field :public Item_ident
   void set_item_equal(Item_equal *item_eq) { item_equal= item_eq; }
   Item_equal *find_item_equal(COND_EQUAL *cond_equal);
   Item* propagate_equal_fields(THD *, const Context &, COND_EQUAL *);
+  Item* propagate_equal_fields_helper(THD *, COND_EQUAL *);
   Item *replace_equal_field(THD *thd, uchar *arg);
   inline uint32 max_disp_length() { return field->max_display_length(); }
   Item_field *field_for_view_update() { return this; }
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 6450eb03d4d..55279fc56a4 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -12608,19 +12608,27 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
             table_map first_table_bit=
               join->join_tab[join->const_tables].table->map;
 
-            Item *item= order->item[0];
+            Item_field* item= (Item_field *)order->item[0];
 
             /*
-              TODO: equality substitution in the context of ORDER BY is 
+              Equality substitution in the context of ORDER BY is
               sometimes allowed when it is not allowed in the general case.
               
+              An example of this would be when we are using a collation that
+              is case insensitive so 'a' and 'A' would be the same but HEX(a)
+              and HEX('A') are different, so we don't make this substitution in
+              general case but the sustitutions works well with comparision, so
+              if we have 'a' < 'das' then we can substitute this with
+              'A' < 'das'.
+              For equality propagation the fields after substituion would be
+              used for comparision so we can do these substitutions in the
+              order by clause.
+
               We make the below call for its side effect: it will locate the
               multiple equality the item belongs to and set item->item_equal
               accordingly.
             */
-            Item *res= item->propagate_equal_fields(join->thd,
-                                                    Value_source::
-                                                    Context_identity(),
+            Item *res= item->propagate_equal_fields_helper(join->thd,
                                                     join->cond_equal);
             Item_equal *item_eq;
             if ((item_eq= res->get_item_equal()))


More information about the commits mailing list