[Commits] 2482867f844: MDEV-19134: EXISTS() slower if ORDER BY is defined

Sergei Petrunia psergey at askmonty.org
Sat May 4 21:49:50 EEST 2019


revision-id: 2482867f844eef0648533089181d678a3f157b2f (mariadb-10.4.4-37-g2482867f844)
parent(s): baadbe96019b205164167928d80e836ebbb6bcfe
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-05-04 21:49:50 +0300
message:

MDEV-19134: EXISTS() slower if ORDER BY is defined

Step 1: Removal of ORDER BY [LIMIT] from the subquery should be done
earlier and for broader class of subqueries.

The rewrite was done in Item_in_subselect::select_in_like_transformer(),
but this had problems:
- It didn't cover EXISTS subqueries
- It covered IN-subqueries, but was done after the semi-join transformation
  was considered inapplicable, because ORDER BY was present.

Remaining issue:
- EXISTS->IN transformation happens before
  check_and_do_in_subquery_rewrites() is called, so it is still prevented
  by the present ORDER BY.

---
 mysql-test/main/subselect4.result              | 25 ++++++++++++++++++++++++-
 mysql-test/main/subselect4.test                | 24 ++++++++++++++++++++++++
 mysql-test/main/subselect_innodb.result        |  4 ++--
 mysql-test/main/subselect_mat_cost_bugs.result |  2 +-
 mysql-test/main/subselect_sj_mat.result        | 18 +++++++-----------
 mysql-test/main/subselect_sj_mat.test          |  1 +
 mysql-test/main/union.result                   |  2 +-
 sql/item_subselect.cc                          | 15 ---------------
 sql/opt_subselect.cc                           | 23 ++++++++++++++++++++++-
 9 files changed, 82 insertions(+), 32 deletions(-)

diff --git a/mysql-test/main/subselect4.result b/mysql-test/main/subselect4.result
index bd9ecdc642b..05f65b4f550 100644
--- a/mysql-test/main/subselect4.result
+++ b/mysql-test/main/subselect4.result
@@ -2256,7 +2256,7 @@ SELECT a3 FROM t3 WHERE b2 = b1 AND b2 <= b1 ORDER BY b3
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	system	NULL	NULL	NULL	NULL	1	
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
-3	DEPENDENT SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	2	Using where
+1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	2	Using where; FirstMatch(t1); Using join buffer (flat, BNL join)
 SELECT * FROM t1 WHERE a1 IN ( 
 SELECT a2 FROM t2 WHERE a2 IN ( 
 SELECT a3 FROM t3 WHERE b2 = b1 AND b2 <= b1 ORDER BY b3 
@@ -2534,3 +2534,26 @@ x
 c1
 1
 drop table t1;
+#
+# MDEV-19134: EXISTS() slower if ORDER BY is defined
+#
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int, b int);
+insert into t1 select 
+A.a + B.a*10, A.a + B.a*10 from t0 A, t0 B;
+create table t2 as select * from t1;
+# This will not be able to convert to semi-join but will not require filesort:
+explain 
+select * from t1 where exists (select * from t2 where t2.a=t1.a order by t2.b);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	#	Using where
+2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	#	Using where
+# This will be merged and converted into a semi-join:
+explain 
+select * from t1 where t1.a in (select t2.a from t2 order by t2.b);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	100	
+1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	
+2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	100	
+drop table t0, t1, t2;
diff --git a/mysql-test/main/subselect4.test b/mysql-test/main/subselect4.test
index d5a40419185..5c5dd797353 100644
--- a/mysql-test/main/subselect4.test
+++ b/mysql-test/main/subselect4.test
@@ -2075,3 +2075,27 @@ insert into t1 values(2,1),(1,2);
 select (select c1 from t1 group by c1,c2 order by c1 limit 1) as x;
 (select c1 from t1 group by c1,c2 order by c1 limit 1);
 drop table t1;
+
+--echo #
+--echo # MDEV-19134: EXISTS() slower if ORDER BY is defined
+--echo #
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int, b int);
+insert into t1 select 
+  A.a + B.a*10, A.a + B.a*10 from t0 A, t0 B;
+
+create table t2 as select * from t1;
+
+--echo # This will not be able to convert to semi-join but will not require filesort:
+--replace_column 9 #
+explain 
+select * from t1 where exists (select * from t2 where t2.a=t1.a order by t2.b);
+
+--echo # This will be merged and converted into a semi-join:
+explain 
+select * from t1 where t1.a in (select t2.a from t2 order by t2.b);
+
+drop table t0, t1, t2;
+
diff --git a/mysql-test/main/subselect_innodb.result b/mysql-test/main/subselect_innodb.result
index 8e09be9b705..c0181451f66 100644
--- a/mysql-test/main/subselect_innodb.result
+++ b/mysql-test/main/subselect_innodb.result
@@ -458,7 +458,7 @@ EXPLAIN
 SELECT * FROM t1 WHERE EXISTS ( SELECT b FROM t2, t3 GROUP BY b HAVING b != 3 );
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE
-2	SUBQUERY	t2	index	PRIMARY	PRIMARY	4	NULL	1	Using where; Using index; Using temporary; Using filesort
+2	SUBQUERY	t2	index	PRIMARY	PRIMARY	4	NULL	1	Using where; Using index; Using temporary
 2	SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	1	Using join buffer (flat, BNL join)
 SELECT * FROM t1 WHERE EXISTS ( SELECT b FROM t2, t3 GROUP BY b HAVING b != 3 );
 a
@@ -480,7 +480,7 @@ HAVING SQ2_alias1 . col_int_key >= 7
 );
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE
-2	SUBQUERY	SQ2_alias2	index	NULL	col_int_key	5	NULL	1	Using index; Using temporary; Using filesort
+2	SUBQUERY	SQ2_alias2	index	NULL	col_int_key	5	NULL	1	Using index; Using temporary
 2	SUBQUERY	SQ2_alias1	ref	col_int_key	col_int_key	5	test.SQ2_alias2.col_int_key	1	Using where; Using index
 SELECT 1 FROM t1 AS alias1 
 WHERE EXISTS ( SELECT SQ2_alias1 . col_int_key AS SQ2_field1 
diff --git a/mysql-test/main/subselect_mat_cost_bugs.result b/mysql-test/main/subselect_mat_cost_bugs.result
index 2c696ed36fd..a18c5e608f1 100644
--- a/mysql-test/main/subselect_mat_cost_bugs.result
+++ b/mysql-test/main/subselect_mat_cost_bugs.result
@@ -148,7 +148,7 @@ FROM t2 GROUP BY f1
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE noticed after reading const tables
 2	SUBQUERY	t1	system	NULL	NULL	NULL	NULL	1	
-3	SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	2	Using temporary; Using filesort
+3	SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	2	Using temporary
 drop table t1, t2, t3;
 #
 # LP BUG#715034 Item_sum_distinct::clear(): Assertion `tree != 0' failed
diff --git a/mysql-test/main/subselect_sj_mat.result b/mysql-test/main/subselect_sj_mat.result
index 3fc8f9afd3e..53afa90ca73 100644
--- a/mysql-test/main/subselect_sj_mat.result
+++ b/mysql-test/main/subselect_sj_mat.result
@@ -265,11 +265,11 @@ set @@optimizer_switch=@save_optimizer_switch;
 explain extended
 select * from t1 where (a1, a2) in (select b1, b2 from t2 order by b1, b2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	3	100.00	Using where
-1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	16	test.t1.a1,test.t1.a2	1	100.00	
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	3	100.00	
+1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	16	func,func	1	100.00	
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	5	100.00	
 Warnings:
-Note	1003	/* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from  <materialize> (/* select#2 */ select `test`.`t2`.`b1`,`test`.`t2`.`b2` from `test`.`t2` order by `test`.`t2`.`b1`,`test`.`t2`.`b2`) join `test`.`t1` where `<subquery2>`.`b1` = `test`.`t1`.`a1` and `<subquery2>`.`b2` = `test`.`t1`.`a2`
+Note	1003	select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from `test`.`t1` semi join (`test`.`t2`) where 1
 select * from t1 where (a1, a2) in (select b1, b2 from t2 order by b1, b2);
 a1	a2
 1 - 01	2 - 01
@@ -277,11 +277,10 @@ a1	a2
 explain extended
 select * from t1i where (a1, a2) in (select b1, b2 from t2i order by b1, b2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1i	index	it1i1,it1i2,it1i3	it1i3	18	NULL	3	100.00	Using where; Using index
-1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	16	test.t1i.a1,test.t1i.a2	1	100.00	
-2	MATERIALIZED	t2i	index	NULL	it2i3	18	NULL	5	100.00	Using index
+1	PRIMARY	t2i	index	it2i1,it2i2,it2i3	it2i3	18	NULL	5	50.00	Using where; Using index; LooseScan
+1	PRIMARY	t1i	ref	it1i1,it1i2,it1i3	it1i3	18	test.t2i.b1,test.t2i.b2	1	100.00	Using index
 Warnings:
-Note	1003	/* select#1 */ select `test`.`t1i`.`a1` AS `a1`,`test`.`t1i`.`a2` AS `a2` from  <materialize> (/* select#2 */ select `test`.`t2i`.`b1`,`test`.`t2i`.`b2` from `test`.`t2i` order by `test`.`t2i`.`b1`,`test`.`t2i`.`b2`) join `test`.`t1i` where `<subquery2>`.`b1` = `test`.`t1i`.`a1` and `<subquery2>`.`b2` = `test`.`t1i`.`a2`
+Note	1003	select `test`.`t1i`.`a1` AS `a1`,`test`.`t1i`.`a2` AS `a2` from `test`.`t1i` semi join (`test`.`t2i`) where `test`.`t1i`.`a1` = `test`.`t2i`.`b1` and `test`.`t1i`.`a2` = `test`.`t2i`.`b2`
 select * from t1i where (a1, a2) in (select b1, b2 from t2i order by b1, b2);
 a1	a2
 1 - 01	2 - 01
@@ -1472,10 +1471,7 @@ INSERT INTO t2 VALUES (10,0),(11,0);
 explain SELECT * FROM t1 JOIN t2 USING (f1)
 WHERE t1.f1 IN (SELECT t1.pk FROM t1 ORDER BY t1.f1);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	system	NULL	NULL	NULL	NULL	1	
-1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	const	1	
-1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	2	Using where; Using join buffer (flat, BNL join)
-2	MATERIALIZED	t1	system	NULL	NULL	NULL	NULL	1	
+1	PRIMARY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE noticed after reading const tables
 SELECT * FROM t1 JOIN t2 USING (f1)
 WHERE t1.f1 IN (SELECT t1.pk FROM t1 ORDER BY t1.f1);
 f1	pk	pk
diff --git a/mysql-test/main/subselect_sj_mat.test b/mysql-test/main/subselect_sj_mat.test
index ac0baee3728..74d670bcf61 100644
--- a/mysql-test/main/subselect_sj_mat.test
+++ b/mysql-test/main/subselect_sj_mat.test
@@ -161,6 +161,7 @@ execute st1;
 set @@optimizer_switch=@save_optimizer_switch;
 
 # materialize the result of ORDER BY
+# No longer really happens as the optimizer is now smart enough not to sort in this case
 # non-indexed fields
 explain extended
 select * from t1 where (a1, a2) in (select b1, b2 from t2 order by b1, b2);
diff --git a/mysql-test/main/union.result b/mysql-test/main/union.result
index a0421bae922..00df8e9533d 100644
--- a/mysql-test/main/union.result
+++ b/mysql-test/main/union.result
@@ -2049,7 +2049,7 @@ insert t1 values (1),(2),(3),(1);
 explain select 1 from dual where exists (select max(a) from t1 group by a union select a+2 from t1);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	No tables used
-2	SUBQUERY	t1	ALL	NULL	NULL	NULL	NULL	4	Using temporary; Using filesort
+2	SUBQUERY	t1	ALL	NULL	NULL	NULL	NULL	4	Using temporary
 3	UNION	t1	ALL	NULL	NULL	NULL	NULL	4	
 NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
 drop table t1;
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 2b1c4c174c7..531ce763d86 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -3204,21 +3204,6 @@ Item_in_subselect::select_in_like_transformer(JOIN *join)
 
   DBUG_ENTER("Item_in_subselect::select_in_like_transformer");
   DBUG_ASSERT(thd == join->thd);
-
-  /*
-    IN/SOME/ALL/ANY subqueries aren't support LIMIT clause. Without it
-    ORDER BY clause becomes meaningless thus we drop it here.
-  */
-  for (SELECT_LEX *sl= current->master_unit()->first_select();
-       sl; sl= sl->next_select())
-  {
-    if (sl->join)
-    {
-      sl->join->order= 0;
-      sl->join->skip_sort_order= 1;
-    }
-  }
-
   thd->where= "IN/ALL/ANY subquery";
 
   /*
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 2fedd8a4ed3..809c9378569 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -594,7 +594,8 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
   {
     Item_in_subselect *in_subs= NULL;
     Item_allany_subselect *allany_subs= NULL;
-    switch (subselect->substype()) {
+    Item_subselect::subs_type substype= subselect->substype();
+    switch (substype) {
     case Item_subselect::IN_SUBS:
       in_subs= (Item_in_subselect *)subselect;
       break;
@@ -606,6 +607,26 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
       break;
     }
 
+    /*
+      Try removing "ORDER BY" or even "ORDER BY ... LIMIT" from certain kinds
+      of subqueries. The removal might enable further transformations.
+    */
+    if (substype == Item_subselect::IN_SUBS ||
+        substype == Item_subselect::EXISTS_SUBS ||
+        substype == Item_subselect::ANY_SUBS ||
+        substype == Item_subselect::ALL_SUBS)
+    {
+      // (1) - ORDER BY without LIMIT can be removed from IN/EXISTS subqueries
+      // (2) - for EXISTS, can also remove "ORDER BY ... LIMIT n",
+      //       but cannot remove "ORDER BY ... LIMIT n OFFSET m"
+      if (!select_lex->select_limit ||                               // (1)
+          (substype == Item_subselect::EXISTS_SUBS &&                // (2)
+           !select_lex->offset_limit))                               // (2)
+      {
+        select_lex->join->order= 0;
+        select_lex->join->skip_sort_order= 1;
+      }
+    }
 
     /* Resolve expressions and perform semantic analysis for IN query */
     if (in_subs != NULL)


More information about the commits mailing list