[Commits] 3032cd8e91f: step #1: if a derived table has SELECT DISTINCT, provide index statistics for it so that the join optimizer in the

Varun varunraiko1803 at gmail.com
Mon Apr 22 16:39:45 EEST 2019


revision-id: 3032cd8e91f1e1ead8b6f941e75cd29e473e7eaa (mariadb-10.3.10-283-g3032cd8e91f)
parent(s): f4019f5b3544a18f3ddf32df2c5214c3f8dabdce
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-04-22 18:19:25 +0530
message:

step #1: if a derived table has SELECT DISTINCT, provide index statistics for it so that the join optimizer in the
upper select knows that ref access to the table will produce one row.

---
 mysql-test/main/cte_nonrecursive.result            |  8 ++--
 mysql-test/main/derived.result                     | 54 ++++++++++++++++++++++
 mysql-test/main/derived.test                       | 30 ++++++++++++
 mysql-test/main/derived_view.result                |  2 +-
 mysql-test/main/subselect_extra.result             |  2 +-
 mysql-test/main/subselect_extra_no_semijoin.result |  2 +-
 sql/sql_lex.h                                      |  1 +
 sql/sql_union.cc                                   | 40 ++++++++++++++++
 sql/table.cc                                       | 20 ++++++++
 9 files changed, 152 insertions(+), 7 deletions(-)

diff --git a/mysql-test/main/cte_nonrecursive.result b/mysql-test/main/cte_nonrecursive.result
index b846ec2d8ac..d80d34ecc7f 100644
--- a/mysql-test/main/cte_nonrecursive.result
+++ b/mysql-test/main/cte_nonrecursive.result
@@ -244,7 +244,7 @@ with t as (select distinct a from t1 where b >= 'c')
 select * from t as r1, t as r2 where r1.a=r2.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	8	Using where
-1	PRIMARY	<derived3>	ref	key0	key0	5	r1.a	2	
+1	PRIMARY	<derived3>	ref	key0	key0	5	r1.a	1	
 3	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary
 explain
@@ -253,7 +253,7 @@ select * from (select distinct a from t1 where b >= 'c') as r1,
 where r1.a=r2.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	8	Using where
-1	PRIMARY	<derived3>	ref	key0	key0	5	r1.a	2	
+1	PRIMARY	<derived3>	ref	key0	key0	5	r1.a	1	
 3	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary
 # two references to t specified by a query
@@ -369,7 +369,7 @@ select c as a from t2 where c < 4)
 select * from t2,t where t2.c=t.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	4	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.c	2	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.c	1	
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where
 3	UNION	t2	ALL	NULL	NULL	NULL	NULL	4	Using where
 NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
@@ -381,7 +381,7 @@ select c as a from t2 where c < 4) as t
 where t2.c=t.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	4	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.c	2	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.c	1	
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where
 3	UNION	t2	ALL	NULL	NULL	NULL	NULL	4	Using where
 NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
diff --git a/mysql-test/main/derived.result b/mysql-test/main/derived.result
index f0d0289c1ce..857246d68b4 100644
--- a/mysql-test/main/derived.result
+++ b/mysql-test/main/derived.result
@@ -1195,3 +1195,57 @@ drop table t1,t2,t3;
 #
 # End of 10.2 tests
 #
+#
+# MDEV-9959: A serious MariaDB server performance bug
+#
+create table t1(a int);
+insert into t1 values (1),(2),(3),(4),(5),(6);
+create table t2(a int, b int,c int);
+insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5);
+create table t3(a int, b int);
+insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2);
+table "<derived2>" should have type=ref and rows=1
+one select in derived table
+with distinct
+analyze select * from t1 , ((select distinct t2.a from t2 order by c))q  where t1.a=q.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	1.00	100.00	100.00	
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using temporary; Using filesort
+analyze select * from t1 , ((select distinct t2.a, t2.b from t2 order by c))q  where t1.a=q.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	2	1.00	100.00	100.00	
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using temporary; Using filesort
+# multiple selects in derived table
+# NO UNION ALL
+analyze select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	1.00	100.00	100.00	
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+3	UNION	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	6.00	NULL	NULL	
+select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
+a	a
+1	1
+2	2
+3	3
+4	4
+5	5
+6	6
+# UNION ALL and EXCEPT
+analyze select * from t1 , ( (select t2.a from t2 order by c) union all (select  t2.a from t2 order by c) except(select t3.a from t3 order by b))q  where t1.a=q.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	0.50	100.00	100.00	
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+3	UNION	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+4	EXCEPT	t3	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+NULL	UNIT RESULT	<unit2,3,4>	ALL	NULL	NULL	NULL	NULL	NULL	3.00	NULL	NULL	
+select * from t1 , ( (select t2.a from t2 order by c) union all (select  t2.a from t2 order by c) except(select t3.a from t3 order by b))q  where t1.a=q.a;
+a	a
+3	3
+4	4
+6	6
+drop table t1,t2,t3;
diff --git a/mysql-test/main/derived.test b/mysql-test/main/derived.test
index 6c51f23c51e..990f955450a 100644
--- a/mysql-test/main/derived.test
+++ b/mysql-test/main/derived.test
@@ -1032,3 +1032,33 @@ drop table t1,t2,t3;
 --echo #
 --echo # End of 10.2 tests
 --echo #
+
+--echo #
+--echo # MDEV-9959: A serious MariaDB server performance bug
+--echo #
+
+create table t1(a int);
+insert into t1 values (1),(2),(3),(4),(5),(6);
+create table t2(a int, b int,c int);
+insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5);
+create table t3(a int, b int);
+insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2);
+
+--echo table "<derived2>" should have type=ref and rows=1
+--echo one select in derived table
+
+--echo with distinct
+analyze select * from t1 , ((select distinct t2.a from t2 order by c))q  where t1.a=q.a; 
+analyze select * from t1 , ((select distinct t2.a, t2.b from t2 order by c))q  where t1.a=q.a;
+
+--echo # multiple selects in derived table
+--echo # NO UNION ALL
+analyze select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
+select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
+
+--echo # UNION ALL and EXCEPT
+analyze select * from t1 , ( (select t2.a from t2 order by c) union all (select  t2.a from t2 order by c) except(select t3.a from t3 order by b))q  where t1.a=q.a;
+
+select * from t1 , ( (select t2.a from t2 order by c) union all (select  t2.a from t2 order by c) except(select t3.a from t3 order by b))q  where t1.a=q.a;
+
+drop table t1,t2,t3;
diff --git a/mysql-test/main/derived_view.result b/mysql-test/main/derived_view.result
index 86dd73f5733..30831e75341 100644
--- a/mysql-test/main/derived_view.result
+++ b/mysql-test/main/derived_view.result
@@ -1525,7 +1525,7 @@ EXPLAIN
 SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
-1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	2	FirstMatch(t1)
+1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	1	FirstMatch(t1)
 3	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	
 4	UNION	t3	ALL	NULL	NULL	NULL	NULL	4	
 NULL	UNION RESULT	<union3,4>	ALL	NULL	NULL	NULL	NULL	NULL	
diff --git a/mysql-test/main/subselect_extra.result b/mysql-test/main/subselect_extra.result
index a3a0f1f9a15..dbcf00268c2 100644
--- a/mysql-test/main/subselect_extra.result
+++ b/mysql-test/main/subselect_extra.result
@@ -409,7 +409,7 @@ EXPLAIN
 SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
-1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	2	FirstMatch(t1)
+1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	1	FirstMatch(t1)
 3	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	
 4	UNION	t3	ALL	NULL	NULL	NULL	NULL	4	
 NULL	UNION RESULT	<union3,4>	ALL	NULL	NULL	NULL	NULL	NULL	
diff --git a/mysql-test/main/subselect_extra_no_semijoin.result b/mysql-test/main/subselect_extra_no_semijoin.result
index ec9ddb0452e..49a1431eb9b 100644
--- a/mysql-test/main/subselect_extra_no_semijoin.result
+++ b/mysql-test/main/subselect_extra_no_semijoin.result
@@ -411,7 +411,7 @@ EXPLAIN
 SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
-2	DEPENDENT SUBQUERY	<derived3>	index_subquery	key0	key0	10	func,func	2	Using where
+2	DEPENDENT SUBQUERY	<derived3>	index_subquery	key0	key0	10	func,func	1	Using where
 3	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	
 4	UNION	t3	ALL	NULL	NULL	NULL	NULL	4	
 NULL	UNION RESULT	<union3,4>	ALL	NULL	NULL	NULL	NULL	NULL	
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 72ca4ac0b43..4eaec7d062b 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -896,6 +896,7 @@ class st_select_lex_unit: public st_select_lex_node {
   bool union_needs_tmp_table();
 
   void set_unique_exclude();
+  bool check_distinct_in_union();
 
   friend struct LEX;
   friend int subselect_union_engine::exec();
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index 87fbbebe4ba..3fb5552c77a 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -2049,3 +2049,43 @@ void st_select_lex_unit::set_unique_exclude()
     }
   }
 }
+
+/**
+  @brief
+  Check if the derived table is guaranteed to have distinct rows because of
+  UNION operations used to populate it.
+
+  @detail
+    UNION operation removes duplicate rows from its output. That is, a query like
+
+      select * from t1 UNION select * from t2
+
+    will not produce duplicate rows in its output, even if table t1 (and/or t2)
+    contain duplicate rows. EXCEPT and INTERSECT operations also have this
+    property.
+
+    On the other hand, UNION ALL operation doesn't remove duplicates. (The SQL
+    standard also defines EXCEPT ALL and INTERSECT ALL, but we don't support
+    them).
+
+    st_select_lex_unit computes its value left to right. That is, if there is
+     a st_select_lex_unit object describing
+
+      (select #1) OP1 (select #2) OP2 (select #3)
+
+    then ((select #1) OP1 (select #2)) is computed first, and OP2 is computed
+    second.
+
+    How can one tell if st_select_lex_unit is guaranteed to have distinct
+    output rows? This depends on whether the last operation was duplicate-
+    removing or not:
+    - UNION ALL is not duplicate-removing
+    - all other operations are duplicate-removing
+*/
+
+bool st_select_lex_unit::check_distinct_in_union()
+{
+  if (union_distinct && !union_distinct->next_select())
+    return true;
+  return false;
+}
diff --git a/sql/table.cc b/sql/table.cc
index 80995abc1f9..c4494c9ae4b 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -7269,6 +7269,26 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
     key_part_info++;
   }
 
+  /*
+    For the case when there is a derived table that would give distinct rows,
+    the index statistics are passed to the join optimizer to tell that a ref
+    access to all the fields of the derived table will produce only one row.
+  */
+
+  st_select_lex_unit* derived= pos_in_table_list ?
+                               pos_in_table_list->derived: NULL;
+  if (derived)
+  {
+    st_select_lex* first= derived->first_select();
+    uint select_list_items= first->get_item_list()->elements;
+    if (key_parts == select_list_items)
+    {
+      if ((!first->is_part_of_union() && (first->options & SELECT_DISTINCT)) ||
+          derived->check_distinct_in_union())
+        keyinfo->rec_per_key[key_parts - 1]= 1;
+    }
+  }
+
   set_if_bigger(s->max_key_length, keyinfo->key_length);
   s->keys++;
   return FALSE;


More information about the commits mailing list