[Commits] d4edb0510ec: MDEV-20646: 10.3.18 is slower than 10.3.17

psergey sergey at mariadb.com
Wed Nov 13 17:53:59 EET 2019


revision-id: d4edb0510ec1189f65850bb47977e94ed98b1f71 (mariadb-10.3.20-5-gd4edb0510ec)
parent(s): 5098d708a07f90484c9e13fe3ab58113a8a10191
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-13 18:53:59 +0300
message:

MDEV-20646: 10.3.18 is slower than 10.3.17

Fix incorrect change introduced in the fix for MDEV-20109.

The patch tried to compute a more precise estimate for the record_count
value in SJ-Materialization-Scan strategy (in
Sj_materialization_picker::check_qep).  However the new formula is worse
as it produces extremely optimistic results in common cases where
SJ-Materialization-Scan should be used)

The old formula produces pessimistic results in cases when Sj-Materialization-
Scan is unlikely to be a good choice anyway. So, the old behavior is better.

---
 mysql-test/main/subselect_sj2_mat.result | 14 +++++++-------
 mysql-test/main/subselect_sj_jcl6.result |  8 ++++----
 sql/opt_subselect.cc                     | 17 ++++++++++++++++-
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/mysql-test/main/subselect_sj2_mat.result b/mysql-test/main/subselect_sj2_mat.result
index dd9f560aeed..ab3e4652deb 100644
--- a/mysql-test/main/subselect_sj2_mat.result
+++ b/mysql-test/main/subselect_sj2_mat.result
@@ -1855,18 +1855,18 @@ AND t3.id_product IN (SELECT id_product FROM t2 t2_3 WHERE t2_3.id_t2 = 18 OR t2
 AND t3.id_product IN (SELECT id_product FROM t2 t2_4 WHERE t2_4.id_t2 = 34 OR t2_4.id_t2 = 23) 
 AND t3.id_product IN (SELECT id_product FROM t2 t2_5 WHERE t2_5.id_t2 = 29 OR t2_5.id_t2 = 28 OR t2_5.id_t2 = 26);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<subquery3>	ALL	distinct_key	NULL	NULL	NULL	12	
-1	PRIMARY	t3	eq_ref	PRIMARY	PRIMARY	4	test.t2_2.id_product	1	Using where; Using index
+1	PRIMARY	t1	index	NULL	PRIMARY	8	NULL	73	Using index
+1	PRIMARY	t3	eq_ref	PRIMARY	PRIMARY	4	test.t1.id_product	1	Using index
 1	PRIMARY	<subquery5>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
 1	PRIMARY	<subquery4>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
-1	PRIMARY	<subquery6>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
+1	PRIMARY	<subquery3>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
 1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
+1	PRIMARY	t4	eq_ref	PRIMARY	PRIMARY	8	test.t1.id_product,const	1	Using where; Using index
+1	PRIMARY	<subquery6>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
 1	PRIMARY	t5	ALL	NULL	NULL	NULL	NULL	18	Using where; Using join buffer (flat, BNL join)
-1	PRIMARY	t4	eq_ref	PRIMARY	PRIMARY	8	test.t3.id_product,const	1	Using where; Using index
-1	PRIMARY	t1	index	NULL	PRIMARY	8	NULL	73	Using where; Using index; Using join buffer (flat, BNL join)
-3	MATERIALIZED	t2_2	ref	id_t2,id_product	id_t2	5	const	12	Using where
 5	MATERIALIZED	t2_4	range	id_t2,id_product	id_t2	5	NULL	18	Using index condition; Using where
 4	MATERIALIZED	t2_3	range	id_t2,id_product	id_t2	5	NULL	32	Using index condition; Using where
-6	MATERIALIZED	t2_5	range	id_t2,id_product	id_t2	5	NULL	30	Using index condition; Using where
+3	MATERIALIZED	t2_2	ref	id_t2,id_product	id_t2	5	const	12	
 2	MATERIALIZED	t2_1	ref	id_t2,id_product	id_t2	5	const	50	
+6	MATERIALIZED	t2_5	range	id_t2,id_product	id_t2	5	NULL	30	Using index condition; Using where
 drop table t1,t2,t3,t4,t5;
diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result
index 6278b5a0cf5..e9a19b2a1c3 100644
--- a/mysql-test/main/subselect_sj_jcl6.result
+++ b/mysql-test/main/subselect_sj_jcl6.result
@@ -3527,8 +3527,8 @@ EXPLAIN
 SELECT a FROM t1 t WHERE a IN (SELECT b FROM t1, t2 WHERE b = a)
 GROUP BY a HAVING a != 'z';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<subquery2>	ALL	distinct_key	NULL	NULL	NULL	2	Using temporary; Using filesort
-1	PRIMARY	t	ref	idx_a	idx_a	4	test.t2.b	2	Using index
+1	PRIMARY	t	index	idx_a	idx_a	4	NULL	3	Using index
+1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	2	Using where
 2	MATERIALIZED	t1	ref	idx_a	idx_a	4	test.t2.b	2	Using index
 SELECT a FROM t1 t WHERE a IN (SELECT b FROM t1, t2 WHERE b = a)
@@ -3541,8 +3541,8 @@ EXPLAIN
 SELECT a FROM t1 t WHERE a IN (SELECT b FROM t1, t2 WHERE b = a)
 GROUP BY a HAVING a != 'z';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<subquery2>	ALL	distinct_key	NULL	NULL	NULL	2	Using temporary; Using filesort
-1	PRIMARY	t	ref	idx_a	idx_a	4	test.t2.b	2	Using index
+1	PRIMARY	t	index	idx_a	idx_a	4	NULL	3	Using index
+1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	2	Using where
 2	MATERIALIZED	t1	ref	idx_a	idx_a	4	test.t2.b	2	Using index
 SELECT a FROM t1 t WHERE a IN (SELECT b FROM t1, t2 WHERE b = a)
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index aeafc13998a..a8afd952a4d 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -3029,7 +3029,22 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
 
     *strategy= SJ_OPT_MATERIALIZE_SCAN;
     *read_time=    prefix_cost;
-    *record_count= prefix_rec_count / mat_info->rows_with_duplicates;
+    /*
+      Note: the next line means we did not remove the subquery's fanout from
+      *record_count. It needs to be removed, as the join prefix is
+
+        ntX  SJM-SCAN(it1 ... itN) | (ot1 ... otN) ...
+
+      here, the SJM-SCAN may have introduced subquery's fanout (duplicate rows,
+      rows that don't have matches in ot1_i). All this fanout is gone after
+      table otN (or earlier) but taking it into account is hard.
+
+      Some consolation here is that SJM-Scan strategy is applicable when the
+      subquery is smaller than tables otX. If the subquery has large cardinality,
+      we can greatly overestimate *record_count here, but it doesn't matter as
+      SJ-Materialization-Lookup is a better strategy anyway.
+    */
+    *record_count= prefix_rec_count;
     *handled_fanout= mat_nest->sj_inner_tables;
     return TRUE;
   }


More information about the commits mailing list