[Commits] db71ea2: MDEV-7474: Semi-Join's DuplicateWeedout strategy skipped ...

Sergei Petrunia psergey at askmonty.org
Fri Mar 13 17:46:12 EET 2015


revision-id: db71ea247d8e6f470a04e789599565638380d4b3
parent(s): ed04c40b01c122436eda6552c550d62ce8a3920b
committer: Sergei Petrunia
branch nick: 10.0
timestamp: 2015-03-13 18:46:12 +0300
message:

MDEV-7474: Semi-Join's DuplicateWeedout strategy skipped ...

JOIN::cur_dups_producing_tables was not maintained correctly in
the cases of greedy optimization (search_depth > n_tables).

Moved it to POSITION structure where it will be maintained automatically.

Removed POSITION::prefix_dups_producing_tables since its value can now
be calculated.

---
 mysql-test/r/subselect_sj2.result      |   84 ++++++++++++++++++++++++++++++++
 mysql-test/r/subselect_sj2_jcl6.result |   84 ++++++++++++++++++++++++++++++++
 mysql-test/r/subselect_sj2_mat.result  |   84 ++++++++++++++++++++++++++++++++
 mysql-test/t/subselect_sj2.test        |   71 +++++++++++++++++++++++++++
 sql/opt_subselect.cc                   |   17 ++++---
 sql/opt_subselect.h                    |    2 +-
 sql/sql_select.cc                      |    1 -
 sql/sql_select.h                       |   14 +++---
 8 files changed, 341 insertions(+), 16 deletions(-)

diff --git a/mysql-test/r/subselect_sj2.result b/mysql-test/r/subselect_sj2.result
index 43ba6ea..9a5da71 100644
--- a/mysql-test/r/subselect_sj2.result
+++ b/mysql-test/r/subselect_sj2.result
@@ -1178,5 +1178,89 @@ id	nombre
 2	row 2
 3	row 3
 DROP TABLE t1, t2;
+#
+# MDEV-7474: Semi-Join's DuplicateWeedout strategy skipped for some values of optimizer_search_depth
+#
+CREATE TABLE t1 (
+t1id BIGINT(20) NOT NULL,
+code VARCHAR(20),
+PRIMARY KEY (t1id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+CREATE TABLE t2 (
+t2id BIGINT(20) NOT NULL,
+t1idref BIGINT(20) NOT NULL,
+code VARCHAR(20),
+PRIMARY KEY (t2id),
+INDEX FK_T2_T1Id (t1idref),
+CONSTRAINT FK_T2_T1Id FOREIGN KEY (t1idref) REFERENCES t1 (t1id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+CREATE TABLE t3 (
+t3idref BIGINT(20) NOT NULL,
+t2idref BIGINT(20) NOT NULL,
+sequencenumber INT(10) NOT NULL,
+PRIMARY KEY (t3idref, t2idref),
+INDEX FK_T3_T2Id (t2idref),
+CONSTRAINT FK_T3_T2Id FOREIGN KEY (t2idref) REFERENCES t2 (t2id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+INSERT INTO t1 (t1id) VALUES (100001),(100017),(100018),(100026),(100027),(100028),(100029),(100030),
+(100031),(100032),(100033),(100034),(100035),(100036),(100037),(100038),(100040),(100041),(100042),
+(100043),(100044),(100045),(100046),(100047);
+INSERT IGNORE INTO t2 (t2id, t1idref) SELECT t1id, t1id FROM t1;
+INSERT IGNORE INTO t1 VALUES (200001, 'a');
+INSERT IGNORE INTO t2 (t2id, t1idref) VALUES (200011, 200001),(200012, 200001),(200013, 200001);
+INSERT IGNORE INTO t3 VALUES (1, 200011, 1),  (1, 200012, 2), (1, 200013, 3);
+set @tmp7474= @@optimizer_search_depth;
+SET SESSION optimizer_search_depth = 1;
+SELECT SQL_NO_CACHE 
+T2_0_.t1idref,
+T2_0_.t2id
+FROM
+t2 T2_0_ 
+WHERE
+T2_0_.t1idref IN (
+SELECT
+T1_1_.t1id 
+FROM
+t3 T3_0_ 
+INNER JOIN
+t2 T2_1_ 
+ON T3_0_.t2idref=T2_1_.t2id 
+INNER JOIN
+t1 T1_1_ 
+ON T2_1_.t1idref=T1_1_.t1id            
+WHERE
+T3_0_.t3idref= 1
+);
+t1idref	t2id
+200001	200011
+200001	200012
+200001	200013
+explain SELECT SQL_NO_CACHE 
+T2_0_.t1idref,
+T2_0_.t2id
+FROM
+t2 T2_0_ 
+WHERE
+T2_0_.t1idref IN (
+SELECT
+T1_1_.t1id 
+FROM
+t3 T3_0_ 
+INNER JOIN
+t2 T2_1_ 
+ON T3_0_.t2idref=T2_1_.t2id 
+INNER JOIN
+t1 T1_1_ 
+ON T2_1_.t1idref=T1_1_.t1id            
+WHERE
+T3_0_.t3idref= 1
+);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	T3_0_	ref	PRIMARY,FK_T3_T2Id	PRIMARY	8	const	3	Using index; Start temporary
+1	PRIMARY	T2_1_	eq_ref	PRIMARY,FK_T2_T1Id	PRIMARY	8	test.T3_0_.t2idref	1	
+1	PRIMARY	T1_1_	eq_ref	PRIMARY	PRIMARY	8	test.T2_1_.t1idref	1	Using index
+1	PRIMARY	T2_0_	ref	FK_T2_T1Id	FK_T2_T1Id	8	test.T2_1_.t1idref	1	Using index; End temporary
+drop table t3,t2,t1;
+set optimizer_search_depth=@tmp7474;
 # This must be the last in the file:
 set optimizer_switch=@subselect_sj2_tmp;
diff --git a/mysql-test/r/subselect_sj2_jcl6.result b/mysql-test/r/subselect_sj2_jcl6.result
index ee8aa39..c10b550 100644
--- a/mysql-test/r/subselect_sj2_jcl6.result
+++ b/mysql-test/r/subselect_sj2_jcl6.result
@@ -1193,6 +1193,90 @@ id	nombre
 2	row 2
 3	row 3
 DROP TABLE t1, t2;
+#
+# MDEV-7474: Semi-Join's DuplicateWeedout strategy skipped for some values of optimizer_search_depth
+#
+CREATE TABLE t1 (
+t1id BIGINT(20) NOT NULL,
+code VARCHAR(20),
+PRIMARY KEY (t1id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+CREATE TABLE t2 (
+t2id BIGINT(20) NOT NULL,
+t1idref BIGINT(20) NOT NULL,
+code VARCHAR(20),
+PRIMARY KEY (t2id),
+INDEX FK_T2_T1Id (t1idref),
+CONSTRAINT FK_T2_T1Id FOREIGN KEY (t1idref) REFERENCES t1 (t1id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+CREATE TABLE t3 (
+t3idref BIGINT(20) NOT NULL,
+t2idref BIGINT(20) NOT NULL,
+sequencenumber INT(10) NOT NULL,
+PRIMARY KEY (t3idref, t2idref),
+INDEX FK_T3_T2Id (t2idref),
+CONSTRAINT FK_T3_T2Id FOREIGN KEY (t2idref) REFERENCES t2 (t2id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+INSERT INTO t1 (t1id) VALUES (100001),(100017),(100018),(100026),(100027),(100028),(100029),(100030),
+(100031),(100032),(100033),(100034),(100035),(100036),(100037),(100038),(100040),(100041),(100042),
+(100043),(100044),(100045),(100046),(100047);
+INSERT IGNORE INTO t2 (t2id, t1idref) SELECT t1id, t1id FROM t1;
+INSERT IGNORE INTO t1 VALUES (200001, 'a');
+INSERT IGNORE INTO t2 (t2id, t1idref) VALUES (200011, 200001),(200012, 200001),(200013, 200001);
+INSERT IGNORE INTO t3 VALUES (1, 200011, 1),  (1, 200012, 2), (1, 200013, 3);
+set @tmp7474= @@optimizer_search_depth;
+SET SESSION optimizer_search_depth = 1;
+SELECT SQL_NO_CACHE 
+T2_0_.t1idref,
+T2_0_.t2id
+FROM
+t2 T2_0_ 
+WHERE
+T2_0_.t1idref IN (
+SELECT
+T1_1_.t1id 
+FROM
+t3 T3_0_ 
+INNER JOIN
+t2 T2_1_ 
+ON T3_0_.t2idref=T2_1_.t2id 
+INNER JOIN
+t1 T1_1_ 
+ON T2_1_.t1idref=T1_1_.t1id            
+WHERE
+T3_0_.t3idref= 1
+);
+t1idref	t2id
+200001	200011
+200001	200012
+200001	200013
+explain SELECT SQL_NO_CACHE 
+T2_0_.t1idref,
+T2_0_.t2id
+FROM
+t2 T2_0_ 
+WHERE
+T2_0_.t1idref IN (
+SELECT
+T1_1_.t1id 
+FROM
+t3 T3_0_ 
+INNER JOIN
+t2 T2_1_ 
+ON T3_0_.t2idref=T2_1_.t2id 
+INNER JOIN
+t1 T1_1_ 
+ON T2_1_.t1idref=T1_1_.t1id            
+WHERE
+T3_0_.t3idref= 1
+);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	T3_0_	ref	PRIMARY,FK_T3_T2Id	PRIMARY	8	const	3	Using index; Start temporary
+1	PRIMARY	T2_1_	eq_ref	PRIMARY,FK_T2_T1Id	PRIMARY	8	test.T3_0_.t2idref	1	Using join buffer (flat, BKA join); Key-ordered scan
+1	PRIMARY	T1_1_	eq_ref	PRIMARY	PRIMARY	8	test.T2_1_.t1idref	1	Using index
+1	PRIMARY	T2_0_	ref	FK_T2_T1Id	FK_T2_T1Id	8	test.T2_1_.t1idref	1	Using index; End temporary
+drop table t3,t2,t1;
+set optimizer_search_depth=@tmp7474;
 # This must be the last in the file:
 set optimizer_switch=@subselect_sj2_tmp;
 #
diff --git a/mysql-test/r/subselect_sj2_mat.result b/mysql-test/r/subselect_sj2_mat.result
index 0ff366b..4e75aee 100644
--- a/mysql-test/r/subselect_sj2_mat.result
+++ b/mysql-test/r/subselect_sj2_mat.result
@@ -1180,6 +1180,90 @@ id	nombre
 2	row 2
 3	row 3
 DROP TABLE t1, t2;
+#
+# MDEV-7474: Semi-Join's DuplicateWeedout strategy skipped for some values of optimizer_search_depth
+#
+CREATE TABLE t1 (
+t1id BIGINT(20) NOT NULL,
+code VARCHAR(20),
+PRIMARY KEY (t1id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+CREATE TABLE t2 (
+t2id BIGINT(20) NOT NULL,
+t1idref BIGINT(20) NOT NULL,
+code VARCHAR(20),
+PRIMARY KEY (t2id),
+INDEX FK_T2_T1Id (t1idref),
+CONSTRAINT FK_T2_T1Id FOREIGN KEY (t1idref) REFERENCES t1 (t1id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+CREATE TABLE t3 (
+t3idref BIGINT(20) NOT NULL,
+t2idref BIGINT(20) NOT NULL,
+sequencenumber INT(10) NOT NULL,
+PRIMARY KEY (t3idref, t2idref),
+INDEX FK_T3_T2Id (t2idref),
+CONSTRAINT FK_T3_T2Id FOREIGN KEY (t2idref) REFERENCES t2 (t2id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+INSERT INTO t1 (t1id) VALUES (100001),(100017),(100018),(100026),(100027),(100028),(100029),(100030),
+(100031),(100032),(100033),(100034),(100035),(100036),(100037),(100038),(100040),(100041),(100042),
+(100043),(100044),(100045),(100046),(100047);
+INSERT IGNORE INTO t2 (t2id, t1idref) SELECT t1id, t1id FROM t1;
+INSERT IGNORE INTO t1 VALUES (200001, 'a');
+INSERT IGNORE INTO t2 (t2id, t1idref) VALUES (200011, 200001),(200012, 200001),(200013, 200001);
+INSERT IGNORE INTO t3 VALUES (1, 200011, 1),  (1, 200012, 2), (1, 200013, 3);
+set @tmp7474= @@optimizer_search_depth;
+SET SESSION optimizer_search_depth = 1;
+SELECT SQL_NO_CACHE 
+T2_0_.t1idref,
+T2_0_.t2id
+FROM
+t2 T2_0_ 
+WHERE
+T2_0_.t1idref IN (
+SELECT
+T1_1_.t1id 
+FROM
+t3 T3_0_ 
+INNER JOIN
+t2 T2_1_ 
+ON T3_0_.t2idref=T2_1_.t2id 
+INNER JOIN
+t1 T1_1_ 
+ON T2_1_.t1idref=T1_1_.t1id            
+WHERE
+T3_0_.t3idref= 1
+);
+t1idref	t2id
+200001	200011
+200001	200012
+200001	200013
+explain SELECT SQL_NO_CACHE 
+T2_0_.t1idref,
+T2_0_.t2id
+FROM
+t2 T2_0_ 
+WHERE
+T2_0_.t1idref IN (
+SELECT
+T1_1_.t1id 
+FROM
+t3 T3_0_ 
+INNER JOIN
+t2 T2_1_ 
+ON T3_0_.t2idref=T2_1_.t2id 
+INNER JOIN
+t1 T1_1_ 
+ON T2_1_.t1idref=T1_1_.t1id            
+WHERE
+T3_0_.t3idref= 1
+);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	T3_0_	ref	PRIMARY,FK_T3_T2Id	PRIMARY	8	const	3	Using index; Start temporary
+1	PRIMARY	T2_1_	eq_ref	PRIMARY,FK_T2_T1Id	PRIMARY	8	test.T3_0_.t2idref	1	
+1	PRIMARY	T1_1_	eq_ref	PRIMARY	PRIMARY	8	test.T2_1_.t1idref	1	Using index
+1	PRIMARY	T2_0_	ref	FK_T2_T1Id	FK_T2_T1Id	8	test.T2_1_.t1idref	1	Using index; End temporary
+drop table t3,t2,t1;
+set optimizer_search_depth=@tmp7474;
 # This must be the last in the file:
 set optimizer_switch=@subselect_sj2_tmp;
 set optimizer_switch=default;
diff --git a/mysql-test/t/subselect_sj2.test b/mysql-test/t/subselect_sj2.test
index a82baf0..0bf9c6d 100644
--- a/mysql-test/t/subselect_sj2.test
+++ b/mysql-test/t/subselect_sj2.test
@@ -1320,5 +1320,76 @@ SELECT * FROM t1 WHERE id in (select distinct id_agente from t2);
 
 DROP TABLE t1, t2;
 
+--echo #
+--echo # MDEV-7474: Semi-Join's DuplicateWeedout strategy skipped for some values of optimizer_search_depth
+--echo #
+
+CREATE TABLE t1 (
+	t1id BIGINT(20) NOT NULL,
+	code VARCHAR(20),
+	PRIMARY KEY (t1id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+
+CREATE TABLE t2 (
+	t2id BIGINT(20) NOT NULL,
+	t1idref BIGINT(20) NOT NULL,
+	code VARCHAR(20),
+	PRIMARY KEY (t2id),
+	INDEX FK_T2_T1Id (t1idref),
+	CONSTRAINT FK_T2_T1Id FOREIGN KEY (t1idref) REFERENCES t1 (t1id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+
+CREATE TABLE t3 (
+	t3idref BIGINT(20) NOT NULL,
+	t2idref BIGINT(20) NOT NULL,
+	sequencenumber INT(10) NOT NULL,
+	PRIMARY KEY (t3idref, t2idref),
+	INDEX FK_T3_T2Id (t2idref),
+	CONSTRAINT FK_T3_T2Id FOREIGN KEY (t2idref) REFERENCES t2 (t2id)
+) COLLATE='utf8mb4_bin' ENGINE=InnoDB;
+
+# Load up dummy data (needed to reproduce issue)
+INSERT INTO t1 (t1id) VALUES (100001),(100017),(100018),(100026),(100027),(100028),(100029),(100030),
+(100031),(100032),(100033),(100034),(100035),(100036),(100037),(100038),(100040),(100041),(100042),
+(100043),(100044),(100045),(100046),(100047);
+
+INSERT IGNORE INTO t2 (t2id, t1idref) SELECT t1id, t1id FROM t1;
+
+# Now the test Data
+INSERT IGNORE INTO t1 VALUES (200001, 'a');
+INSERT IGNORE INTO t2 (t2id, t1idref) VALUES (200011, 200001),(200012, 200001),(200013, 200001);
+INSERT IGNORE INTO t3 VALUES (1, 200011, 1),  (1, 200012, 2), (1, 200013, 3);
+
+set @tmp7474= @@optimizer_search_depth;
+SET SESSION optimizer_search_depth = 1;
+
+let $query=
+SELECT SQL_NO_CACHE 
+T2_0_.t1idref,
+T2_0_.t2id
+FROM
+        t2 T2_0_ 
+WHERE
+        T2_0_.t1idref IN (
+                SELECT
+                        T1_1_.t1id 
+                FROM
+                        t3 T3_0_ 
+                INNER JOIN
+                        t2 T2_1_ 
+                                ON T3_0_.t2idref=T2_1_.t2id 
+                INNER JOIN
+                        t1 T1_1_ 
+                                ON T2_1_.t1idref=T1_1_.t1id            
+                WHERE
+                        T3_0_.t3idref= 1
+);
+
+eval $query;
+eval explain $query;
+
+drop table t3,t2,t1;
+set optimizer_search_depth=@tmp7474;
+
 --echo # This must be the last in the file:
 set optimizer_switch=@subselect_sj2_tmp;
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 2122349..fc4d15e 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -2508,10 +2508,16 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
     LooseScan detector in best_access_path)
   */
   remaining_tables &= ~new_join_tab->table->map;
-  pos->prefix_dups_producing_tables= join->cur_dups_producing_tables;
+  table_map dups_producing_tables;
+
+  if (idx == join->const_tables)
+    dups_producing_tables= 0;
+  else
+    dups_producing_tables= pos[-1].dups_producing_tables;
+
   TABLE_LIST *emb_sj_nest;
   if ((emb_sj_nest= new_join_tab->emb_sj_nest))
-    join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
+    dups_producing_tables |= emb_sj_nest->sj_inner_tables;
 
   Semi_join_strategy_picker **strategy;
   if (idx == join->const_tables)
@@ -2564,7 +2570,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
                    fanout from semijoin X.
                 3. We have no clue what to do about fanount of semi-join Y.
         */
-        if ((join->cur_dups_producing_tables & handled_fanout) ||
+        if ((dups_producing_tables & handled_fanout) ||
             (read_time < *current_read_time && 
              !(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
         {
@@ -2577,7 +2583,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
             join->sjm_lookup_tables &= ~handled_fanout;
           *current_read_time= read_time;
           *current_record_count= rec_count;
-          join->cur_dups_producing_tables &= ~handled_fanout;
+          dups_producing_tables &= ~handled_fanout;
           //TODO: update bitmap of semi-joins that were handled together with
           // others.
           if (is_multiple_semi_joins(join, join->positions, idx, handled_fanout))
@@ -2604,6 +2610,7 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
 
   pos->prefix_cost.convert_from_cost(*current_read_time);
   pos->prefix_record_count= *current_record_count;
+  pos->dups_producing_tables= dups_producing_tables;
 }
 
 
@@ -3115,8 +3122,6 @@ void restore_prev_sj_state(const table_map remaining_tables,
       tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
     }
   }
-  POSITION *pos= tab->join->positions + idx;
-  tab->join->cur_dups_producing_tables= pos->prefix_dups_producing_tables;
 }
 
 
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
index acfbebe..77e1096 100644
--- a/sql/opt_subselect.h
+++ b/sql/opt_subselect.h
@@ -299,7 +299,7 @@ class Loose_scan_opt
 };
 
 
-extern void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
+void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
                       double *current_record_count, double *current_read_time,
                       POSITION *loose_scan_pos);
 void restore_prev_sj_state(const table_map remaining_tables, 
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index eb222b2..7d75c12 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -6466,7 +6466,6 @@ static void choose_initial_table_order(JOIN *join)
   DBUG_ENTER("choose_plan");
 
   join->cur_embedding_map= 0;
-  join->cur_dups_producing_tables= 0;
   reset_nj_counters(join, join->join_list);
   qsort2_cmp jtab_sort_func;
 
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 45041de..4bbbd74 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -825,7 +825,12 @@ class Sj_materialization_picker : public Semi_join_strategy_picker
   */
   uint n_sj_tables;
 
-  table_map prefix_dups_producing_tables;
+  /*
+    Bitmap of semi-join inner tables that are in the join prefix and for
+    which there's no provision for how to eliminate semi-join duplicates
+    they produce.
+  */
+  table_map dups_producing_tables;
 
   table_map inner_tables_handled_with_other_sjs;
    
@@ -1046,13 +1051,6 @@ class JOIN :public Sql_alloc
   */
   table_map cur_sj_inner_tables;
   
-  /*
-    Bitmap of semi-join inner tables that are in the join prefix and for
-    which there's no provision for how to eliminate semi-join duplicates
-    they produce.
-  */
-  table_map cur_dups_producing_tables;
-  
   /* We also maintain a stack of join optimization states in * join->positions[] */
 /******* Join optimization state members end *******/
 


More information about the commits mailing list