[Commits] Rev 2810: MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation in file:///home/tsk/mprog/src/5.3-mwl89/

timour at askmonty.org timour at askmonty.org
Fri Sep 17 13:17:38 EEST 2010


At file:///home/tsk/mprog/src/5.3-mwl89/

------------------------------------------------------------
revno: 2810
revision-id: timour at askmonty.org-20100917101727-tzznd9k9wrm9jadf
parent: timour at askmonty.org-20100916140658-2hu9mhv2yqfzax2p
committer: timour at askmonty.org
branch nick: 5.3-mwl89
timestamp: Fri 2010-09-17 13:17:27 +0300
message:
  MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation
  
  * Fixed a "crack" between semijoin analysis and materialization analysis
    where semijoin didn't set the correct strategy for the IN predicate.
  
  * Cosmetic changes in the code/comments.
-------------- next part --------------
=== modified file 'sql/opt_subselect.cc'
--- a/sql/opt_subselect.cc	2010-09-16 14:06:58 +0000
+++ b/sql/opt_subselect.cc	2010-09-17 10:17:27 +0000
@@ -74,8 +74,6 @@ int check_and_do_in_subquery_rewrites(JO
       1) this join is inside a subquery (of any type except FROM-clause 
          subquery) and
       2) we aren't just normalizing a VIEW
-      3) The join and its select_lex object do not represent the 'fake'
-         select used to compute the result of a UNION.
 
     Then perform early unconditional subquery transformations:
      - Convert subquery predicate into semi-join, or
@@ -87,9 +85,8 @@ int check_and_do_in_subquery_rewrites(JO
     TODO: for PS, make the whole block execute only on the first execution
   */
   Item_subselect *subselect;
-  if (!thd->lex->view_prepare_mode &&              // (1)
-      (subselect= parent_unit->item))// &&            // (2)
-//      select_lex == parent_unit->fake_select_lex)  // (3)
+  if (!thd->lex->view_prepare_mode &&            // (1)
+      (subselect= parent_unit->item))            // (2)
   {
     Item_in_subselect *in_subs= NULL;
     if (subselect->substype() == Item_subselect::IN_SUBS)
@@ -134,7 +131,13 @@ int check_and_do_in_subquery_rewrites(JO
         DBUG_RETURN(-1); /* purecov: deadcode */
     }
     if (select_lex == parent_unit->fake_select_lex)
+    {
+      /*
+        The join and its select_lex object represent the 'fake' select used
+        to compute the result of a UNION.
+      */
       DBUG_RETURN(0);
+    }
 
     DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
     /*
@@ -528,6 +531,15 @@ skip_conversion:
                                      FALSE))
         DBUG_RETURN(TRUE);
     }
+    /*
+      Revert to the IN->EXISTS strategy in the rare case when the subquery could
+      be flattened.
+      TODO: This is a limitation done for simplicity. Such subqueries could also
+      be executed via materialization. In order to determine this, we should
+      re-run the test for materialization that was done in
+      check_and_do_in_subquery_rewrites.
+    */
+    (*in_subq)->exec_method= Item_in_subselect::IN_TO_EXISTS;
   }
 
   if (arena)
@@ -3524,6 +3536,34 @@ static void remove_subq_pushed_predicate
 }
 
 
+/**
+  Setup for execution all subqueries of a query, for which the optimizer
+  chose hash semi-join.
+
+  @details Iterate over all immediate child subqueries of the query, and if
+  they are under an IN predicate, and the optimizer chose to compute it via
+  materialization:
+  - optimize each subquery,
+  - choose an optimial execution strategy for the IN predicate - either
+    materialization, or an IN=>EXISTS transformation with an approriate
+    engine.
+
+  This phase must be called after substitute_for_best_equal_field() because
+  that function may replace items with other items from a multiple equality,
+  and we need to reference the correct items in the index access method of the
+  IN predicate.
+
+  @return Operation status
+  @retval FALSE     success.
+  @retval TRUE      error occurred.
+*/
+
+bool JOIN::optimize_unflattened_subqueries()
+{
+  return select_lex->optimize_unflattened_subqueries();
+}
+
+
 bool JOIN::choose_subquery_plan()
 {
   double mat_strategy_cost;    /* The cost to compute IN via materialization. */
@@ -3564,7 +3604,13 @@ bool JOIN::choose_subquery_plan()
     }
   }
   else
-    in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
+  {
+    /*
+      Previous optimizer phases should have chosen either a materialization
+      or IN->EXISTS strategy.
+    */
+    DBUG_ASSERT(in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS);
+  }
 
   if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION)
   {

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2010-09-05 15:43:47 +0000
+++ b/sql/sql_select.cc	2010-09-17 10:17:27 +0000
@@ -2618,34 +2618,6 @@ err:
 }
 
 
-/**
-  Setup for execution all subqueries of a query, for which the optimizer
-  chose hash semi-join.
-
-  @details Iterate over all immediate child subqueries of the query, and if
-  they are under an IN predicate, and the optimizer chose to compute it via
-  materialization:
-  - optimize each subquery,
-  - choose an optimial execution strategy for the IN predicate - either
-    materialization, or an IN=>EXISTS transformation with an approriate
-    engine.
-
-  This phase must be called after substitute_for_best_equal_field() because
-  that function may replace items with other items from a multiple equality,
-  and we need to reference the correct items in the index access method of the
-  IN predicate.
-
-  @return Operation status
-  @retval FALSE     success.
-  @retval TRUE      error occurred.
-*/
-
-bool JOIN::optimize_unflattened_subqueries()
-{
-  return select_lex->optimize_unflattened_subqueries();
-}
-
-
 /*****************************************************************************
   Create JOIN_TABS, make a guess about the table types,
   Approximate how many records will be used in each table



More information about the commits mailing list