[Commits] Rev 2811: MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation in file:///home/tsk/mprog/src/5.3-mwl89/

timour at askmonty.org timour at askmonty.org
Thu Sep 30 18:32:57 EEST 2010


At file:///home/tsk/mprog/src/5.3-mwl89/

------------------------------------------------------------
revno: 2811
revision-id: timour at askmonty.org-20100930153244-euf58b5c27dpdj0u
parent: timour at askmonty.org-20100917101727-tzznd9k9wrm9jadf
committer: timour at askmonty.org
branch nick: 5.3-mwl89
timestamp: Thu 2010-09-30 18:32:44 +0300
message:
  MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation
  
    Phase 3: Implementation of re-optimization of subqueries with injected predicates
             and cost comparison between Materialization and IN->EXISTS strategies.
  
  The commit contains the following known problems:
  - The implementation of EXPLAIN has not been re-engineered to reflect the
    changes in subquery optimization. EXPLAIN for subqueries is called during
    the execute phase, which results in different code paths during JOIN::optimize
    and thus in differing EXPLAIN messages for constant/system tables.
  - There are some valgrind warnings that need investigation
  - Several EXPLAINs with minor differences need to be reconsidered after fixing
    the EXPLAIN problem above.
  
  This patch also adds one extra optimizer_switch: 'in_to_exists' for complete
  manual control of the subquery execution strategies.
-------------- next part --------------
=== modified file 'mysql-test/r/subselect3.result'
--- a/mysql-test/r/subselect3.result	2010-09-16 13:49:20 +0000
+++ b/mysql-test/r/subselect3.result	2010-09-30 15:32:44 +0000
@@ -840,7 +840,12 @@ x	ROW(11, 12) = (SELECT MAX(x), 22)	ROW(
 1       0       0
 2       0       0
 11      0       0
-# 2nd and 3rd columns should be same for x == 11 only
+# 2nd and 3rd columns should be same
+EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
+id      select_type     table   type    possible_keys   key     key_len ref     rows    Extra
+1       PRIMARY t1      ALL     NULL    NULL    NULL    NULL    3       
+3       DEPENDENT SUBQUERY      NULL    NULL    NULL    NULL    NULL    NULL    NULL    No tables used
+2       DEPENDENT SUBQUERY      NULL    NULL    NULL    NULL    NULL    NULL    NULL    No tables used
 SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
 x       ROW(11, 12) = (SELECT MAX(x), 12)       ROW(11, 12) IN (SELECT MAX(x), 12)
 1       0       0

=== modified file 'mysql-test/r/subselect3_jcl6.result'
--- a/mysql-test/r/subselect3_jcl6.result	2010-07-10 10:37:30 +0000
+++ b/mysql-test/r/subselect3_jcl6.result	2010-09-30 15:32:44 +0000
@@ -844,11 +844,16 @@ x	ROW(11, 12) = (SELECT MAX(x), 22)	ROW(
 1       0       0
 2       0       0
 11      0       0
-# 2nd and 3rd columns should be same for x == 11 only
+# 2nd and 3rd columns should be same
+EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
+id      select_type     table   type    possible_keys   key     key_len ref     rows    Extra
+1       PRIMARY t1      ALL     NULL    NULL    NULL    NULL    3       
+3       DEPENDENT SUBQUERY      NULL    NULL    NULL    NULL    NULL    NULL    NULL    No tables used
+2       DEPENDENT SUBQUERY      NULL    NULL    NULL    NULL    NULL    NULL    NULL    No tables used
 SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
 x       ROW(11, 12) = (SELECT MAX(x), 12)       ROW(11, 12) IN (SELECT MAX(x), 12)
-1       0       1
-2       0       1
+1       0       0
+2       0       0
 11      1       1
 DROP TABLE t1;
 # both columns should be same

=== modified file 'mysql-test/t/subselect3.test'
--- a/mysql-test/t/subselect3.test	2010-07-10 10:37:30 +0000
+++ b/mysql-test/t/subselect3.test	2010-09-30 15:32:44 +0000
@@ -681,7 +681,8 @@ SELECT a, ROW(11, 12) = (SELECT a, 12), 
 # The x alias is used below to workaround bug #40674.
 # Regression tests for sum function on outer column in subselect from dual:
 SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1;
---echo # 2nd and 3rd columns should be same for x == 11 only
+--echo # 2nd and 3rd columns should be same
+EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
 SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
 
 DROP TABLE t1;

=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc	2010-09-16 14:06:58 +0000
+++ b/sql/item_subselect.cc	2010-09-30 15:32:44 +0000
@@ -62,7 +62,6 @@ void Item_subselect::init(st_select_lex 
   DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex));
   unit= select_lex->master_unit();
   thd= unit->thd;
-  is_min_max_optimized= FALSE;
   if (unit->item)
   {
     /*
@@ -160,8 +159,8 @@ void Item_in_subselect::cleanup()
   }
   first_execution= TRUE;
   is_constant= FALSE;
-  if (exec_method == MATERIALIZATION)
-    exec_method= NOT_TRANSFORMED;
+  if (in_strategy & SUBS_MATERIALIZATION)
+    in_strategy= 0;
   pushed_cond_guards= NULL;
   Item_subselect::cleanup();
   DBUG_VOID_RETURN;
@@ -618,7 +617,7 @@ bool Item_in_subselect::exec()
     - on a cost-based basis, that takes into account the cost of a cache
       lookup, the cache hit rate, and the savings per cache hit.
   */
-  if (!left_expr_cache && exec_method == MATERIALIZATION)
+  if (!left_expr_cache && (in_strategy & SUBS_MATERIALIZATION))
     init_left_expr_cache();
 
   /*
@@ -1072,7 +1071,7 @@ Item_in_subselect::Item_in_subselect(Ite
                                      st_select_lex *select_lex):
   Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
   is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL),
-  exec_method(NOT_TRANSFORMED), upper_item(0)
+  in_strategy(0), upper_item(0)
 {
   DBUG_ENTER("Item_in_subselect::Item_in_subselect");
   left_expr= left_exp;
@@ -1488,7 +1487,6 @@ Item_in_subselect::single_value_transfor
     }
     /* fix fields is already called for  left expression */
     substitution= func->create(left_expr, subs);
-    is_min_max_optimized= TRUE;
     DBUG_RETURN(RES_OK);
   }
 
@@ -1555,8 +1553,21 @@ Item_in_subselect::single_value_transfor
 }
 
 
+bool Item_in_subselect::fix_having(Item *having, SELECT_LEX *select_lex)
+{
+  bool fix_res= 0;
+  if (!having->fixed)
+  {
+    select_lex->having_fix_field= 1;
+    fix_res= having->fix_fields(thd, 0);
+    select_lex->having_fix_field= 0;
+  }
+  return fix_res;
+}
+
+
 /**
-  Transofrm an IN predicate into EXISTS via predicate injection.
+  Transform an IN predicate into EXISTS via predicate injection.
 
   @details The transformation injects additional predicates into the subquery
   (and makes the subquery correlated) as follows.
@@ -1607,6 +1618,9 @@ Item_in_subselect::create_single_in_to_e
 
   DBUG_ENTER("Item_in_subselect::create_single_in_to_exists_cond");
 
+  *where_item= NULL;
+  *having_item= NULL;
+
   if (join_having || select_lex->with_sum_func ||
       select_lex->group_list.elements)
   {
@@ -1626,11 +1640,11 @@ Item_in_subselect::create_single_in_to_e
       item= new Item_func_trig_cond(item, get_cond_guard(0));
     }
 
-    if (item->fix_fields(thd, 0))
+    if (!join_having)
+      item->name= (char*) in_having_cond;
+    if (fix_having(item, select_lex))
       DBUG_RETURN(RES_ERROR);
-
     *having_item= item;
-    *where_item= NULL;
   }
   else
   {
@@ -1651,10 +1665,9 @@ Item_in_subselect::create_single_in_to_e
                                                 get_cond_guard(0))))
             DBUG_RETURN(RES_ERROR);
         }
-
-        if (having->fix_fields(thd, 0))
-          DBUG_RETURN(RES_ERROR);
-
+        having->name= (char*) in_having_cond;
+        if (fix_having(having, select_lex))
+          DBUG_RETURN(RES_ERROR);
         *having_item= having;
 
         item= new Item_cond_or(item,
@@ -1670,20 +1683,20 @@ Item_in_subselect::create_single_in_to_e
           DBUG_RETURN(RES_ERROR);
       }
 
-      if (item->fix_fields(thd, 0))
-        DBUG_RETURN(RES_ERROR);
-
+      /*
+        TODO: figure out why the following is done here in 
+        single_value_transformer but there is no corresponding action in
+        row_value_transformer?
+      */
+      item->name= (char *) in_additional_cond;
+      if (!item->fixed && item->fix_fields(thd, 0))
+        DBUG_RETURN(RES_ERROR);
       *where_item= item;
     }
     else
     {
       if (select_lex->master_unit()->is_union())
       {
-        /*
-          comparison functions can't be changed during fix_fields()
-          we can assign select_lex->having here, and pass 0 as last
-          argument (reference) to fix_fields()
-        */
         Item *new_having=
           func->create(expr,
                        new Item_ref_null_helper(&select_lex->context, this,
@@ -1697,132 +1710,10 @@ Item_in_subselect::create_single_in_to_e
             DBUG_RETURN(RES_ERROR);
         }
 
-        if (new_having->fix_fields(thd, 0))
-          DBUG_RETURN(RES_ERROR);
-
+        new_having->name= (char*) in_having_cond;
+        if (fix_having(new_having, select_lex))
+          DBUG_RETURN(RES_ERROR);
         *having_item= new_having;
-        *where_item= NULL;
-      }
-      else
-      {
-        DBUG_ASSERT(FALSE);
-        /* TIMOUR TODO */
-        *having_item= NULL;
-        *where_item= (Item*) select_lex->item_list.head();
-      }
-    }
-  }
-
-  DBUG_RETURN(RES_OK);
-}
-
-
-
-Item_subselect::trans_res
-Item_in_subselect::inject_single_in_to_exists_cond(JOIN * join,
-                                                   Item *where_item,
-                                                   Item *having_item)
-{
-  SELECT_LEX *select_lex= join->select_lex;
-  /*
-    The non-transformed HAVING clause of 'join' may be stored differently in
-    JOIN::optimize:
-    this->tmp_having= this->having
-    this->having= 0;
-  */
-  Item* join_having= join->having ? join->having : join->tmp_having;
-  bool fix_res= 0;
-  DBUG_ENTER("Item_in_subselect::inject_single_in_to_exists_cond");
-
-  if (join_having || select_lex->with_sum_func ||
-      select_lex->group_list.elements)
-  {
-    /*
-      AND and comparison functions can't be changed during fix_fields()
-      we can assign select_lex->having here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    thd->change_item_tree(&select_lex->having, and_items(join_having, having_item));
-    join->having= select_lex->having;
-    if (join->having == having_item)
-      having_item->name= (char*)in_having_cond;
-    select_lex->having_fix_field= 1;
-    /*
-      we do not check join->having->fixed, because Item_and (from and_items)
-      or comparison function (from func->create) can't be fixed after creation
-    */
-    if (!join->having->fixed)
-      fix_res= join->having->fix_fields(thd, 0);
-    select_lex->having_fix_field= 0;
-    if (fix_res)
-      DBUG_RETURN(RES_ERROR);
-  }
-  else
-  {
-    if (select_lex->table_list.elements)
-    {
-      if (!abort_on_null && select_lex->item_list.head()->maybe_null)
-      {
-        /*
-          Item_is_not_null_test can't be changed during fix_fields()
-          we can assign select_lex->having here, and pass 0 as last
-          argument (reference) to fix_fields()
-        */
-        having_item->name= (char*)in_having_cond;
-        thd->change_item_tree(&select_lex->having, having_item);
-        join->having= select_lex->having;
-        select_lex->having_fix_field= 1;
-        /*
-          we do not check join->having->fixed, because Item_and (from
-          and_items) or comparison function (from func->create) can't be
-          fixed after creation
-        */
-        if (!join->having->fixed)
-          fix_res= join->having->fix_fields(thd, 0);
-        select_lex->having_fix_field= 0;
-        if (fix_res)
-          DBUG_RETURN(RES_ERROR);
-      }
-      /*
-        TODO: figure out why the following is done here in 
-        single_value_transformer but there is no corresponding action in
-        row_value_transformer?
-      */
-      where_item->name= (char *)in_additional_cond;
-
-      /*
-        AND can't be changed during fix_fields()
-        we can assign select_lex->having here, and pass 0 as last
-        argument (reference) to fix_fields()
-      */
-      thd->change_item_tree(&select_lex->where, and_items(join->conds, where_item));
-      join->conds= select_lex->where;
-      select_lex->where->top_level_item();
-      /*
-        we do not check join->conds->fixed, because Item_and can't be fixed
-        after creation
-      */
-      if (!join->conds->fixed && join->conds->fix_fields(thd, 0))
-        DBUG_RETURN(RES_ERROR);
-    }
-    else
-    {
-      if (select_lex->master_unit()->is_union())
-      {
-        having_item->name= (char*)in_having_cond;
-        thd->change_item_tree(&select_lex->having, having_item);
-        join->having= select_lex->having;
-        select_lex->having_fix_field= 1;
-        
-        /*
-          we do not check join->having->fixed, because comparison function
-          (from func->create) can't be fixed after creation
-        */
-        if (!join->having->fixed)
-          fix_res= join->having->fix_fields(thd, 0);
-        select_lex->having_fix_field= 0;
-        if (fix_res)
-          DBUG_RETURN(RES_ERROR);
       }
       else
         DBUG_ASSERT(FALSE);
@@ -1992,7 +1883,6 @@ Item_in_subselect::create_row_in_to_exis
       item_having_part2->top_level_item();
     }
     *having_item= and_items(*having_item, item_having_part2);
-    (*having_item)->top_level_item();
   }
   else
   {
@@ -2074,66 +1964,22 @@ Item_in_subselect::create_row_in_to_exis
       }
       *where_item= and_items(*where_item, item);
     }
-    (*where_item)->fix_fields(thd, 0);
   }
 
-  DBUG_RETURN(RES_OK);
-}
-
-
-Item_subselect::trans_res
-Item_in_subselect::inject_row_in_to_exists_cond(JOIN * join,
-                                                Item *where_item,
-                                                Item *having_item)
-{
-  SELECT_LEX *select_lex= join->select_lex;
-  /*
-    The non-transformed HAVING clause of 'join' may be stored differently in
-    JOIN::optimize:
-    this->tmp_having= this->having
-    this->having= 0;
-  */
-  Item* join_having= join->having ? join->having : join->tmp_having;
-  bool is_having_used= (join_having || select_lex->with_sum_func ||
-                        select_lex->group_list.first ||
-                        !select_lex->table_list.elements);
-
-  DBUG_ENTER("Item_in_subselect::inject_row_in_to_exists_cond");
-
-  if (!is_having_used)
+  if (*where_item)
   {
-    /*
-      AND can't be changed during fix_fields()
-      we can assign select_lex->where here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    thd->change_item_tree(&select_lex->where, and_items(join->conds, where_item));
-    join->conds= select_lex->where;
-    select_lex->where->top_level_item();
-    if (!join->conds->fixed && join->conds->fix_fields(thd, 0))
+    if (!(*where_item)->fixed && (*where_item)->fix_fields(thd, 0))
       DBUG_RETURN(RES_ERROR);
+    (*where_item)->top_level_item();
   }
 
-  if (having_item)
+  if (*having_item)
   {
-    bool res;
-    thd->change_item_tree(&select_lex->having,
-                          and_items(join_having, having_item));
-    join->having= select_lex->having;
-    if (having_item == select_lex->having)
-      having_item->name= (char*)in_having_cond;
-    select_lex->having->top_level_item();
-    /*
-      AND can't be changed during fix_fields()
-      we can assign select_lex->having here, and pass 0 as last
-      argument (reference) to fix_fields()
-    */
-    select_lex->having_fix_field= 1;
-    if (!join->having->fixed)
-      res= join->having->fix_fields(thd, 0);
-    select_lex->having_fix_field= 0;
-    if (res)
+    if (!join_having)
+      (*having_item)->name= (char*) in_having_cond;
+    if (fix_having(*having_item, select_lex))
       DBUG_RETURN(RES_ERROR);
+    (*having_item)->top_level_item();
   }
 
   DBUG_RETURN(RES_OK);
@@ -2175,20 +2021,36 @@ bool Item_in_subselect::create_in_to_exi
 }
 
 
-bool Item_in_subselect::inject_in_to_exists_cond(JOIN * join_arg)
+bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg)
 {
-  Item_subselect::trans_res res;
+  SELECT_LEX *select_lex= join_arg->select_lex;
+  Item *where_item= join_arg->in_to_exists_where;
+  Item *having_item= join_arg->in_to_exists_having;
 
-  if (left_expr->cols() == 1)
-    res= inject_single_in_to_exists_cond(join_arg,
-                                         join_arg->in_to_exists_where,
-                                         join_arg->in_to_exists_having);
-  else
-    res= inject_row_in_to_exists_cond(join_arg,
-                                      join_arg->in_to_exists_where,
-                                      join_arg->in_to_exists_having);
+  DBUG_ENTER("Item_in_subselect::inject_in_to_exists_cond");
 
-  return (res != RES_OK && res != RES_REDUCE);
+  if (where_item)
+  {
+    where_item= and_items(join_arg->conds, where_item);
+    if (!where_item->fixed && where_item->fix_fields(thd, 0))
+      DBUG_RETURN(true);
+    thd->change_item_tree(&select_lex->where, where_item);
+    select_lex->where->top_level_item();
+    join_arg->conds= select_lex->where;
+  }
+
+  if (having_item)
+  {
+    Item* join_having= join_arg->having ? join_arg->having:join_arg->tmp_having;
+    having_item= and_items(join_having, having_item);
+    if (fix_having(having_item, select_lex))
+      DBUG_RETURN(true);
+    thd->change_item_tree(&select_lex->having, having_item);
+    select_lex->having->top_level_item();
+    join_arg->having= select_lex->having;
+  }
+
+  DBUG_RETURN(false);
 }
 
 
@@ -2301,7 +2163,7 @@ err:
 
 void Item_in_subselect::print(String *str, enum_query_type query_type)
 {
-  if (exec_method == IN_TO_EXISTS)
+  if (in_strategy & SUBS_IN_TO_EXISTS)
     str->append(STRING_WITH_LEN("<exists>"));
   else
   {
@@ -2317,7 +2179,7 @@ bool Item_in_subselect::fix_fields(THD *
   uint outer_cols_num;
   List<Item> *inner_cols;
 
-  if (exec_method == SEMI_JOIN)
+  if (in_strategy & SUBS_SEMI_JOIN)
     return !( (*ref)= new Item_int(1));
 
   /*
@@ -2498,10 +2360,7 @@ bool Item_in_subselect::init_cond_guards
   Callback to test if an IN predicate is expensive.
 
   @details
-    IN predicates are considered expensive only if they will be executed via
-    materialization. The return value affects the behavior of
-    make_cond_for_table() in such a way that it is unchanged when we use
-    the IN=>EXISTS transformation to compute IN.
+    The return value affects the behavior of make_cond_for_table().
 
   @retval TRUE  if the predicate is expensive
   @retval FALSE otherwise
@@ -2509,7 +2368,8 @@ bool Item_in_subselect::init_cond_guards
 
 bool Item_in_subselect::is_expensive_processor(uchar *arg)
 {
-  return TRUE; // exec_method == MATERIALIZATION;
+  /* TIMOUR: TODO: decide on a cost basis whether it is expensive or not. */
+  return TRUE;
 }
 
 
@@ -2517,7 +2377,7 @@ Item_subselect::trans_res
 Item_allany_subselect::select_transformer(JOIN *join)
 {
   DBUG_ENTER("Item_allany_subselect::select_transformer");
-  exec_method= IN_TO_EXISTS;
+  in_strategy= SUBS_IN_TO_EXISTS;
   if (upper_item)
     upper_item->show= 1;
   DBUG_RETURN(select_in_like_transformer(join));
@@ -2526,7 +2386,7 @@ Item_allany_subselect::select_transforme
 
 void Item_allany_subselect::print(String *str, enum_query_type query_type)
 {
-  if (exec_method == IN_TO_EXISTS)
+  if (in_strategy & SUBS_IN_TO_EXISTS)
     str->append(STRING_WITH_LEN("<exists>"));
   else
   {

=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h	2010-09-05 15:43:47 +0000
+++ b/sql/item_subselect.h	2010-09-30 15:32:44 +0000
@@ -108,9 +108,6 @@ public:
   /* subquery is transformed */
   bool changed;
 
-  /* TIMOUR: this is temporary, remove it. */
-  bool is_min_max_optimized;
-
   /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
   bool is_correlated; 
 
@@ -121,6 +118,12 @@ public:
   Item_subselect();
 
   virtual subs_type substype() { return UNKNOWN_SUBS; }
+  bool is_in_predicate()
+  {
+    return (substype() == Item_subselect::IN_SUBS ||
+            substype() == Item_subselect::ALL_SUBS ||
+            substype() == Item_subselect::ANY_SUBS);
+  }
 
   /*
     We need this method, because some compilers do not allow 'this'
@@ -314,6 +317,18 @@ public:
 };
 
 
+/*
+  Possible methods to execute an IN predicate. These are set by the optimizer
+  based on user-set optimizer switches, syntactic analysis and cost comparison.
+*/
+#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
+#define SUBS_SEMI_JOIN 1       /* IN was converted to semi-join. */
+#define SUBS_IN_TO_EXISTS 2    /* IN was converted to correlated EXISTS. */
+#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */
+/* Partial matching substrategies of MATERIALIZATION. */
+#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8
+#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16
+
 /**
   Representation of IN subquery predicates of the form
   "left_expr IN (SELECT ...)".
@@ -362,19 +377,13 @@ protected:
   trans_res select_in_like_transformer(JOIN *join);
   trans_res single_value_transformer(JOIN *join);
   trans_res row_value_transformer(JOIN * join);
+  bool fix_having(Item *having, st_select_lex *select_lex);
   trans_res create_single_in_to_exists_cond(JOIN * join,
                                             Item **where_item,
                                             Item **having_item);
-  trans_res inject_single_in_to_exists_cond(JOIN * join,
-                                            Item *where_item,
-                                            Item *having_item);
-
   trans_res create_row_in_to_exists_cond(JOIN * join,
                                          Item **where_item,
                                          Item **having_item);
-  trans_res inject_row_in_to_exists_cond(JOIN * join,
-                                         Item *where_item,
-                                         Item *having_item);
 public:
   Item *left_expr;
   /* Priority of this predicate in the convert-to-semi-join-nest process. */
@@ -407,14 +416,8 @@ public:
   */
   bool sjm_scan_allowed;
 
-  /* The method chosen to execute the IN predicate.  */
-  enum enum_exec_method {
-    NOT_TRANSFORMED, /* No execution method was chosen for this IN. */
-    SEMI_JOIN,   /* IN was converted to semi-join nest and should be removed. */
-    IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */
-    MATERIALIZATION /* IN will be executed via subquery materialization. */
-  };
-  enum_exec_method exec_method;
+  /* A bitmap of possible execution strategies for an IN predicate. */
+  uchar in_strategy;
 
   bool *get_cond_guard(int i)
   {
@@ -433,7 +436,7 @@ public:
   Item_in_subselect()
     :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
     is_constant(FALSE), optimizer(0), abort_on_null(0),
-    pushed_cond_guards(NULL), func(NULL), exec_method(NOT_TRANSFORMED),
+    pushed_cond_guards(NULL), func(NULL), in_strategy(0),
     upper_item(0)
     {}
   void cleanup();
@@ -446,8 +449,8 @@ public:
     was_null= 0;
   }
   trans_res select_transformer(JOIN *join);
-  bool create_in_to_exists_cond(JOIN * join_arg);
-  bool inject_in_to_exists_cond(JOIN * join_arg);
+  bool create_in_to_exists_cond(JOIN *join_arg);
+  bool inject_in_to_exists_cond(JOIN *join_arg);
 
   virtual bool exec();
   longlong val_int();

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2010-07-10 10:37:30 +0000
+++ b/sql/mysql_priv.h	2010-09-30 15:32:44 +0000
@@ -567,16 +566,17 @@ protected:
 #define OPTIMIZER_SWITCH_FIRSTMATCH 32
 #define OPTIMIZER_SWITCH_LOOSE_SCAN 64
 #define OPTIMIZER_SWITCH_MATERIALIZATION 128
-#define OPTIMIZER_SWITCH_SEMIJOIN 256
-#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
-#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
-#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11)
+#define OPTIMIZER_SWITCH_IN_TO_EXISTS 256
+#define OPTIMIZER_SWITCH_SEMIJOIN 512
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN  (1<<11)
+#define OPTIMIZER_SWITCH_SUBQUERY_CACHE  (1<<12)
 
 #ifdef DBUG_OFF
-#  define OPTIMIZER_SWITCH_LAST (1<<12)
-#else
-#  define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
 #  define OPTIMIZER_SWITCH_LAST (1<<13)
+#else
+#  define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13)
+#  define OPTIMIZER_SWITCH_LAST (1<<14)
 #endif
 
 #ifdef DBUG_OFF 
@@ -589,6 +589,7 @@ protected:
                                     OPTIMIZER_SWITCH_FIRSTMATCH | \
                                     OPTIMIZER_SWITCH_LOOSE_SCAN | \
                                     OPTIMIZER_SWITCH_MATERIALIZATION | \
+                                    OPTIMIZER_SWITCH_IN_TO_EXISTS | \
                                     OPTIMIZER_SWITCH_SEMIJOIN | \
                                     OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
                                     OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
@@ -603,6 +604,7 @@ protected:
                                     OPTIMIZER_SWITCH_FIRSTMATCH | \
                                     OPTIMIZER_SWITCH_LOOSE_SCAN | \
                                     OPTIMIZER_SWITCH_MATERIALIZATION | \
+                                    OPTIMIZER_SWITCH_IN_TO_EXISTS | \
                                     OPTIMIZER_SWITCH_SEMIJOIN | \
                                     OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
                                     OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2010-07-16 08:58:24 +0000
+++ b/sql/mysqld.cc	2010-09-30 15:32:44 +0000
@@ -304,7 +304,7 @@ static const char *optimizer_switch_name
   "index_merge","index_merge_union","index_merge_sort_union",
   "index_merge_intersection",
   "index_condition_pushdown",
-  "firstmatch","loosescan","materialization", "semijoin",
+  "firstmatch","loosescan","materialization","in_to_exists","semijoin",
   "partial_match_rowid_merge",
   "partial_match_table_scan",
   "subquery_cache",
@@ -325,6 +325,7 @@ static const unsigned int optimizer_swit
   sizeof("firstmatch") - 1,
   sizeof("loosescan") - 1,
   sizeof("materialization") - 1,
+  sizeof("in_to_exists") - 1,
   sizeof("semijoin") - 1,
   sizeof("partial_match_rowid_merge") - 1,
   sizeof("partial_match_table_scan") - 1,
@@ -412,9 +413,10 @@ static const char *optimizer_switch_str=
                                         "firstmatch=on,"
                                         "loosescan=on,"
                                         "materialization=on,"
+                                        "in_to_exists=on,"
                                         "semijoin=on,"
                                         "partial_match_rowid_merge=on,"
-                                        "partial_match_table_scan=on,"
+  "partial_match_table_scan=on,"
                                         "subquery_cache=on"
 #ifndef DBUG_OFF
                                         ",table_elimination=on";
@@ -7233,7 +7235,7 @@ The minimum value for this variable is 4
   {"optimizer_switch", OPT_OPTIMIZER_SWITCH,
    "optimizer_switch=option=val[,option=val...], where option={index_merge, "
    "index_merge_union, index_merge_sort_union, index_merge_intersection, "
-   "index_condition_pushdown, firstmatch, loosescan, materialization, "
+   "index_condition_pushdown, firstmatch, loosescan, materialization, in_to_exists, "
    "semijoin, partial_match_rowid_merge, partial_match_table_scan, "
    "subquery_cache"
 #ifndef DBUG_OFF

=== modified file 'sql/opt_subselect.cc'
--- a/sql/opt_subselect.cc	2010-09-17 10:17:27 +0000
+++ b/sql/opt_subselect.cc	2010-09-30 15:32:44 +0000
@@ -165,7 +165,7 @@ int check_and_do_in_subquery_rewrites(JO
         thd->thd_marker.emb_on_expr_nest &&                           // 5
         select_lex->outer_select()->join &&                           // 6
         parent_unit->first_select()->leaf_tables &&                   // 7
-        in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8
+        !in_subs->in_strategy &&                                      // 8
         select_lex->outer_select()->leaf_tables &&                    // 9
         !((join->select_options |                                     // 10
            select_lex->outer_select()->join->select_options)          // 10
@@ -185,65 +185,71 @@ int check_and_do_in_subquery_rewrites(JO
     else
     {
       DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
-      /*
-        Check if the subquery predicate can be executed via materialization.
-        The required conditions are:
-        1. Subquery predicate is an IN/=ANY subq predicate
-        2. Subquery is a single SELECT (not a UNION)
-        3. Subquery is not a table-less query. In this case there is no
-           point in materializing.
-          3A The upper query is not a table-less SELECT ... FROM DUAL. We
+      if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
+          !optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+        my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
+
+      if (in_subs)
+      {
+        /* Subquery predicate is an IN/=ANY predicate. */
+        if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS))
+          in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
+        if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
+          in_subs->in_strategy|= SUBS_MATERIALIZATION;
+
+        /*
+          Check if the subquery predicate can be executed via materialization.
+          The required conditions are:
+          1. Subquery is a single SELECT (not a UNION)
+          2. Subquery is not a table-less query. In this case there is no
+             point in materializing.
+          2A The upper query is not a table-less SELECT ... FROM DUAL. We
              can't do materialization for SELECT .. FROM DUAL because it
              does not call setup_subquery_materialization(). We could make 
              SELECT ... FROM DUAL call that function but that doesn't seem
              to be the case that is worth handling.
-        4. Either the subquery predicate is a top-level predicate, or at
-           least one partial match strategy is enabled. If no partial match
-           strategy is enabled, then materialization cannot be used for
-           non-top-level queries because it cannot handle NULLs correctly.
-        5. Subquery is non-correlated
-           TODO:
-           This is an overly restrictive condition. It can be extended to:
-           (Subquery is non-correlated ||
-            Subquery is correlated to any query outer to IN predicate ||
-            (Subquery is correlated to the immediate outer query &&
-             Subquery !contains {GROUP BY, ORDER BY [LIMIT],
-             aggregate functions}) && subquery predicate is not under "NOT IN"))
-        6. No execution method was already chosen (by a prepared statement).
-
-        (*) The subquery must be part of a SELECT statement. The current
-             condition also excludes multi-table update statements.
-
-        Determine whether we will perform subquery materialization before
-        calling the IN=>EXISTS transformation, so that we know whether to
-        perform the whole transformation or only that part of it which wraps
-        Item_in_subselect in an Item_in_optimizer.
-      */
-      if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION)  && 
-          in_subs  &&                                                   // 1
-          !select_lex->is_part_of_union() &&                            // 2
-          parent_unit->first_select()->leaf_tables &&                   // 3
-          thd->lex->sql_command == SQLCOM_SELECT &&                     // *
-          select_lex->outer_select()->leaf_tables &&                    // 3A
-          subquery_types_allow_materialization(in_subs) &&
-          // psergey-todo: duplicated_subselect_card_check: where it's done?
-          (in_subs->is_top_level_item() ||
-           optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) ||
-           optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4
-          !in_subs->is_correlated &&                                  // 5
-          in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
-      {
-        /*
-          Materialization is possible, later the optimize phase of each
-          subquery will choose either materialization or in-to-exists based
-          on cost.
+          3. Either the subquery predicate is a top-level predicate, or at
+             least one partial match strategy is enabled. If no partial match
+             strategy is enabled, then materialization cannot be used for
+             non-top-level queries because it cannot handle NULLs correctly.
+          4. Subquery is non-correlated
+             TODO:
+             This is an overly restrictive condition. It can be extended to:
+             (Subquery is non-correlated ||
+              Subquery is correlated to any query outer to IN predicate ||
+              (Subquery is correlated to the immediate outer query &&
+               Subquery !contains {GROUP BY, ORDER BY [LIMIT],
+               aggregate functions}) && subquery predicate is not under "NOT IN"))
+
+          (*) The subquery must be part of a SELECT statement. The current
+               condition also excludes multi-table update statements.
         */
-        in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
-      }
-      else if (in_subs)
-      {
-        /* Materialization is not possible at all. */
-        in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
+        if (!(in_subs->in_strategy & SUBS_MATERIALIZATION && 
+              !select_lex->is_part_of_union() &&                            // 1
+              parent_unit->first_select()->leaf_tables &&                   // 2
+              thd->lex->sql_command == SQLCOM_SELECT &&                     // *
+              select_lex->outer_select()->leaf_tables &&                    // 2A
+              subquery_types_allow_materialization(in_subs) &&
+              // psergey-todo: duplicated_subselect_card_check: where it's done?
+              (in_subs->is_top_level_item() ||                               //3
+               optimizer_flag(thd,
+                              OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
+               optimizer_flag(thd,
+                              OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3
+              !in_subs->is_correlated))                                      //4
+        {
+          /* Materialization is not possible based on syntactic properties. */
+          in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
+        }
+
+        if (!in_subs->in_strategy)
+        {
+          /*
+            If neither materialization is possible, nor the user chose
+            IN-TO-EXISTS, choose IN-TO-EXISTS as the only universal strategy.
+          */
+          in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
+        }
       }
 
       /*
@@ -533,13 +539,13 @@ skip_conversion:
     }
     /*
       Revert to the IN->EXISTS strategy in the rare case when the subquery could
-      be flattened.
+      not be flattened.
       TODO: This is a limitation done for simplicity. Such subqueries could also
       be executed via materialization. In order to determine this, we should
       re-run the test for materialization that was done in
       check_and_do_in_subquery_rewrites.
     */
-    (*in_subq)->exec_method= Item_in_subselect::IN_TO_EXISTS;
+    (*in_subq)->in_strategy= SUBS_IN_TO_EXISTS;
   }
 
   if (arena)
@@ -800,8 +806,7 @@ static bool convert_subq_to_sj(JOIN *par
   /* 3. Remove the original subquery predicate from the WHERE/ON */
 
   // The subqueries were replaced for Item_int(1) earlier
-  subq_pred->exec_method=
-    Item_in_subselect::SEMI_JOIN;         // for subsequent executions
+  subq_pred->in_strategy= SUBS_SEMI_JOIN;         // for subsequent executions
   /*TODO: also reset the 'with_subselect' there. */
 
   /* n. Adjust the parent_join->tables counter */
@@ -3564,56 +3570,157 @@ bool JOIN::optimize_unflattened_subqueri
 }
 
 
-bool JOIN::choose_subquery_plan()
+bool JOIN::choose_subquery_plan(table_map join_tables)
 {
-  double mat_strategy_cost;    /* The cost to compute IN via materialization. */
-  double in_exists_strategy_cost; /* The cost of the IN->EXISTS strategy. */
-  bool res;
+  /* The original QEP of the subquery. */
+  DYNAMIC_ARRAY save_keyuse; /* Copy of the JOIN::keyuse array. */
+  POSITION save_best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */
+  /* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */
+  KEYUSE *save_join_tab_keyuse[MAX_TABLES];
+  /* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */
+  key_map save_join_tab_checked_keys[MAX_TABLES];
+
+  bool in_exists_reoptimized= false;
+  Item_in_subselect *in_subs;
+
+  if (select_lex->master_unit()->item &&
+      select_lex->master_unit()->item->is_in_predicate())
+  {
+    in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
+    if (in_subs->create_in_to_exists_cond(this))
+      return true;
+  }
+  else
+    return false;
 
+  DBUG_ASSERT(in_subs->in_strategy); /* A strategy must be chosen earlier. */
   DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
-  DBUG_ASSERT(select_lex->master_unit()->item &&
-              (select_lex->master_unit()->item->substype() ==
-               Item_subselect::IN_SUBS ||
-               select_lex->master_unit()->item->substype() ==
-               Item_subselect::ALL_SUBS ||
-               select_lex->master_unit()->item->substype() ==
-               Item_subselect::ANY_SUBS));
-
-  Item_in_subselect *in_subs= (Item_in_subselect*)
-                              select_lex->master_unit()->item;
-
-  /* Always revert to IN->EXISTS. */
-  mat_strategy_cost= 0;
-  in_exists_strategy_cost= 1;
+  DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed);
+  DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed);
+
+  save_keyuse.elements= 0;
+  save_keyuse.buffer= NULL;
 
   /*
-    If (1) materialization is a possible strategy based on static analysis, and
-    (2) it is cheaper strategy than the IN->EXISTS transformation, then compute
-    in_subs via the materialization trategy.
+    Compute and compare the costs of materialization and in-exists if both
+    strategies are possible and allowed by the user (checked during the prepare
+    phase.
   */
-  if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && // 1
-      mat_strategy_cost < in_exists_strategy_cost)                  // 2
+  if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
+      in_subs->in_strategy & SUBS_IN_TO_EXISTS)
   {
-    if (in_subs->setup_mat_engine())
+    JOIN *outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
+    JOIN *inner_join= this;
+    /* Cost of the outer JOIN. */
+    double outer_read_time= 0, outer_record_count= 0;
+    /* Cost of the unmodified subquery. */
+    double inner_read_time_1= 0, inner_record_count_1= 0;
+    /* Cost of the subquery with injected IN-EXISTS predicates. */
+    double inner_read_time_2= 0, inner_record_count_2= 0;
+    /* The cost to compute IN via materialization. */
+    double materialize_strategy_cost= 0;
+    /* The cost of the IN->EXISTS strategy. */
+    double in_exists_strategy_cost= 1;
+
+    if (outer_join)
+      get_partial_join_cost(outer_join, outer_join->tables,
+                            &outer_read_time, &outer_record_count);
+    else
+    {
+      /* TODO: outer_join can be NULL for DELETE statements. */
+      outer_read_time= 1; /* TODO */
+      outer_record_count= 1;  /* TODO */
+    }
+
+    get_partial_join_cost(inner_join, inner_join->tables,
+                          &inner_read_time_1, &inner_record_count_1);
+
+    if (in_to_exists_where && const_tables != tables)
     {
       /*
-        In some cases it is not possible to create usable indexes for the
-        materialization strategy, so fall back to IN->EXISTS.
+        Re-optimize and cost the subquery taking into account the IN-EXISTS
+        conditions.
       */
-      in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
+      if (save_query_plan(&save_keyuse, save_best_positions,
+                          save_join_tab_keyuse, save_join_tab_checked_keys) ||
+          reoptimize(in_to_exists_where, join_tables, save_best_positions))
+        return TRUE;
+      in_exists_reoptimized= true;
+
+      get_partial_join_cost(inner_join, inner_join->tables,
+                            &inner_read_time_2, &inner_record_count_2);
     }
+    else
+    {
+      /* Reoptimization would not produce any better plan. */
+      inner_read_time_2= inner_read_time_1;
+    }
+
+    /* Compute execution costs. */
+    /*
+      1. Compute the cost of the materialization strategy.
+    */
+    double materialization_cost; /* The cost of executing the subquery and */
+                                 /* storing its result in an indexed temp table.*/
+    /* The cost of a lookup into the unique index of the materialized table. */
+    double lookup_cost;
+    double write_row_cost= 1; /* TODO: what is the real cost to write a row? */
+    materialization_cost= inner_read_time_1 +
+                          inner_record_count_1 * write_row_cost;
+    /*
+      The cost of a hash/btree lookup into a unique index of a materialized
+      subquery.
+      TIMOUR: TODO: the block of code below is exact copy/paste from
+      opt_subselect.cc:optimize_semi_join_nests() - refactor it.
+    */
+    uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
+    if (rowlen * inner_record_count_1 < thd->variables.max_heap_table_size)
+      lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
+    else
+      lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
+    materialize_strategy_cost= materialization_cost +
+                               outer_record_count * lookup_cost;
+
+    /*
+      2. Compute the cost of the IN=>EXISTS strategy.
+    */
+    in_exists_strategy_cost= outer_record_count * inner_read_time_2;
+
+    /* Compare the costs and choose the cheaper strategy. */
+    if (materialize_strategy_cost >= in_exists_strategy_cost)
+      in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
+    else
+      in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
   }
-  else
+
+  /*
+    If (1) materialization is a possible strategy based on static analysis
+    during the prepare phase, then if
+      (2) it is more expensive than the IN->EXISTS transformation, and
+      (3) it is not possible to create usable indexes for the materialization
+          strategy,
+      fall back to IN->EXISTS.
+    otherwise use materialization.
+  */
+  if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
+      in_subs->setup_mat_engine())
   {
     /*
-      Previous optimizer phases should have chosen either a materialization
-      or IN->EXISTS strategy.
+      If materialization was the cheaper or the only user-selected strategy,
+      but it is not possible to execute it due to limitations in the
+      implementation, fall back to IN-TO-EXISTS.
     */
-    DBUG_ASSERT(in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS);
+    in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
+    in_subs->in_strategy|= SUBS_IN_TO_EXISTS;
   }
 
-  if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION)
+  if (in_subs->in_strategy & SUBS_MATERIALIZATION)
   {
+    /* Restore orginal query plan used for materialization. */
+    if (in_exists_reoptimized)
+      restore_query_plan(&save_keyuse, save_best_positions,
+                         save_join_tab_keyuse, save_join_tab_checked_keys);
+
     /* TODO: should we set/unset this flag for both select_lex and its unit? */
     in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT;
     select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT;
@@ -3633,13 +3740,32 @@ bool JOIN::choose_subquery_plan()
       set in the beginning of JOIN::optimize, which was already done.
     */
     select_limit= in_subs->unit->select_limit_cnt;
+  }
+  else if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
+  {
+    /* Keep the new query plan with injected conditions, delete the old one. */
+    if (save_keyuse.elements)
+    {
+      DBUG_ASSERT(in_exists_reoptimized);
+      delete_dynamic(&save_keyuse);
+    }
+
+    if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables)
+    {
+      for (uint i= 0; i < tables; i++)
+      {
+        join_tab[i].keyuse= NULL;
+        join_tab[i].checked_keys.clear_all();
+      }
+      if (reoptimize(in_to_exists_where, join_tables, NULL))
+        return TRUE;
+    }
 
-    // TODO: inner_join->restore_plan (keyuse, best_positions, best_read)
+    if (in_subs->inject_in_to_exists_cond(this))
+      return TRUE;
   }
-  else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS)
-    res= in_subs->inject_in_to_exists_cond(this);
   else
     DBUG_ASSERT(FALSE);
 
-  return res;
+  return FALSE;
 }

=== modified file 'sql/share/errmsg.txt'
--- a/sql/share/errmsg.txt	2010-06-01 19:52:20 +0000
+++ b/sql/share/errmsg.txt	2010-09-30 15:32:44 +0000
@@ -6245,3 +6245,5 @@ ER_UNKNOWN_OPTION
         eng "Unknown option '%-.64s'"
 ER_BAD_OPTION_VALUE
         eng "Incorrect value '%-.64s' for option '%-.64s'"
+ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
+        eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."

=== modified file 'sql/sql_lex.cc'
--- a/sql/sql_lex.cc	2010-09-16 14:06:58 +0000
+++ b/sql/sql_lex.cc	2010-09-30 15:32:44 +0000
@@ -2157,8 +2157,8 @@ void st_select_lex::print_limit(THD *thd
                     select_limit == 1, and there should be no offset_limit.
                   */
                   (((subs_type == Item_subselect::IN_SUBS) &&
-                    ((Item_in_subselect*)item)->exec_method ==
-                    Item_in_subselect::MATERIALIZATION) ?
+                    ((Item_in_subselect*)item)->in_strategy &
+                    SUBS_MATERIALIZATION) ?
                    TRUE :
                    (select_limit->val_int() == 1LL) &&
                    offset_limit == 0));
@@ -3096,25 +3096,11 @@ bool st_select_lex::optimize_unflattened
     Item_subselect *subquery_predicate= un->item;
     if (subquery_predicate)
     {
-      Item_in_subselect *item_in= NULL;
-      if (subquery_predicate->substype() == Item_subselect::IN_SUBS ||
-          subquery_predicate->substype() == Item_subselect::ALL_SUBS ||
-          subquery_predicate->substype() == Item_subselect::ANY_SUBS)
-        item_in= (Item_in_subselect*) subquery_predicate;
       for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
       {
         JOIN *inner_join= sl->join;
         SELECT_LEX *save_select= un->thd->lex->current_select;
         int res;
-
-        /*
-          Make sure that we do not create IN->EXISTS conditions for
-          subquery predicates that were substituted by Item_maxmin_subselect
-          or by Item_singlerow_subselect.
-        */
-        DBUG_ASSERT(!item_in || (item_in && !item_in->is_min_max_optimized));
-        if (item_in && item_in->create_in_to_exists_cond(inner_join))
-            return TRUE;
         /* We need only 1 row to determine existence */
         un->set_limit(un->global_parameters);
         un->thd->lex->current_select= sl;

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2010-09-17 10:17:27 +0000
+++ b/sql/sql_select.cc	2010-09-30 15:32:44 +0000
@@ -53,9 +53,9 @@ static bool make_join_statistics(JOIN *j
 static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
                                 JOIN_TAB *join_tab,
                                 uint tables, COND *conds,
-                                COND_EQUAL *cond_equal,
                                 table_map table_map, SELECT_LEX *select_lex,
                                 st_sargable_param **sargables);
+static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse);
 static int sort_keyuse(KEYUSE *a,KEYUSE *b);
 static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
                                table_map used_tables);
@@ -928,24 +928,29 @@ JOIN::optimize()
     error= 0;
     if (optimize_unflattened_subqueries())
       DBUG_RETURN(1);
-    if (in_to_exists_where || in_to_exists_having)
-    {
-      /*
-        TIMOUR: TODO: refactor this block and JOIN::choose_subquery_plan
-      */
-      Item_in_subselect *in_subs= (Item_in_subselect*)
-        select_lex->master_unit()->item;
+    /*
+      TIMOUR: TODO: consider do we need to optimize here at all and refactor
+      this block and JOIN::choose_subquery_plan.
 
-      if (in_subs->exec_method ==  Item_in_subselect::MATERIALIZATION)
-        ; // setup materialized execution structures
-      else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS)
+    if (choose_subquery_plan())
+      DBUG_RETURN(1);
+    */
+    Item_in_subselect *in_subs;
+    if (select_lex->master_unit()->item &&
+        select_lex->master_unit()->item->is_in_predicate())
+    {
+      in_subs= (Item_in_subselect*) select_lex->master_unit()->item;
+      if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
+          in_subs->setup_mat_engine())
+        in_subs->in_strategy= SUBS_IN_TO_EXISTS;
+      if (in_subs->in_strategy & SUBS_IN_TO_EXISTS)
       {
+        if (in_subs->create_in_to_exists_cond(this))
+          DBUG_RETURN(1);
         if (in_subs->inject_in_to_exists_cond(this))
           DBUG_RETURN(1);
         tmp_having= having;
       }
-      else
-        DBUG_ASSERT(FALSE);
     }
     DBUG_RETURN(0);
   }
@@ -2828,10 +2833,14 @@ make_join_statistics(JOIN *join, TABLE_L
   }
 
   if (conds || outer_join)
+  {
     if (update_ref_and_keys(join->thd, keyuse_array, stat, join->tables,
-                            conds, join->cond_equal,
-                            ~outer_join, join->select_lex, &sargables))
+                            conds, ~outer_join, join->select_lex, &sargables))
       goto error;
+    if (keyuse_array->elements && sort_and_filter_keyuse(keyuse_array))
+      goto error;
+    DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
+  }
 
   join->const_table_map= no_rows_const_tables;
   join->const_tables= const_count;
@@ -3135,8 +3144,7 @@ make_join_statistics(JOIN *join, TABLE_L
            sizeof(POSITION)*join->const_tables);
     join->best_read=1.0;
   }
-  if ((join->in_to_exists_where || join->in_to_exists_having)
-      && join->choose_subquery_plan())
+  if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
     goto error;
 
   /* Generate an execution plan from the found optimal join order. */
@@ -4089,11 +4097,10 @@ static void add_key_fields_for_nj(JOIN *
 
 static bool
 update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
-                    uint tables, COND *cond, COND_EQUAL *cond_equal,
-                    table_map normal_tables, SELECT_LEX *select_lex,
-                    SARGABLE_PARAM **sargables)
+                    uint tables, COND *cond, table_map normal_tables,
+                    SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
 {
-  uint  and_level,i,found_eq_constant;
+  uint  and_level,i;
   KEY_FIELD *key_fields, *end, *field;
   uint sz;
   uint m= max(select_lex->max_equal_elems,1);
@@ -4189,64 +4196,72 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
       return TRUE;
   }
 
-  /*
-    Sort the array of possible keys and remove the following key parts:
-    - ref if there is a keypart which is a ref and a const.
-      (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
-      then we skip the key part corresponding to b=t2.d)
-    - keyparts without previous keyparts
-      (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
-      used in the query, we drop the partial key parts from consideration).
-    Special treatment for ft-keys.
-  */
-  if (keyuse->elements)
-  {
-    KEYUSE key_end,*prev,*save_pos,*use;
+  return FALSE;
+}
 
-    my_qsort(keyuse->buffer,keyuse->elements,sizeof(KEYUSE),
-          (qsort_cmp) sort_keyuse);
 
-    bzero((char*) &key_end,sizeof(key_end));    /* Add for easy testing */
-    if (insert_dynamic(keyuse,(uchar*) &key_end))
-      return TRUE;
+/**
+  Sort the array of possible keys and remove the following key parts:
+  - ref if there is a keypart which is a ref and a const.
+    (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
+    then we skip the key part corresponding to b=t2.d)
+  - keyparts without previous keyparts
+    (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
+    used in the query, we drop the partial key parts from consideration).
+  Special treatment for ft-keys.
+*/
+
+static bool sort_and_filter_keyuse(DYNAMIC_ARRAY *keyuse)
+{
+  KEYUSE key_end, *prev, *save_pos, *use;
+  uint found_eq_constant, i;
+
+  DBUG_ASSERT(keyuse->elements);
+
+  my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
+           (qsort_cmp) sort_keyuse);
 
-    use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
-    prev= &key_end;
-    found_eq_constant=0;
-    for (i=0 ; i < keyuse->elements-1 ; i++,use++)
-    {
-      if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
-        use->table->const_key_parts[use->key]|= use->keypart_map;
-      if (use->keypart != FT_KEYPART)
+  bzero((char*) &key_end, sizeof(key_end));    /* Add for easy testing */
+  if (insert_dynamic(keyuse, (uchar*) &key_end))
+    return TRUE;
+
+  use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
+  prev= &key_end;
+  found_eq_constant= 0;
+
+  for (i=0 ; i < keyuse->elements-1 ; i++,use++)
+  {
+    if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
+      use->table->const_key_parts[use->key]|= use->keypart_map;
+    if (use->keypart != FT_KEYPART)
+    {
+      if (use->key == prev->key && use->table == prev->table)
       {
-        if (use->key == prev->key && use->table == prev->table)
-        {
-          if (prev->keypart+1 < use->keypart ||
-              (prev->keypart == use->keypart && found_eq_constant))
-            continue;                           /* remove */
-        }
-        else if (use->keypart != 0)             // First found must be 0
-          continue;
+        if (prev->keypart+1 < use->keypart ||
+            (prev->keypart == use->keypart && found_eq_constant))
+          continue;                             /* remove */
       }
+      else if (use->keypart != 0)               // First found must be 0
+        continue;
+    }
 
 #ifdef HAVE_valgrind
-      /* Valgrind complains about overlapped memcpy when save_pos==use. */
-      if (save_pos != use)
+    /* Valgrind complains about overlapped memcpy when save_pos==use. */
+    if (save_pos != use)
 #endif
-        *save_pos= *use;
-      prev=use;
-      found_eq_constant= !use->used_tables;
-      /* Save ptr to first use */
-      if (!use->table->reginfo.join_tab->keyuse)
-        use->table->reginfo.join_tab->keyuse=save_pos;
-      use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
-      save_pos++;
-    }
-    i=(uint) (save_pos-(KEYUSE*) keyuse->buffer);
-    VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
-    keyuse->elements=i;
-  }
-  DBUG_EXECUTE("opt", print_keyuse_array(keyuse););
+      *save_pos= *use;
+    prev= use;
+    found_eq_constant= !use->used_tables;
+    /* Save ptr to first use */
+    if (!use->table->reginfo.join_tab->keyuse)
+      use->table->reginfo.join_tab->keyuse=save_pos;
+    use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
+    save_pos++;
+  }
+  i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
+  VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
+  keyuse->elements= i;
+
   return FALSE;
 }
 
@@ -19265,6 +19281,131 @@ bool JOIN::change_result(select_result *
   DBUG_RETURN(FALSE);
 }
 
+
+/**
+  Save the original query execution plan so that the caller can revert to it
+  if needed.
+*/
+int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
+                          POSITION *save_best_positions,
+                          KEYUSE **save_join_tab_keyuse,
+                          key_map *save_join_tab_checked_keys)
+{
+  if (keyuse.elements)
+  {
+    DYNAMIC_ARRAY tmp_keyuse;
+    if (my_init_dynamic_array(save_keyuse, sizeof(KEYUSE), 20, 64))
+      return 1;
+    /* Swap the current and the backup keyuse arrays. */
+    tmp_keyuse= keyuse;
+    keyuse= (*save_keyuse);
+    (*save_keyuse)= tmp_keyuse;
+
+    for (uint i= 0; i < tables; i++)
+    {
+      save_join_tab_keyuse[i]= join_tab[i].keyuse;
+      join_tab[i].keyuse= NULL;
+      save_join_tab_checked_keys[i]= join_tab[i].checked_keys;
+      join_tab[i].checked_keys.clear_all();
+    }
+  }
+  memcpy((uchar*) save_best_positions, (uchar*) best_positions,
+         sizeof(POSITION) * (tables + 1));
+  memset(best_positions, 0, sizeof(POSITION) * (tables + 1));
+  return 0;
+}
+
+
+/**
+  Restore the query plan saved before reoptimization with additional
+  conditions.
+*/
+
+void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
+                              POSITION *save_best_positions,
+                              KEYUSE **save_join_tab_keyuse,
+                              key_map *save_join_tab_checked_keys)
+{
+  if (save_keyuse->elements)
+  {
+    DYNAMIC_ARRAY tmp_keyuse;
+    tmp_keyuse= keyuse;
+    keyuse= (*save_keyuse);
+    (*save_keyuse)= tmp_keyuse;
+    delete_dynamic(save_keyuse);
+
+    for (uint i= 0; i < tables; i++)
+    {
+      join_tab[i].keyuse= save_join_tab_keyuse[i];
+      join_tab[i].checked_keys= save_join_tab_checked_keys[i];
+    }
+
+  }
+  memcpy((uchar*) best_positions, (uchar*) save_best_positions,
+         sizeof(POSITION) * (tables + 1));
+}
+
+
+/**
+   Reoptimize a query plan taking into account an additional conjunct to the
+   WHERE clause.
+*/
+
+int JOIN::reoptimize(Item *added_where, table_map join_tables,
+                     POSITION *save_best_positions)
+{
+  DYNAMIC_ARRAY added_keyuse;
+  SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
+
+  if (my_init_dynamic_array(&added_keyuse, sizeof(KEYUSE), 20, 64))
+  {
+    delete_dynamic(&added_keyuse);
+    return 1;
+  }
+
+  /* Re-run the REF optimizer to take into account the new conditions. */
+  if (update_ref_and_keys(thd, &added_keyuse, join_tab, tables, added_where,
+                          ~outer_join, select_lex, &sargables))
+  {
+    delete_dynamic(&added_keyuse);
+    return 1;
+  }
+
+  if (!added_keyuse.elements)
+  {
+    /* No need to optimize if no new access methods were discovered. */
+    if (save_best_positions)
+      memcpy((uchar*) best_positions, (uchar*) save_best_positions,
+             sizeof(POSITION) * (tables + 1));
+    delete_dynamic(&added_keyuse);
+    return 0;
+  }
+
+  /* Add the new access methods to the keyuse array. */
+  if (!keyuse.buffer &&
+      my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64))
+  {
+    delete_dynamic(&added_keyuse);
+    return 1;
+  }
+  allocate_dynamic(&keyuse, keyuse.elements + added_keyuse.elements);
+  memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
+         added_keyuse.buffer,
+         (size_t) added_keyuse.elements * added_keyuse.size_of_element);
+  keyuse.elements+= added_keyuse.elements;
+  delete_dynamic(&added_keyuse);
+
+  if (sort_and_filter_keyuse(&keyuse))
+    return 1;
+  optimize_keyuse(this, &keyuse);
+
+  /* Re-run the join optimizer to compute a new query plan. */
+  if (choose_plan(this, join_tables))
+    return 1;
+
+  return 0;
+}
+
 /**
   @} (end of group Query_Optimizer)
 */

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2010-09-05 15:43:47 +0000
+++ b/sql/sql_select.h	2010-09-30 15:32:44 +0000
@@ -1369,8 +1369,21 @@ inline bool sj_is_materialize_strategy(u
 
 class JOIN :public Sql_alloc
 {
+private:
   JOIN(const JOIN &rhs);                        /**< not implemented */
   JOIN& operator=(const JOIN &rhs);             /**< not implemented */
+
+protected:
+  /* Support for plan reoptimization with rewritten conditions. */
+  int reoptimize(Item *added_where, table_map join_tables,
+                 POSITION *save_best_positions);
+  int save_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
+                      KEYUSE **save_join_tab_keyuse,
+                      key_map *save_join_tab_checked_keys);
+  void restore_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
+                      KEYUSE **save_join_tab_keyuse,
+                      key_map *save_join_tab_checked_keys);
+
 public:
   JOIN_TAB *join_tab,**best_ref;
   JOIN_TAB **map2table;    ///< mapping between table indexes and JOIN_TABs
@@ -1746,7 +1759,7 @@ public:
               NULL : join_tab+const_tables;
   }
   bool setup_subquery_caches();
-  bool choose_subquery_plan();
+  bool choose_subquery_plan(table_map join_tables);
 private:
   /**
     TRUE if the query contains an aggregate function but has no GROUP



More information about the commits mailing list