[Commits] Rev 3481: MDEV-402: Take into account the cost of subqueries during optimization in file:///home/tsk/mprog/src/10.0-md402-md446/

timour at askmonty.org timour at askmonty.org
Thu Nov 29 09:37:45 EET 2012


At file:///home/tsk/mprog/src/10.0-md402-md446/

------------------------------------------------------------
revno: 3481
revision-id: timour at askmonty.org-20121129073730-83o076ytc78rd1m4
parent: timour at askmonty.org-20121115144706-wiuxb5svyrz23k0l
fixes bug: https://mariadb.atlassian.net/browse/MDEV-402
committer: timour at askmonty.org
branch nick: 10.0-md402-md446
timestamp: Thu 2012-11-29 09:37:30 +0200
message:
  MDEV-402: Take into account the cost of subqueries during optimization 
  
  Merge with mdev-446. Intermediate commit.
  This will be changed definitely while implementing mdev-83 on top of it.
-------------- next part --------------
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc	2012-08-27 16:13:17 +0000
+++ b/sql/item_cmpfunc.cc	2012-11-29 07:37:30 +0000
@@ -461,7 +461,17 @@ static bool convert_const_to_int(THD *th
       field_item->field_type() != MYSQL_TYPE_YEAR)
     return 1;
 
-  if ((*item)->const_item() && !(*item)->is_expensive())
+  /*
+    An item can be converted to an int if:
+    - is constant, and
+    - it is not a subquery (or contains a subquery), and
+    - it is cheap to compute.
+    The reason for the second requirement is to prevent subquery
+    optimization/execution during the JOIN::prepare phase, because this
+    function is called during the prepare phase as well.
+  */
+  if ((*item)->const_item() &&
+      !(*item)->with_subselect && !(*item)->is_expensive())
   {
     TABLE *table= field->table;
     ulonglong orig_sql_mode= thd->variables.sql_mode;

=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc	2012-11-15 10:54:50 +0000
+++ b/sql/item_subselect.cc	2012-11-29 07:37:30 +0000
@@ -150,6 +150,34 @@ void Item_subselect::cleanup()
 }
 
 
+/**
+  Check if the subquery predicate has only one possible execution strategy.
+
+  @details
+  If there is only one possible execution strategy for a subquery predicate,
+  the optimizer doesn't need to know the number of times the subquery will be
+  executed in order to choose the better strategy. In this case the underlying
+  subquery can be optimized before optimizing the outer query.
+
+  @retval FALSE     more than one strategies are possible
+  @retval TRUE      only one strategy is possible
+*/
+
+bool Item_subselect::has_single_strategy()
+{
+  if (!is_in_predicate())
+    return true;
+  else
+  {
+    Item_in_subselect *in_subs= (Item_in_subselect*) this;
+    if (!in_subs->test_strategy(SUBS_MATERIALIZATION) ||
+        !in_subs->test_strategy(SUBS_IN_TO_EXISTS))
+      return true;
+  }
+  return false;
+}
+
+
 void Item_singlerow_subselect::cleanup()
 {
   DBUG_ENTER("Item_singlerow_subselect::cleanup");
@@ -548,9 +576,17 @@ bool Item_subselect::is_expensive()
       continue;
 
     /* If a subquery is not optimized we cannot estimate its cost. */
-    if (!cur_join->join_tab)
+    if (cur_join->have_query_plan != JOIN::QEP_EXECUTABLE)
       return true;
 
+    /*
+      If JOIN::optimize determined that the subquery has an empty result, or
+      this is a table-less subquery, then then subquery is cheap.
+    */
+    if (!cur_join->table_count || !cur_join->tables_list ||
+        cur_join->zero_result_cause)
+      return false;
+
     if (sl->first_inner_unit())
     {
       /*

=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h	2012-10-25 12:50:10 +0000
+++ b/sql/item_subselect.h	2012-11-29 07:37:30 +0000
@@ -140,6 +140,7 @@ class Item_subselect :public Item_result
             substype() == Item_subselect::ALL_SUBS ||
             substype() == Item_subselect::ANY_SUBS);
   }
+  bool has_single_strategy();
 
   /*
     We need this method, because some compilers do not allow 'this'

=== modified file 'sql/opt_subselect.cc'
--- a/sql/opt_subselect.cc	2012-10-19 18:38:59 +0000
+++ b/sql/opt_subselect.cc	2012-11-29 07:37:30 +0000
@@ -4877,10 +4877,8 @@ static void remove_subq_pushed_predicate
 }
 
 
-
-
 /**
-  Optimize all subqueries of a query that were not flattened into a semijoin.
+  Optimize all non-IN subqueries of a query that were not flattened into a semijoin.
 
   @details
   Optimize all immediate children subqueries of a query.
@@ -4897,7 +4895,12 @@ static void remove_subq_pushed_predicate
 
 bool JOIN::optimize_unflattened_subqueries()
 {
-  return select_lex->optimize_unflattened_subqueries(false);
+  return select_lex->optimize_subqueries(2);
+}
+
+bool JOIN::optimize_in_subqueries()
+{
+  return select_lex->optimize_subqueries(3);
 }
 
 /**
@@ -4931,7 +4934,7 @@ bool JOIN::optimize_constant_subqueries(
     not for EXPLAIN.
   */
   select_lex->options&= ~SELECT_DESCRIBE;
-  res= select_lex->optimize_unflattened_subqueries(true);
+  res= select_lex->optimize_subqueries(1);
   select_lex->options= save_options;
   return res;
 }

=== modified file 'sql/sql_delete.cc'
--- a/sql/sql_delete.cc	2012-10-19 18:38:59 +0000
+++ b/sql/sql_delete.cc	2012-11-29 07:37:30 +0000
@@ -120,7 +120,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *
   }
 
   /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
-  if (select_lex->optimize_unflattened_subqueries(false))
+  if (select_lex->optimize_subqueries(0))
     DBUG_RETURN(TRUE);
 
   const_cond= (!conds || conds->const_item());

=== modified file 'sql/sql_lex.cc'
--- a/sql/sql_lex.cc	2012-10-19 18:38:59 +0000
+++ b/sql/sql_lex.cc	2012-11-29 07:37:30 +0000
@@ -3440,7 +3440,7 @@ bool st_select_lex::add_index_hint (THD
   @retval TRUE      error occurred.
 */
 
-bool st_select_lex::optimize_unflattened_subqueries(bool const_only)
+bool st_select_lex::optimize_subqueries(int predicate_filter)
 {
   for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit())
   {
@@ -3455,11 +3455,20 @@ bool st_select_lex::optimize_unflattened
           continue;
       }
 
-      if (const_only && !subquery_predicate->const_item())
-      {
-        /* Skip non-constant subqueries if the caller asked so. */
-        continue;
+      /* TODO Use a callback method instead of switch. */
+      bool optimizeable;
+      switch (predicate_filter) {
+      case 1:
+        optimizeable= subquery_predicate->const_item();
+        break;
+      case 2:
+        optimizeable= subquery_predicate->has_single_strategy();
+        break;
+      default:
+        optimizeable= true;
       }
+      if (!optimizeable)
+        continue;
 
       bool empty_union_result= true;
       bool is_correlated_unit= false;
@@ -4181,7 +4190,9 @@ int st_select_lex::print_explain(select_
                                  bool *printed_anything)
 {
   int res;
-  if (join && join->have_query_plan == JOIN::QEP_AVAILABLE)
+  if (join &&
+      (join->have_query_plan == JOIN::QEP_EXPLAINABLE ||
+       join->have_query_plan == JOIN::QEP_EXECUTABLE))
   {
     /*
       There is a number of reasons join can be marked as degenerate, so all

=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h	2012-10-19 18:38:59 +0000
+++ b/sql/sql_lex.h	2012-11-29 07:37:30 +0000
@@ -931,7 +931,7 @@ class st_select_lex: public st_select_le
 
   void clear_index_hints(void) { index_hints= NULL; }
   bool is_part_of_union() { return master_unit()->is_union(); }
-  bool optimize_unflattened_subqueries(bool const_only);
+  bool optimize_subqueries(int predicate_filter);
   /* Set the EXPLAIN type for this subquery. */
   void set_explain_type(bool on_the_fly);
   bool handle_derived(LEX *lex, uint phases);

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2012-11-15 14:47:06 +0000
+++ b/sql/sql_select.cc	2012-11-29 07:37:30 +0000
@@ -990,9 +990,16 @@ int JOIN::optimize()
     short-circuit because optimized==TRUE.
   */
   if (!res && have_query_plan != QEP_DELETED)
-    have_query_plan= QEP_AVAILABLE;
+  {
+    if (have_query_plan == QEP_INCOMPLETE)
+      have_query_plan= QEP_EXPLAINABLE;
+    else
+      have_query_plan= QEP_EXECUTABLE;
+  }
   return res;
 }
+
+
 /**
   global select optimisation.
 
@@ -1689,12 +1696,18 @@ JOIN::optimize_inner()
   if (!(select_options & SELECT_DESCRIBE))
     init_ftfuncs(thd, select_lex, test(order));
 
-  if (optimize_unflattened_subqueries())
+  if (optimize_in_subqueries())
     DBUG_RETURN(1);
   
   int res;
   if ((res= rewrite_to_index_subquery_engine(this)) != -1)
+  {
+    /*
+      This subquery has been rewritten to direct index access via handler calls
+      instead of going through JOIN::exec.
+    */
     DBUG_RETURN(res);
+  }
   if (setup_subquery_caches())
     DBUG_RETURN(-1);
 
@@ -1822,7 +1835,7 @@ JOIN::optimize_inner()
     Even with zero matching rows, subqueries in the HAVING clause may
     need to be evaluated if there are aggregate functions in the query.
   */
-  if (optimize_unflattened_subqueries())
+  if (optimize_unflattened_subqueries() || optimize_in_subqueries())
     DBUG_RETURN(1);
   error= 0;
 
@@ -3818,6 +3831,9 @@ make_join_statistics(JOIN *join, List<TA
   if (join->const_tables != join->table_count)
     optimize_keyuse(join, keyuse_array);
    
+  if (join->optimize_unflattened_subqueries())
+    DBUG_RETURN(1);
+
   if (optimize_semijoin_nests(join, all_table_map))
     DBUG_RETURN(TRUE); /* purecov: inspected */
 
@@ -5382,6 +5398,8 @@ best_access_path(JOIN      *join,
   bool best_uses_jbuf= FALSE;
   MY_BITMAP *eq_join_set= &s->table->eq_join_set;
   KEYUSE *hj_start_key= 0;
+  double subquery_cost= join->get_subquery_cost(remaining_tables,
+                                                s->table->map);
 
   disable_jbuf= disable_jbuf || idx == join->const_tables;  
 
@@ -6824,6 +6842,41 @@ void JOIN::get_prefix_cost_and_fanout(ui
 
 
 /**
+*/
+
+double JOIN::get_subquery_cost(table_map remaining_tables, table_map new_table)
+{
+  double total_cost= 0;
+  for (SELECT_LEX_UNIT *un= select_lex->first_inner_unit();
+       un; un= un->next_unit())
+  {
+    if (!un->item)
+      continue;
+
+    table_map sq_tables= un->item->used_tables();
+    /*
+      If the subquery predicate depends on new_table, and it doesn't depend on
+      any table later in the plan, then the subquery predicate can be pushed
+      either to 'new_table', or any table later in the plan. In other words,
+      the last table on which this predicate depends on is 'new_table'.
+    */
+    if ((sq_tables & new_table) &&
+        !(sq_tables & (remaining_tables & ~new_table)))
+    {
+      for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
+      {
+        JOIN *inner_join= sl->join;
+        total_cost+= inner_join->best_read + /* The cost of this subquery. */
+          /* The cost of the subquery subqueries, independent of their used_tables. */
+          inner_join->get_subquery_cost(0, -1 /* all bits set */);
+      }
+    }
+  }
+  return total_cost;
+}
+
+
+/**
   Estimate the number of rows that query execution will read.
 
   @todo This is a very pessimistic upper bound. Use join selectivity
@@ -6992,7 +7045,6 @@ best_extension_by_limited_search(JOIN
   double best_record_count= DBL_MAX;
   double best_read_time=    DBL_MAX;
   bool disable_jbuf= join->thd->variables.join_cache_level == 0;
-  double partial_join_cardinality;
 
   DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
                                 "part_plan"););
@@ -7080,12 +7132,10 @@ best_extension_by_limited_search(JOIN
       if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
       { /* Recursively expand the current partial plan */
         swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
-        partial_join_cardinality= current_record_count *
-                                  s->cond_selectivity(idx);
         if (best_extension_by_limited_search(join,
                                              remaining_tables & ~real_table_bit,
                                              idx + 1,
-                                             partial_join_cardinality,
+                                             current_record_count,
                                              current_read_time,
                                              search_depth - 1,
                                              prune_level))
@@ -9893,6 +9943,8 @@ uint check_join_cache_usage(JOIN_TAB *ta
     if ((tab->cache= new JOIN_CACHE_BNL(join, tab, prev_cache)) &&
         ((options & SELECT_DESCRIBE) || !tab->cache->init()))
     {
+      if (options & SELECT_DESCRIBE)
+        join->have_query_plan= JOIN::QEP_INCOMPLETE;
       tab->icp_other_tables_ok= FALSE;
       return (2-test(!prev_cache));
     }
@@ -9928,6 +9980,8 @@ uint check_join_cache_usage(JOIN_TAB *ta
       if ((tab->cache= new JOIN_CACHE_BNLH(join, tab, prev_cache)) &&
           ((options & SELECT_DESCRIBE) || !tab->cache->init()))
       {
+        if (options & SELECT_DESCRIBE)
+          join->have_query_plan= JOIN::QEP_INCOMPLETE;
         tab->icp_other_tables_ok= FALSE;        
         return (4-test(!prev_cache));
       }
@@ -9948,7 +10002,11 @@ uint check_join_cache_usage(JOIN_TAB *ta
           prev_cache= 0;
         if ((tab->cache= new JOIN_CACHE_BKA(join, tab, flags, prev_cache)) &&
             ((options & SELECT_DESCRIBE) || !tab->cache->init()))
+        {
+          if (options & SELECT_DESCRIBE)
+            join->have_query_plan= JOIN::QEP_INCOMPLETE;
           return (6-test(!prev_cache));
+        }
         goto no_join_cache;
       }
       else
@@ -9958,7 +10016,9 @@ uint check_join_cache_usage(JOIN_TAB *ta
         if ((tab->cache= new JOIN_CACHE_BKAH(join, tab, flags, prev_cache)) &&
             ((options & SELECT_DESCRIBE) || !tab->cache->init()))
         {
-         tab->idx_cond_fact_out= FALSE;
+          if (options & SELECT_DESCRIBE)
+            join->have_query_plan= JOIN::QEP_INCOMPLETE;
+          tab->idx_cond_fact_out= FALSE;
           return (8-test(!prev_cache));
         }
         goto no_join_cache;
@@ -21747,7 +21807,8 @@ int JOIN::print_explain(select_result_si
   DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s",
                       (ulong)join->select_lex, join->select_lex->type,
                       message ? message : "NULL"));
-  DBUG_ASSERT(on_the_fly? have_query_plan == QEP_AVAILABLE: TRUE);
+  DBUG_ASSERT(on_the_fly? (have_query_plan == QEP_EXPLAINABLE ||
+                           have_query_plan == QEP_EXECUTABLE) : TRUE);
   /* Don't log this into the slow query log */
 
   if (!on_the_fly)

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2012-11-15 14:47:06 +0000
+++ b/sql/sql_select.h	2012-11-29 07:37:30 +0000
@@ -1049,6 +1049,8 @@ class JOIN :public Sql_alloc
     account the changes made by test_if_skip_sort_order()).
   */
   double   best_read;
+  /* The sum of the costs (best_read) of all subqueries. */
+  double   total_subquery_cost;
   /*
     Estimated result rows (fanout) of the join operation. If this is a subquery
     that is reexecuted multiple times, this value includes the estiamted # of
@@ -1193,13 +1195,20 @@ class JOIN :public Sql_alloc
   
   bool union_part; ///< this subselect is part of union 
 
-  enum join_optimization_state { NOT_OPTIMIZED=0,
-                                 OPTIMIZATION_IN_PROGRESS=1,
-                                 OPTIMIZATION_DONE=2};
   bool optimized; ///< flag to avoid double optimization in EXPLAIN
   bool initialized; ///< flag to avoid double init_execution calls
   
-  enum { QEP_NOT_PRESENT_YET, QEP_AVAILABLE, QEP_DELETED} have_query_plan;
+  enum {QEP_NOT_PRESENT_YET,
+        /* A query plan exists. It is being optimized, but is still incomplete. */
+        QEP_INCOMPLETE,
+        /*
+          A query plan exists, but it is usable only for EXPLAIN because
+          some of its structures are incomplete.
+        */
+        QEP_EXPLAINABLE,
+        /* A query plan exists, and is ready for execution. */
+        QEP_EXECUTABLE,
+        QEP_DELETED} have_query_plan;
 
   /*
     Additional WHERE and HAVING predicates to be considered for IN=>EXISTS
@@ -1253,6 +1262,7 @@ class JOIN :public Sql_alloc
     found_records= 0;
     fetch_limit= HA_POS_ERROR;
     examined_rows= 0;
+    total_subquery_cost= 0;
     exec_tmp_table1= 0;
     exec_tmp_table2= 0;
     sortorder= 0;
@@ -1323,6 +1333,7 @@ class JOIN :public Sql_alloc
   bool alloc_func_list();
   bool flatten_subqueries();
   bool optimize_unflattened_subqueries();
+  bool optimize_in_subqueries();
   bool optimize_constant_subqueries();
   bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
                           bool before_group_by, bool recompute= FALSE);
@@ -1423,6 +1434,7 @@ class JOIN :public Sql_alloc
   void get_prefix_cost_and_fanout(uint n_tables, 
                                   double *read_time_arg,
                                   double *record_count_arg);
+  double get_subquery_cost(table_map partial_plan, table_map new_table);
   double get_examined_rows();
   /* defined in opt_subselect.cc */
   bool transform_max_min_subquery();

=== modified file 'sql/sql_update.cc'
--- a/sql/sql_update.cc	2012-11-03 08:24:36 +0000
+++ b/sql/sql_update.cc	2012-11-29 07:37:30 +0000
@@ -368,7 +368,7 @@ int mysql_update(THD *thd,
   }
 
   /* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
-  if (select_lex->optimize_unflattened_subqueries(false))
+  if (select_lex->optimize_subqueries(0))
     DBUG_RETURN(TRUE);
 
   if (select_lex->inner_refs_list.elements &&



More information about the commits mailing list