[Commits] b9e28d4227e: MDEV-15777: Support Early NULLs filtering-like restrictions in the range optimizer

Varun varunraiko1803 at gmail.com
Fri Apr 20 20:32:50 EEST 2018


revision-id: b9e28d4227ea4b4d9cd147f87354afaf44e14746 (mariadb-10.3.0-765-gb9e28d4227e)
parent(s): 91245909a2f0c89444ecb5af587284f53b7196ee
author: Varun Gupta
committer: Varun Gupta
timestamp: 2018-04-20 22:58:45 +0530
message:

MDEV-15777: Support Early NULLs filtering-like restrictions in the range optimizer

Introduced the function make_null_rejecting_conds() that calculates the null-rejecting conditions
for the keys of a table, so that range analysis can then be performed on these conditions.

The TABLE structure gets a new field, null_rejecting_conds, which holds these null-rejecting
conditions for the table.
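
For example, for the query used in the included test case, the equality
t1.subset_id = t1_subsets.id is null-rejecting, so the range optimizer can now
also see the restriction sketched below and use it for range access over the
t1_subset_id index (a sketch of the intended effect; the actual plan is in the
accompanying .result file):

  SELECT * FROM t1 WHERE t1.subset_id IN (SELECT t1_subsets.id FROM t1_subsets);
  -- derived null-rejecting restriction made visible to the range optimizer:
  --   t1.subset_id IS NOT NULL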

---
 mysql-test/main/mdev15777.result |  65 ++++++++++++++++++++
 mysql-test/main/mdev15777.test   |  34 +++++++++++
 sql/opt_range.cc                 | 124 ++++++++++++++++++++++++++++++++++++++-
 sql/opt_range.h                  |   2 +
 sql/sql_select.cc                |  22 ++++++-
 sql/table.cc                     |   1 +
 sql/table.h                      |   6 ++
 7 files changed, 250 insertions(+), 4 deletions(-)

diff --git a/mysql-test/main/mdev15777.result b/mysql-test/main/mdev15777.result
new file mode 100644
index 00000000000..87167c16f6d
--- /dev/null
+++ b/mysql-test/main/mdev15777.result
@@ -0,0 +1,65 @@
+create table ten(a int);
+insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table one_k(a int);
+insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
+create table one_m(a int);
+insert into one_m select A.a + B.a* 1000  from one_k A, one_k B;
+delete from one_m where a=0 limit 1;
+create table t1 (
+id int(10) unsigned NOT NULL AUTO_INCREMENT,
+filler varchar(100),
+subset_id int(11) DEFAULT NULL,
+PRIMARY KEY (id),
+KEY t1_subset_id (subset_id)
+);
+create table t1_subsets (
+id int(10) unsigned NOT NULL AUTO_INCREMENT,
+filler1 varchar(100),
+filler2 varchar(100),
+filler3 varchar(100),
+PRIMARY KEY (id)
+);
+insert into t1 select a,a, NULL from one_m where a < 50*1000;
+insert into t1_subsets select a,a,a,a from one_m where a < 500*1000 limit 499000;
+analyze format=json
+SELECT * FROM   t1 WHERE t1.subset_id IN (SELECT t1_subsets.id FROM t1_subsets);
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_loops": 1,
+    "r_total_time_ms": 0.0444,
+    "table": {
+      "table_name": "t1",
+      "access_type": "range",
+      "possible_keys": ["t1_subset_id"],
+      "key": "t1_subset_id",
+      "key_length": "5",
+      "used_key_parts": ["subset_id"],
+      "r_loops": 1,
+      "rows": 3,
+      "r_rows": 0,
+      "r_total_time_ms": 0.0146,
+      "filtered": 100,
+      "r_filtered": 100,
+      "index_condition": "t1.subset_id is not null"
+    },
+    "table": {
+      "table_name": "t1_subsets",
+      "access_type": "eq_ref",
+      "possible_keys": ["PRIMARY"],
+      "key": "PRIMARY",
+      "key_length": "4",
+      "used_key_parts": ["id"],
+      "ref": ["test.t1.subset_id"],
+      "r_loops": 0,
+      "rows": 1,
+      "r_rows": null,
+      "filtered": 100,
+      "r_filtered": null,
+      "attached_condition": "t1.subset_id = t1_subsets.`id`",
+      "using_index": true
+    }
+  }
+}
+drop table t1,t1_subsets,ten,one_k,one_m;
diff --git a/mysql-test/main/mdev15777.test b/mysql-test/main/mdev15777.test
new file mode 100644
index 00000000000..ad5777aeecf
--- /dev/null
+++ b/mysql-test/main/mdev15777.test
@@ -0,0 +1,34 @@
+create table ten(a int);
+insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table one_k(a int);
+insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
+
+create table one_m(a int);
+insert into one_m select A.a + B.a* 1000  from one_k A, one_k B;
+delete from one_m where a=0 limit 1;
+
+create table t1 (
+  id int(10) unsigned NOT NULL AUTO_INCREMENT,
+  filler varchar(100),
+  subset_id int(11) DEFAULT NULL,
+  PRIMARY KEY (id),
+  KEY t1_subset_id (subset_id)
+);
+
+create table t1_subsets (
+  id int(10) unsigned NOT NULL AUTO_INCREMENT,
+  filler1 varchar(100),
+  filler2 varchar(100),
+  filler3 varchar(100),
+  PRIMARY KEY (id)
+);
+
+insert into t1 select a,a, NULL from one_m where a < 50*1000;
+insert into t1_subsets select a,a,a,a from one_m where a < 500*1000 limit 499000;
+
+analyze format=json
+SELECT * FROM   t1 WHERE t1.subset_id IN (SELECT t1_subsets.id FROM t1_subsets);
+drop table t1,t1_subsets,ten,one_k,one_m;
+
+
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 38dbed92a22..9343f97382d 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2399,6 +2399,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 {
   uint idx;
   double scan_time;
+  Item *null_rejecting_conds= NULL;
   DBUG_ENTER("SQL_SELECT::test_quick_select");
   DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
 		      (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
@@ -2422,6 +2423,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     read_time= (double) records + scan_time + 1; // Force to use index
   
   possible_keys.clear_all();
+  null_rejecting_conds= head->null_rejecting_conds;
 
   DBUG_PRINT("info",("Time to scan table: %g", read_time));
 
@@ -2430,7 +2432,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   {
     uchar buff[STACK_BUFF_ALLOC];
     MEM_ROOT alloc;
-    SEL_TREE *tree= NULL;
+    SEL_TREE *tree= NULL, *not_null_cond_tree= NULL;
     KEY_PART *key_parts;
     KEY *key_info;
     PARAM param;
@@ -2539,6 +2541,12 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
     TRP_GROUP_MIN_MAX *group_trp;
     double best_read_time= read_time;
 
+    if (null_rejecting_conds)
+    {
+      not_null_cond_tree= null_rejecting_conds->get_mm_tree(&param,
+                                                  &null_rejecting_conds);
+    }
+
     if (cond)
     {
       if ((tree= cond->get_mm_tree(&param, &cond)))
@@ -2557,6 +2565,13 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
           tree= NULL;
       }
     }
+    if (not_null_cond_tree)
+    {
+      if (!tree)
+        tree= not_null_cond_tree;
+      else
+        tree= tree_and(&param, tree, not_null_cond_tree);
+    }
 
     /*
       Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
@@ -14647,6 +14662,113 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
   add_key_and_length(key_names, used_lengths, &first);
 }
 
+/*
+  AND-merge a condition into an accumulator: set *e1= (*e1 AND e2).
+  If *e1 is NULL it is simply set to e2; if e2 is NULL, *e1 is left
+  unchanged.
+*/
+inline void add_cond(THD *thd, Item **e1, Item *e2)
+{
+  if (*e1)
+  {
+    if (!e2)
+      return;
+    Item *res;
+    if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2)))
+    {
+      res->fix_fields(thd, 0);
+      res->update_used_tables();
+      *e1= res;
+    }
+  }
+  else
+    *e1= e2;
+}
+
+/*
+  Create null-rejecting conditions for a table, for all the equalities
+  present in the WHERE clause of a query.
+
+  SYNOPSIS
+    make_null_rejecting_conds()
+    @param thd          - current thread
+    @param table        - keys of this table will participate in the
+                          null-rejecting conditions
+    @param keyuse_array - array that holds all the equalities of the
+                          WHERE clause
+    @param const_keys   - key map in which the keys that receive a
+                          null-rejecting condition are marked
+
+  DESCRIPTION
+    This function creates null-rejecting conditions for a table. The
+    conditions are created so that range analysis can be performed on
+    them; they are of the form tbl.key.keypart IS NOT NULL.
+
+  IMPLEMENTATION
+    Look through the keyuse array for equalities that belong to the
+    given table. If there are any, determine which of them are
+    null-rejecting, build the corresponding IS NOT NULL conditions for
+    the keyparts of the table and AND them together.
+
+  RETURN
+    Nothing. The resulting condition (or NULL if no null-rejecting
+    conditions were found) is stored in table->null_rejecting_conds.
+*/
+
+void make_null_rejecting_conds(THD *thd, TABLE *table,
+                        DYNAMIC_ARRAY *keyuse_array, key_map *const_keys)
+{
+  KEY *keyinfo;
+  Item *cond= NULL;
+  KEYUSE* keyuse;
+  
+  /*
+    The null-rejecting conditions are built over keyparts of the table's
+    keys, so the table must have at least one key.
+  */
+  if (!table->s->keys)
+    return;
+  if (table->null_rejecting_conds)
+    return;
+
+  for(uint i=0; i < keyuse_array->elements; i++)
+  {
+    keyuse= (KEYUSE*)dynamic_array_ptr(keyuse_array, i);
+    if (keyuse->table == table)
+    {
+      /*
+        No null rejecting conds for a hash key
+      */
+      if (keyuse->key == MAX_KEY)
+        continue;
+      keyinfo= keyuse->table->key_info+keyuse->key;
+      Field *field= keyinfo->key_part[keyuse->keypart].field;
+
+      /*
+        No need to add a null-rejecting condition for a keyuse element of
+        the form:
+          - table.key.keypart = const
+          - (table.key.keypart = tbl.otherfield OR table.key.keypart IS NULL)
+          - table.key.keypart is a column that cannot be NULL
+      */
+
+      if (keyuse->val->const_item()
+          || !(keyuse->null_rejecting && field->maybe_null())
+          || keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
+        continue;
+
+      Item_field *field_item= new (thd->mem_root)Item_field(thd, field);
+      Item* not_null_item= new (thd->mem_root)Item_func_isnotnull(thd,
+                                                             field_item);
+
+      /*
+        Mark the key in const_keys, as we now have the condition
+        key.keypart IS NOT NULL for it.
+      */
+
+      const_keys->set_bit(keyuse->key);
+      not_null_item->fix_fields(thd, 0);
+      not_null_item->update_used_tables();
+      add_cond(thd, &cond, not_null_item);
+    }
+  }
+  table->null_rejecting_conds= cond;
+  return;
+}
+
 
 #ifndef DBUG_OFF
 
diff --git a/sql/opt_range.h b/sql/opt_range.h
index bd85a12d4a1..894d46a892c 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -1728,6 +1728,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond);
 bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond);
 #endif
 void store_key_image_to_rec(Field *field, uchar *ptr, uint len);
+void make_null_rejecting_conds(THD *thd, TABLE *table,
+                          DYNAMIC_ARRAY *keyuse_array, key_map *const_keys);
 
 extern String null_string;
 
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 796ea569e64..daef79e2f68 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -4783,6 +4783,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
     add_group_and_distinct_keys(join, s);
 
     s->table->cond_selectivity= 1.0;
+
+    make_null_rejecting_conds(join->thd, s->table,
+                              keyuse_array, &s->const_keys);
     
     /*
       Perform range analysis if there are keys it could use (1). 
@@ -4812,6 +4815,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
 			    1, &error);
         if (!select)
           goto error;
+
         records= get_quick_record_count(join->thd, select, s->table,
 				        &s->const_keys, join->row_limit);
         /* Range analyzer could modify the condition. */
@@ -5330,15 +5334,24 @@ add_key_field(JOIN *join,
     If the condition has form "tbl.keypart = othertbl.field" and 
     othertbl.field can be NULL, there will be no matches if othertbl.field 
     has NULL value.
+
+    The field KEY_FIELD::null_rejecting is set to TRUE if either the left
+    or the right hand side of the equality is NULLABLE.
+
     We use null_rejecting in add_not_null_conds() to add
     'othertbl.field IS NOT NULL' to tab->select_cond.
+
+    We use null_rejecting in make_null_rejecting_conds() to add
+    'tbl.keypart IS NOT NULL', so that we can do range analysis on this
+    condition.
+
   */
   {
     Item *real= (*value)->real_item();
     if (((cond->functype() == Item_func::EQ_FUNC) ||
          (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
-        (real->type() == Item::FIELD_ITEM) &&
+        (((real->type() == Item::FIELD_ITEM) &&
         ((Item_field*)real)->field->maybe_null())
+        || (field->maybe_null())))
       (*key_fields)->null_rejecting= true;
     else
       (*key_fields)->null_rejecting= false;
@@ -9794,7 +9807,10 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
       uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit);
       j->ref.items[i]=keyuse->val;		// Save for cond removal
       j->ref.cond_guards[i]= keyuse->cond_guard;
-      if (keyuse->null_rejecting) 
+      Item *real= (keyuse->val)->real_item();
+      if (keyuse->null_rejecting &&
+          (real->type() == Item::FIELD_ITEM) &&
+          ((Item_field*)real)->field->maybe_null())
         j->ref.null_rejecting|= (key_part_map)1 << i;
       keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
       /*
@@ -18516,7 +18532,7 @@ free_tmp_table(THD *thd, TABLE *entry)
     DBUG_ASSERT(entry->pos_in_table_list->table == entry);
     entry->pos_in_table_list->table= NULL;
   }
-
+  entry->null_rejecting_conds= NULL;
   free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
   thd_proc_info(thd, save_proc_info);
 
diff --git a/sql/table.cc b/sql/table.cc
index 577ed20a87e..c4e7c3aba09 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -4593,6 +4593,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
   created= TRUE;
   cond_selectivity= 1.0;
   cond_selectivity_sampling_explain= NULL;
+  null_rejecting_conds= NULL;
 #ifdef HAVE_REPLICATION
   /* used in RBR Triggers */
   master_had_triggers= 0;
diff --git a/sql/table.h b/sql/table.h
index 32e99db880f..9496aef046d 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1354,6 +1354,12 @@ struct TABLE
   SplM_opt_info *spl_opt_info;
   key_map keys_usable_for_splitting;
 
+  /*
+    Null-rejecting conditions for this table, built by
+    make_null_rejecting_conds(); range analysis is performed on them.
+  */
+  Item* null_rejecting_conds;
+
   void init(THD *thd, TABLE_LIST *tl);
   bool fill_item_list(List<Item> *item_list) const;
   void reset_item_list(List<Item> *item_list, uint skip) const;
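
As a rough SQL illustration of the cases that make_null_rejecting_conds()
skips (this reuses the schema from the test above; whether a particular query
produces exactly these KEYUSE shapes is an assumption for illustration, not
something verified here):

  -- keypart compared to a constant: range analysis already handles this,
  -- so no extra IS NOT NULL restriction is added
  SELECT * FROM t1 WHERE t1.subset_id = 10;

  -- ref-or-null pattern: NULL rows must still be found, so no restriction
  SELECT * FROM t1, t1_subsets t2
  WHERE t1.subset_id = t2.id OR t1.subset_id IS NULL;

  -- keypart column is declared NOT NULL: the restriction would be redundant
  SELECT * FROM t1, t1_subsets t2 WHERE t1.id = t2.id;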

