[Commits] ab841ac6c7e: MDEV-8954 unnecessary fetch of entire table

jacob.mathew at mariadb.com jacob.mathew at mariadb.com
Thu Mar 16 20:35:37 EET 2017


revision-id: ab841ac6c7eed993d4a69f8f205b97d43ebc3847 (mariadb-10.2.3-84-gab841ac6c7e)
parent(s): 60cea70a07a10a3990517bd6c9bd4c2b20fc70ce
author: Jacob Mathew
committer: Jacob Mathew
timestamp: 2017-03-15 14:49:40 -0700
message:

MDEV-8954 unnecessary fetch of entire table

Avoid visiting each row multiple times during a SELECT DISTINCT query
on a Spider table.

---
 storage/spider/ha_spider.cc            | 80 ++++++++++++++++++++++++++++++++++
 storage/spider/ha_spider.h             |  2 +
 storage/spider/spd_db_handlersocket.cc |  9 ++++
 storage/spider/spd_db_handlersocket.h  |  3 ++
 storage/spider/spd_db_include.h        |  3 ++
 storage/spider/spd_db_mysql.cc         | 59 +++++++++++++++++++++++++
 storage/spider/spd_db_mysql.h          |  3 ++
 storage/spider/spd_db_oracle.cc        | 59 +++++++++++++++++++++++++
 storage/spider/spd_db_oracle.h         |  3 ++
 9 files changed, 221 insertions(+)

diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc
index 5698edb1e20..23734469f3a 100644
--- a/storage/spider/ha_spider.cc
+++ b/storage/spider/ha_spider.cc
@@ -2109,6 +2109,7 @@ int ha_spider::index_read_map_internal(
   result_list.desc_flg = FALSE;
   result_list.sorted = TRUE;
   result_list.key_info = &table->key_info[active_index];
+  check_distinct_key_query();
   result_list.limit_num =
     result_list.internal_limit >= result_list.split_read ?
     result_list.split_read : result_list.internal_limit;
@@ -2624,6 +2625,7 @@ int ha_spider::index_read_last_map_internal(
   result_list.desc_flg = TRUE;
   result_list.sorted = TRUE;
   result_list.key_info = &table->key_info[active_index];
+  check_distinct_key_query();
   result_list.limit_num =
     result_list.internal_limit >= result_list.split_read ?
     result_list.split_read : result_list.internal_limit;
@@ -3089,6 +3091,7 @@ int ha_spider::index_first_internal(
     result_list.sorted = TRUE;
     result_list.key_info = &table->key_info[active_index];
     result_list.key_order = 0;
+    check_distinct_key_query();
     result_list.limit_num =
       result_list.internal_limit >= result_list.split_read ?
       result_list.split_read : result_list.internal_limit;
@@ -3472,6 +3475,7 @@ int ha_spider::index_last_internal(
     result_list.sorted = TRUE;
     result_list.key_info = &table->key_info[active_index];
     result_list.key_order = 0;
+    check_distinct_key_query();
     result_list.limit_num =
       result_list.internal_limit >= result_list.split_read ?
       result_list.split_read : result_list.internal_limit;
@@ -3914,6 +3918,7 @@ int ha_spider::read_range_first_internal(
   result_list.desc_flg = FALSE;
   result_list.sorted = sorted;
   result_list.key_info = &table->key_info[active_index];
+  check_distinct_key_query();
   result_list.limit_num =
     result_list.internal_limit >= result_list.split_read ?
     result_list.split_read : result_list.internal_limit;
@@ -12077,6 +12082,81 @@ void ha_spider::check_direct_order_limit()
   DBUG_VOID_RETURN;
 }
 
+/********************************************************************
+ * Check whether the current query is a SELECT DISTINCT using an
+ * index in a non-partitioned Spider configuration, with a
+ * projection list that consists solely of the first key prefix
+ * column.
+ *
+ * For a SELECT DISTINCT query using an index in a non-partitioned
+ * Spider configuration, with a projection list that consists
+ * solely of the first key prefix, set the internal row retrieval
+ * limit to avoid visiting each row multiple times.
+ ********************************************************************/
+void ha_spider::check_distinct_key_query()
+{
+    DBUG_ENTER( "ha_spider::check_distinct_key_query" );
+
+    if ( result_list.direct_distinct && !partition_handler_share->handlers &&
+         result_list.keyread && result_list.check_direct_order_limit )
+    {
+        // SELECT DISTINCT query using an index in a non-partitioned configuration
+        KEY_PART_INFO*  key_part = result_list.key_info->key_part;
+        Field*          key_field = key_part->field;
+
+        if ( is_sole_projection_field( key_field->field_index ) )
+        {
+            // Projection list consists solely of the first key prefix column
+
+            // Set the internal row retrieval limit to avoid visiting each row
+            // multiple times.  This fixes a Spider performance bug that
+            // caused each row to be visited multiple times.
+            result_list.internal_limit = 1;
+        }
+    }
+
+    DBUG_VOID_RETURN;
+}
+
+/********************************************************************
+ * Determine whether the current query's projection list
+ * consists solely of the specified column.
+ *
+ * Params   IN      - field_index:
+ *                    Field index of the column of interest within
+ *                    its table.
+ *
+ * Returns  TRUE    - if the query's projection list consists
+ *                    solely of the specified column.
+ *          FALSE   - otherwise.
+ ********************************************************************/
+bool ha_spider::is_sole_projection_field( uint16 field_index )
+{
+    // NOTE: It is assumed that spider_db_append_select_columns() has already been called
+    //       to build the bitmap of projection fields
+    bool                is_ha_sole_projection_field;
+    uint                loop_index, dbton_id;
+    spider_db_handler*  dbton_hdl;
+    DBUG_ENTER( "ha_spider::is_sole_projection_field" );
+
+    for ( loop_index = 0; loop_index < share->use_sql_dbton_count; loop_index++ )
+    {
+        dbton_id    = share->use_sql_dbton_ids[ loop_index ];
+        dbton_hdl   = dbton_handler[ dbton_id ];
+
+        if ( dbton_hdl->first_link_idx >= 0 )
+        {
+            is_ha_sole_projection_field = dbton_hdl->is_sole_projection_field( field_index );
+            if ( !is_ha_sole_projection_field )
+            {
+                DBUG_RETURN( FALSE );
+            }
+        }
+    }
+
+    DBUG_RETURN( TRUE );
+}
+
 int ha_spider::check_ha_range_eof()
 {
   DBUG_ENTER("ha_spider::check_ha_range_eof");
diff --git a/storage/spider/ha_spider.h b/storage/spider/ha_spider.h
index e51718dc0c9..e196725a048 100644
--- a/storage/spider/ha_spider.h
+++ b/storage/spider/ha_spider.h
@@ -760,6 +760,8 @@ class ha_spider: public handler
   );
   uint check_partitioned();
   void check_direct_order_limit();
+  void check_distinct_key_query();
+  bool is_sole_projection_field( uint16 field_index );
   int check_ha_range_eof();
   int drop_tmp_tables();
   bool handler_opened(
diff --git a/storage/spider/spd_db_handlersocket.cc b/storage/spider/spd_db_handlersocket.cc
index 2ae84499aff..7c2a2dae378 100644
--- a/storage/spider/spd_db_handlersocket.cc
+++ b/storage/spider/spd_db_handlersocket.cc
@@ -4994,6 +4994,15 @@ int spider_handlersocket_handler::append_explain_select_part(
   DBUG_RETURN(0);
 }
 
+int spider_handlersocket_handler::is_sole_projection_field(
+  uint16 field_index
+) {
+  DBUG_ENTER("spider_handlersocket_handler::is_sole_projection_field");
+  DBUG_PRINT("info", ("spider this=%p", this));
+  DBUG_ASSERT(0);
+  DBUG_RETURN(0);
+}
+
 bool spider_handlersocket_handler::is_bulk_insert_exec_period(
   bool bulk_end
 ) {
diff --git a/storage/spider/spd_db_handlersocket.h b/storage/spider/spd_db_handlersocket.h
index 1cc1476c83a..14c229a4f0e 100644
--- a/storage/spider/spd_db_handlersocket.h
+++ b/storage/spider/spd_db_handlersocket.h
@@ -776,6 +776,9 @@ class spider_handlersocket_handler: public spider_db_handler
     ulong sql_type,
     int link_idx
   );
+  bool is_sole_projection_field(
+    uint16 field_index
+  );
   bool is_bulk_insert_exec_period(
     bool bulk_end
   );
diff --git a/storage/spider/spd_db_include.h b/storage/spider/spd_db_include.h
index 56dbc9ce0dc..b6534e60a70 100644
--- a/storage/spider/spd_db_include.h
+++ b/storage/spider/spd_db_include.h
@@ -1279,6 +1279,9 @@ class spider_db_handler
     ulong sql_type,
     int link_idx
   ) = 0;
+  virtual bool is_sole_projection_field(
+      uint16 field_index
+  ) = 0;
   virtual bool is_bulk_insert_exec_period(
     bool bulk_end
   ) = 0;
diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc
index a41a943cd04..b6032707e0e 100644
--- a/storage/spider/spd_db_mysql.cc
+++ b/storage/spider/spd_db_mysql.cc
@@ -9516,6 +9516,65 @@ int spider_mysql_handler::append_explain_select(
   DBUG_RETURN(0);
 }
 
+/********************************************************************
+ * Determine whether the current query's projection list
+ * consists solely of the specified column.
+ *
+ * Params   IN      - field_index:
+ *                    Field index of the column of interest within
+ *                    its table.
+ *
+ * Returns  TRUE    - if the query's projection list consists
+ *                    solely of the specified column.
+ *          FALSE   - otherwise.
+ ********************************************************************/
+bool spider_mysql_handler::is_sole_projection_field( uint16 field_index )
+{
+    // Determine whether the projection list consists solely of the field of interest
+    bool            is_field_in_projection_list = FALSE;
+    TABLE*          table                       = spider->get_table();
+    uint16          projection_field_count      = 0;
+    uint16          projection_field_index;
+    Field**         field;
+    DBUG_ENTER( "spider_mysql_handler::is_sole_projection_field" );
+
+    for ( field = table->field; *field ; field++ )
+    {
+        projection_field_index = ( *field )->field_index;
+
+        if ( !( minimum_select_bit_is_set( projection_field_index ) ) )
+        {
+            // Current field is not in the projection list
+            continue;
+        }
+
+        projection_field_count++;
+
+        if ( !is_field_in_projection_list )
+        {
+            if ( field_index == projection_field_index )
+            {
+                // Field of interest is in the projection list
+                is_field_in_projection_list = TRUE;
+            }
+        }
+
+        if ( is_field_in_projection_list && ( projection_field_count != 1 ) )
+        {
+            // Field of interest is not the sole column in the projection list
+            DBUG_RETURN( FALSE );
+        }
+    }
+
+    if ( is_field_in_projection_list && ( projection_field_count == 1 ) )
+    {
+        // Field of interest is the only column in the projection list
+        DBUG_RETURN( TRUE );
+    }
+
+    DBUG_RETURN( FALSE );
+}
+
 bool spider_mysql_handler::is_bulk_insert_exec_period(
   bool bulk_end
 ) {
diff --git a/storage/spider/spd_db_mysql.h b/storage/spider/spd_db_mysql.h
index 48942d701dc..0619d8fde88 100644
--- a/storage/spider/spd_db_mysql.h
+++ b/storage/spider/spd_db_mysql.h
@@ -1128,6 +1128,9 @@ class spider_mysql_handler: public spider_db_handler
     ulong sql_type,
     int link_idx
   );
+  bool is_sole_projection_field(
+      uint16 field_index
+  );
   bool is_bulk_insert_exec_period(
     bool bulk_end
   );
diff --git a/storage/spider/spd_db_oracle.cc b/storage/spider/spd_db_oracle.cc
index c8237f24f0f..f7959cfed29 100644
--- a/storage/spider/spd_db_oracle.cc
+++ b/storage/spider/spd_db_oracle.cc
@@ -9571,6 +9571,65 @@ int spider_oracle_handler::append_explain_select(
   DBUG_RETURN(0);
 }
 
+/********************************************************************
+ * Determine whether the current query's projection list
+ * consists solely of the specified column.
+ *
+ * Params   IN      - field_index:
+ *                    Field index of the column of interest within
+ *                    its table.
+ *
+ * Returns  TRUE    - if the query's projection list consists
+ *                    solely of the specified column.
+ *          FALSE   - otherwise.
+ ********************************************************************/
+bool spider_oracle_handler::is_sole_projection_field( uint16 field_index )
+{
+    // Determine whether the projection list consists solely of the field of interest
+    bool            is_field_in_projection_list = FALSE;
+    TABLE*          table                       = spider->get_table();
+    uint16          projection_field_count      = 0;
+    uint16          projection_field_index;
+    Field**         field;
+    DBUG_ENTER( "spider_oracle_handler::is_sole_projection_field" );
+
+    for ( field = table->field; *field; field++ )
+    {
+        projection_field_index = ( *field )->field_index;
+
+        if ( !( minimum_select_bit_is_set( projection_field_index ) ) )
+        {
+            // Current field is not in the projection list
+            continue;
+        }
+
+        projection_field_count++;
+
+        if ( !is_field_in_projection_list )
+        {
+            if (field_index == projection_field_index)
+            {
+                // Field of interest is in the projection list
+                is_field_in_projection_list     = TRUE;
+            }
+        }
+
+        if ( is_field_in_projection_list && ( projection_field_count != 1 ) )
+        {
+            // Field of interest is not the sole column in the projection list
+            DBUG_RETURN( FALSE );
+        }
+    }
+
+    if ( is_field_in_projection_list && ( projection_field_count == 1 ) )
+    {
+        // Field of interest is the only column in the projection list
+        DBUG_RETURN( TRUE );
+    }
+
+    DBUG_RETURN( FALSE );
+}
+
 bool spider_oracle_handler::is_bulk_insert_exec_period(
   bool bulk_end
 ) {
diff --git a/storage/spider/spd_db_oracle.h b/storage/spider/spd_db_oracle.h
index 05bea4d33ab..186c1bb9f37 100644
--- a/storage/spider/spd_db_oracle.h
+++ b/storage/spider/spd_db_oracle.h
@@ -1208,6 +1208,9 @@ class spider_oracle_handler: public spider_db_handler
     ulong sql_type,
     int link_idx
   );
+  bool is_sole_projection_field(
+      uint16 field_index
+  );
   bool is_bulk_insert_exec_period(
     bool bulk_end
   );


More information about the commits mailing list