[Commits] 48c3207570d: MDEV-6707: Wrong result (extra row) with group by, multi-part key

Varun varunraiko1803 at gmail.com
Mon Apr 9 19:59:37 EEST 2018


revision-id: 48c3207570d83468dd276110e7e8297708f8cbc9 (mariadb-5.5.56-198-g48c3207570d)
parent(s): 3eb2a265eac53050089bc5d563e65161717a2983
author: Varun Gupta
committer: Varun Gupta
timestamp: 2018-04-09 22:28:36 +0530
message:

MDEV-6707: Wrong result (extra row) with group by, multi-part key

This case involves using a composite key,few parts of which are involved in GROUP BY and few in the MIN/MAX
functions in the select list.
Ranges are created in accordance with the where condition, so during the execution of such queries,
we try to find a prefix using the fields involved in GROUP BY
and we then check if this newly returned prefix lies within the range we had calculated earlier.

For queries which use composite key, few parts of which are involved in GROUP BY and few in the MIN/MAX functtions
in the select list, we try to find the prefix of the ranges by using the fields involved in the group by clause.
We get extra rows in the output when we have same partial ranges created for the fields in the GROUP BY clause

This issue can be fixed if we compare such partial ranges and don't lookup if we see the same prefix again.

---
 mysql-test/r/range.result         | 30 +++++++++++++++++++++++++++
 mysql-test/r/range_mrr_icp.result | 30 +++++++++++++++++++++++++++
 mysql-test/t/range.test           | 18 ++++++++++++++++
 sql/opt_range.cc                  | 43 ++++++++++++++++++++++++++++++++++++---
 sql/opt_range.h                   |  9 ++++++--
 5 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/mysql-test/r/range.result b/mysql-test/r/range.result
index 630a692cef6..859cc96af27 100644
--- a/mysql-test/r/range.result
+++ b/mysql-test/r/range.result
@@ -2144,3 +2144,33 @@ value1	1000685	12345
 value1	1003560	12345
 value1	1004807	12345
 drop table t1;
+#
+# MDEV-6707: Wrong result (extra row) with group by, multi-part key
+#
+CREATE TABLE t1 (f1 INT, f2 VARCHAR(1), KEY(f2,f1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES
+(7,'v'),(0,'s'),(9,'l'),(4,'c');
+create table t2(a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6);
+explain
+SELECT MAX(f1), f2 FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	f2	f2	9	NULL	2	Using where; Using index for group-by
+SELECT MAX(f1), f2 FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2;
+MAX(f1)	f2
+4	c
+explain
+select (SELECT MAX(f1) as MAXVAL FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2 having maxval<100+t2.a) AS SUBQ from t2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	7	
+2	DEPENDENT SUBQUERY	t1	range	f2	f2	9	NULL	2	Using where; Using index for group-by
+select (SELECT MAX(f1) as MAXVAL FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2 having maxval<100+t2.a) AS SUBQ from t2;
+SUBQ
+4
+4
+4
+4
+4
+4
+4
+DROP TABLE t1,t2;
diff --git a/mysql-test/r/range_mrr_icp.result b/mysql-test/r/range_mrr_icp.result
index 3f5de5b0189..65ec548fa74 100644
--- a/mysql-test/r/range_mrr_icp.result
+++ b/mysql-test/r/range_mrr_icp.result
@@ -2146,4 +2146,34 @@ value1	1000685	12345
 value1	1003560	12345
 value1	1004807	12345
 drop table t1;
+#
+# MDEV-6707: Wrong result (extra row) with group by, multi-part key
+#
+CREATE TABLE t1 (f1 INT, f2 VARCHAR(1), KEY(f2,f1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES
+(7,'v'),(0,'s'),(9,'l'),(4,'c');
+create table t2(a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6);
+explain
+SELECT MAX(f1), f2 FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	f2	f2	9	NULL	2	Using where; Using index for group-by
+SELECT MAX(f1), f2 FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2;
+MAX(f1)	f2
+4	c
+explain
+select (SELECT MAX(f1) as MAXVAL FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2 having maxval<100+t2.a) AS SUBQ from t2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	7	
+2	DEPENDENT SUBQUERY	t1	range	f2	f2	9	NULL	2	Using where; Using index for group-by
+select (SELECT MAX(f1) as MAXVAL FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2 having maxval<100+t2.a) AS SUBQ from t2;
+SUBQ
+4
+4
+4
+4
+4
+4
+4
+DROP TABLE t1,t2;
 set optimizer_switch=@mrr_icp_extra_tmp;
diff --git a/mysql-test/t/range.test b/mysql-test/t/range.test
index 393ca68e945..21c581b24dd 100644
--- a/mysql-test/t/range.test
+++ b/mysql-test/t/range.test
@@ -1718,3 +1718,21 @@ where (key1varchar='value1' AND (key2int <=1 OR  key2int > 1));
 --echo # The following must show col1=12345 for all rows:
 select * from t1;
 drop table t1;
+
+--echo #
+--echo # MDEV-6707: Wrong result (extra row) with group by, multi-part key
+--echo #
+
+CREATE TABLE t1 (f1 INT, f2 VARCHAR(1), KEY(f2,f1)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES
+(7,'v'),(0,'s'),(9,'l'),(4,'c');
+create table t2(a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6);
+
+explain
+SELECT MAX(f1), f2 FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2;
+SELECT MAX(f1), f2 FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2;
+explain
+select (SELECT MAX(f1) as MAXVAL FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2 having maxval<100+t2.a) AS SUBQ from t2;
+select (SELECT MAX(f1) as MAXVAL FROM t1 WHERE f2 LIKE 'c%' AND f1 <> 9 GROUP BY f2 having maxval<100+t2.a) AS SUBQ from t2;
+DROP TABLE t1,t2;
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 04ab8415dfe..504ed2d63b6 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -11249,6 +11249,7 @@ int QUICK_RANGE_SELECT::get_next()
   @param prefix_length   length of cur_prefix
   @param group_key_parts The number of key parts in the group prefix
   @param cur_prefix      prefix of a key to be searched for
+  @param save_last_range INOUT Saving the last range we encountered
 
   Each subsequent call to the method retrieves the first record that has a
   prefix with length prefix_length and which is different from cur_prefix,
@@ -11271,7 +11272,8 @@ int QUICK_RANGE_SELECT::get_next()
 
 int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
                                         uint group_key_parts,
-                                        uchar *cur_prefix)
+                                        uchar *cur_prefix,
+                                        QUICK_RANGE **save_last_range)
 {
   DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
   const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);
@@ -11303,6 +11305,39 @@ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
     }
     last_range= *(cur_range++);
 
+    /*
+      While calculating these prefixes we might encounter a case where there
+      would be same partial ranges for multiple ranges.
+      An example would be
+      select max(key_part2), key_part1 from t1 where key_part1 ='c' AND key_part2 <> 9 group by key_part1;
+      so the ranges would be
+            (c,NULL) <= (key_part1,key_part2) <= (c,9)
+            (c,9) <= (key_part1,key_part2) <= (c,+infinity)
+
+      In this case for calculating prefixes with the group by field we take up the
+      partial ranges involving field f2 those would be
+             c<= key_part2 <=c
+             c<= key_part2 <=c
+
+      So we lookup rows with the same prefix in all such ranges and
+      then we check for the other part(in this case f1) in ALL the ranges.
+      So if a record lies in a range, then it would satisfy both the partial
+      ranges in this case and therefore there would be multiple outputs for
+      the same row.
+      For such cases we should calculate the prefix only when we have the next
+      partial range different from the previous one.
+    */
+
+    if (*save_last_range)
+    {
+      if (!key_tuple_cmp(key_part_info, (*save_last_range)->min_key,
+                        last_range->min_key, prefix_length))
+      {
+        last_range=NULL;
+        continue;
+      }
+    }
+    *save_last_range= last_range;
     key_range start_key, end_key;
     last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
     last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);
@@ -13315,7 +13350,7 @@ QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
    seen_first_key(FALSE), doing_key_read(FALSE), min_max_arg_part(min_max_arg_part_arg),
    key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
    min_functions_it(NULL), max_functions_it(NULL),
-   is_index_scan(is_index_scan_arg)
+   is_index_scan(is_index_scan_arg), save_last_range(NULL)
 {
   head=       table;
   index=      use_index;
@@ -13652,6 +13687,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
     DBUG_RETURN(0);
   /* Save the prefix of the last group. */
   key_copy(last_prefix, record, index_info, group_prefix_len);
+  save_last_range= NULL;
 
   DBUG_RETURN(0);
 }
@@ -13968,7 +14004,8 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
     uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
     if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
                                                       group_key_parts, 
-                                                      cur_prefix)))
+                                                      cur_prefix,
+                                                      &save_last_range)))
       DBUG_RETURN(result);
     seen_first_key= TRUE;
   }
diff --git a/sql/opt_range.h b/sql/opt_range.h
index b8b46ae5ab1..12c20b8ea45 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -477,7 +477,7 @@ class QUICK_RANGE_SELECT : public QUICK_SELECT_I
   int get_next();
   void range_end();
   int get_next_prefix(uint prefix_length, uint group_key_parts, 
-                      uchar *cur_prefix);
+                      uchar *cur_prefix, QUICK_RANGE **save_last_range);
   bool reverse_sorted() { return 0; }
   bool unique_key_range();
   int init_ror_merged_scan(bool reuse_handler, MEM_ROOT *alloc);
@@ -916,7 +916,12 @@ class QUICK_GROUP_MIN_MAX_SELECT : public QUICK_SELECT_I
     Use index scan to get the next different key instead of jumping into it 
     through index read 
   */
-  bool is_index_scan; 
+  bool is_index_scan;
+  /*
+    Used to save a partial range, so that we can check for records in 
+    different partial ranges.
+  */
+  QUICK_RANGE *save_last_range;
 public:
   /*
     The following two members are public to allow easy access from


More information about the commits mailing list