[Commits] ca81e591062: MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values

psergey sergey at mariadb.com
Mon Nov 22 09:55:38 EET 2021


revision-id: ca81e591062bab602069b527297d93084d7bc58c (mariadb-10.3.31-70-gca81e591062)
parent(s): 9962cda52722b77c2a7e0314bbaa2e4f963f55c1
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-11-22 10:55:38 +0300
message:

MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values

Make const_expression_in_where() ignore the equalities injected by
LATERAL DERIVED optimization, unless these equalities are used for
ref access.

This follows what make_join_select()/make_cond_for_table() does:
these functions will discard such equalities too.

---
 mysql-test/main/derived_split_innodb.result |  88 ++++++++++++++++++++++
 mysql-test/main/derived_split_innodb.test   | 112 ++++++++++++++++++++++++++++
 sql/sql_select.cc                           |  34 +++++++++
 3 files changed, 234 insertions(+)

diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 7ea3b689f23..fcdba0d7d07 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -234,4 +234,92 @@ id	itemid	id	id
 4	2	4	2
 drop table t1,t2,t3;
 set optimizer_switch='split_materialized=default';
+#
+# MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values
+#
+CREATE TABLE t1 (
+the_date  date NOT NULL
+, PRIMARY KEY ( the_date )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO t1 VALUES ('2021-08-10'),('2021-08-11'),('2021-08-12'),('2021-08-13');
+CREATE TABLE t2 (
+the_date  date NOT NULL,
+ptn_id  char(5) CHARACTER SET utf8mb3 NOT NULL DEFAULT '',
+cco_stk_ttl  int,
+PRIMARY KEY ( the_date , ptn_id )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO  t2 VALUES 
+('2021-08-11','10002',NULL),('2021-08-11','10741',128),
+('2021-08-11','11001',4),('2021-08-11','11003',2048),
+('2021-08-12','10001',4096),('2021-08-12','10002',1),
+('2021-08-12','10429',256),('2021-08-12','10499',16),
+('2021-08-12','10580',8),('2021-08-12','10740',32),
+('2021-08-12','10741',64),('2021-08-12','10771',512),
+('2021-08-12','11001',2),('2021-08-12','11003',1024);
+CREATE TABLE t3 (
+id  int NOT NULL AUTO_INCREMENT,
+nsc_id  char(5) NOT NULL,
+dept_id  char(4) NOT NULL,
+district_id  char(3) NOT NULL,
+region_id  char(2) NOT NULL,
+PRIMARY KEY ( id ),
+UNIQUE KEY  dept_district  ( dept_id , district_id ),
+KEY  region_id  ( dept_id , region_id )
+) ENGINE=InnoDB  DEFAULT CHARSET=utf8mb4;
+INSERT INTO t3 VALUES 
+(1,'MMD','ADVB','10','1'), (2,'MMD','ADVB','11','1'),
+(3,'MMD','ADVB','21','2'),(4,'MMD','ADVB','22','2');
+CREATE TABLE t4 (
+dept_id  char(4) CHARACTER SET utf8mb3 NOT NULL,
+ptn_id  char(5) CHARACTER SET utf8mb3 NOT NULL,
+district_id  char(3) CHARACTER SET utf8mb3 NOT NULL DEFAULT '0',
+nsc_id  char(5) CHARACTER SET utf8mb3 NOT NULL
+, PRIMARY KEY (ptn_id , dept_id) 
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO  t4 VALUES 
+('ADVB','10001','10','MMD'),('ADVB','10002','10','MMD'),
+('ADVB','10003','10','MMD'),('ADVB','10429','22','MMD'),
+('ADVB','10740','21','MMD'),('ADVB','10741','21','MMD'),
+('ADVB','10771','23','MMD'),('ADVB','11001','11','MMD'),
+('ADVB','11002','11','MMD');
+CREATE TABLE `t10` (
+`the_date` date NOT NULL,
+`dept_id` char(4) CHARACTER SET utf8mb4 ,
+`org_id` varchar(3) CHARACTER SET utf8mb4 ,
+`district_id` char(3) CHARACTER SET utf8mb4 ,
+`region_id` char(2) CHARACTER SET utf8mb4 
+);
+insert into t10
+SELECT cal.the_date ,
+org.dept_id ,
+coalesce(org.district_id, org.region_id, 'MMD') AS org_id ,
+org.district_id ,
+org.region_id
+FROM t1 cal
+CROSS JOIN t3 org
+WHERE org.nsc_id = 'MMD'
+ AND org.dept_id IN ('ADVB')
+AND cal.the_date = '2021-08-12'
+GROUP BY cal.the_date,
+org.dept_id,
+org.region_id,
+org.district_id WITH ROLLUP HAVING NOT (cal.the_date IS NULL
+OR org.dept_id IS NULL);
+explain $q;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	org2	ALL	NULL	NULL	NULL	NULL	7	
+1	PRIMARY	<derived2>	ref	key0	key0	43	test.org2.the_date,test.org2.dept_id,test.org2.region_id,test.org2.district_id	2	Using where
+2	LATERAL DERIVED	sub	ref	PRIMARY	PRIMARY	3	test.org2.the_date	1	Using temporary; Using filesort
+2	LATERAL DERIVED	org	ref	PRIMARY	PRIMARY	15	test.sub.ptn_id	1	Using where
+2	LATERAL DERIVED	dis	eq_ref	dept_district,region_id	dept_district	28	const,func	1	Using index condition; Using where
+$q;
+the_date	org_id	dept_id	cco_stk_ttl
+2021-08-12	10	ADVB	4097
+2021-08-12	11	ADVB	2
+2021-08-12	1	ADVB	4099
+2021-08-12	21	ADVB	96
+2021-08-12	22	ADVB	256
+2021-08-12	2	ADVB	352
+2021-08-12	MMD	ADVB	4451
+drop table t1,t2,t3,t4,t10;
 # End of 10.3 tests
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index 6f33c71eede..c127404f975 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -186,4 +186,116 @@ eval $q;
 drop table t1,t2,t3;
 set optimizer_switch='split_materialized=default';
 
+--echo #
+--echo # MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values
+--echo #
+
+CREATE TABLE t1 (
+   the_date  date NOT NULL
+  , PRIMARY KEY ( the_date )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO t1 VALUES ('2021-08-10'),('2021-08-11'),('2021-08-12'),('2021-08-13');
+ 
+CREATE TABLE t2 (
+   the_date  date NOT NULL,
+   ptn_id  char(5) CHARACTER SET utf8mb3 NOT NULL DEFAULT '',
+   cco_stk_ttl  int,
+  PRIMARY KEY ( the_date , ptn_id )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+ 
+INSERT INTO  t2 VALUES 
+('2021-08-11','10002',NULL),('2021-08-11','10741',128),
+('2021-08-11','11001',4),('2021-08-11','11003',2048),
+('2021-08-12','10001',4096),('2021-08-12','10002',1),
+('2021-08-12','10429',256),('2021-08-12','10499',16),
+('2021-08-12','10580',8),('2021-08-12','10740',32),
+('2021-08-12','10741',64),('2021-08-12','10771',512),
+('2021-08-12','11001',2),('2021-08-12','11003',1024);
+ 
+CREATE TABLE t3 (
+   id  int NOT NULL AUTO_INCREMENT,
+   nsc_id  char(5) NOT NULL,
+   dept_id  char(4) NOT NULL,
+   district_id  char(3) NOT NULL,
+   region_id  char(2) NOT NULL,
+  PRIMARY KEY ( id ),
+  UNIQUE KEY  dept_district  ( dept_id , district_id ),
+  KEY  region_id  ( dept_id , region_id )
+) ENGINE=InnoDB  DEFAULT CHARSET=utf8mb4;
+ 
+INSERT INTO t3 VALUES 
+(1,'MMD','ADVB','10','1'), (2,'MMD','ADVB','11','1'),
+(3,'MMD','ADVB','21','2'),(4,'MMD','ADVB','22','2');
+ 
+CREATE TABLE t4 (
+   dept_id  char(4) CHARACTER SET utf8mb3 NOT NULL,
+   ptn_id  char(5) CHARACTER SET utf8mb3 NOT NULL,
+   district_id  char(3) CHARACTER SET utf8mb3 NOT NULL DEFAULT '0',
+   nsc_id  char(5) CHARACTER SET utf8mb3 NOT NULL
+  , PRIMARY KEY (ptn_id , dept_id) 
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+ 
+INSERT INTO  t4 VALUES 
+('ADVB','10001','10','MMD'),('ADVB','10002','10','MMD'),
+('ADVB','10003','10','MMD'),('ADVB','10429','22','MMD'),
+('ADVB','10740','21','MMD'),('ADVB','10741','21','MMD'),
+('ADVB','10771','23','MMD'),('ADVB','11001','11','MMD'),
+('ADVB','11002','11','MMD');
+ 
+CREATE TABLE `t10` (
+  `the_date` date NOT NULL,
+  `dept_id` char(4) CHARACTER SET utf8mb4 ,
+  `org_id` varchar(3) CHARACTER SET utf8mb4 ,
+  `district_id` char(3) CHARACTER SET utf8mb4 ,
+  `region_id` char(2) CHARACTER SET utf8mb4 
+);
+
+insert into t10
+SELECT cal.the_date ,
+        org.dept_id ,
+        coalesce(org.district_id, org.region_id, 'MMD') AS org_id ,
+        org.district_id ,
+        org.region_id
+FROM t1 cal
+CROSS JOIN t3 org
+WHERE org.nsc_id = 'MMD'
+ AND org.dept_id IN ('ADVB')
+ AND cal.the_date = '2021-08-12'
+GROUP BY cal.the_date,
+         org.dept_id,
+         org.region_id,
+         org.district_id WITH ROLLUP HAVING NOT (cal.the_date IS NULL
+                                                 OR org.dept_id IS NULL);
+
+let $q=
+SELECT sql_no_cache org2.the_date ,
+       org2.org_id ,
+       org2.dept_id ,
+       msr. cco_stk_ttl
+FROM
+ t10 org2
+LEFT JOIN
+ ( SELECT sub.the_date ,
+          dis.dept_id ,
+          dis.region_id ,
+          dis.district_id ,
+          sum(sub.cco_stk_ttl) AS cco_stk_ttl
+  FROM t2 sub
+  JOIN t4 org ON org.ptn_id = sub.ptn_id
+  JOIN t3 dis ON dis.dept_id = org.dept_id
+  AND dis.district_id = org.district_id
+  WHERE dis.nsc_id = 'MMD'
+   AND dis.dept_id IN ('ADVB')
+  GROUP BY sub.the_date,
+           dis.dept_id,
+           dis.region_id,
+           dis.district_id WITH ROLLUP ) msr ON msr.the_date = org2.the_date
+AND msr.dept_id <=> org2.dept_id
+AND msr.region_id <=> org2.region_id
+AND msr.district_id <=> org2.district_id;
+
+evalp explain $q;
+evalp $q;
+
+drop table t1,t2,t3,t4,t10;
 --echo # End of 10.3 tests
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index fe02e7b44e4..0ea861987bd 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -17047,6 +17047,26 @@ const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field,
     Item *right_item= ((Item_func*) cond)->arguments()[1];
     if (equal(left_item, comp_item, comp_field))
     {
+      /*
+        If this condition
+        1. Was injected by LATERAL DERIVED optimization,
+        2. But is not used for ref access
+        then we ingore it. This is the same as what mmake_cond_for_table() does
+        when it is invoked from make_join_select().
+      */
+      if (func->functype() == Item_func::EQ_FUNC)
+      {
+        if (is_eq_cond_injected_for_split_opt((Item_func_eq*)func))
+        {
+          bool used_for_ref= false;
+          if (left_item->type() == Item::FIELD_ITEM &&
+              test_if_ref(func, (Item_field*)left_item, right_item))
+           used_for_ref= true;
+
+          if (!used_for_ref)
+           return 0;
+        }
+      }
       if (test_if_equality_guarantees_uniqueness (left_item, right_item))
       {
 	if (*const_item)
@@ -17057,6 +17077,20 @@ const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field,
     }
     else if (equal(right_item, comp_item, comp_field))
     {
+      /* Do the same as above */
+      if (func->functype() == Item_func::EQ_FUNC)
+      {
+        if (is_eq_cond_injected_for_split_opt((Item_func_eq*)func))
+        {
+          bool used_for_ref= false;
+          if (right_item->type() == Item::FIELD_ITEM &&
+              test_if_ref(func, (Item_field*)right_item, left_item))
+           used_for_ref= true;
+
+          if (!used_for_ref)
+           return 0;
+        }
+      }
       if (test_if_equality_guarantees_uniqueness (right_item, left_item))
       {
 	if (*const_item)


More information about the commits mailing list