[Commits] 6beb08c: MDEV-15624 Changing the default character set to utf8mb4 changes query evaluation in a very surprising way

Alexey Botchkov holyfoot at askmonty.org
Sat Apr 7 23:30:56 EEST 2018


revision-id: 6beb08c7b67ed7610e95c0350f9f93005db1e055 (mariadb-5.5.59-54-g6beb08c)
parent(s): f5369faf5bbfb56b5e945836eb3f7c7ee88b4079
committer: Alexander Barkov
timestamp: 2018-04-04 09:12:44 +0400
message:

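MDEV-15624 Changing the default character set to utf8mb4 changes query evaluation in a very surprising way

Summary of the patch below: Item_func_uuid and Item_func_uuid_short now
override const_item() to return false, and Item_func_uuid_short additionally
overrides used_tables() to return RAND_TABLE_BIT (Item_func_uuid already did).
Tests are added to ctype_ucs and ctype_utf8mb4 checking that UUID() and
UUID_SHORT() wrapped in REPLACE(), INSERT() and CONCAT() yield a distinct
value for each of the three rows.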

---
 mysql-test/r/ctype_ucs.result     | 31 +++++++++++++++++++++++++++++++
 mysql-test/r/ctype_utf8mb4.result | 23 +++++++++++++++++++++++
 mysql-test/t/ctype_ucs.test       | 22 ++++++++++++++++++++++
 mysql-test/t/ctype_utf8mb4.test   | 19 +++++++++++++++++++
 sql/item_func.h                   |  2 ++
 sql/item_strfunc.h                |  1 +
 6 files changed, 98 insertions(+)

diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
index 1bbd3af..1c9e31d 100644
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -4397,5 +4397,36 @@ Field	Type	Null	Key	Default	Extra
 c1	mediumtext	YES		NULL	
 DROP TABLE t1;
 #
+# MDEV-15624 Changing the default character set to utf8mb4 changes query evaluation in a very surprising way
+#
+SET NAMES utf8;
+CREATE TABLE t1 (id INT);
+INSERT INTO t1 VALUES (1),(2),(3);
+SELECT COUNT(DISTINCT c) FROM (SELECT id, REPLACE(uuid_short(), '0', CAST('o' AS CHAR CHARACTER SET ucs2)) AS c FROM t1) AS d1;
+COUNT(DISTINCT c)
+3
+SELECT DISTINCT REPLACE(uuid_short(), '0', CAST('o' AS CHAR CHARACTER SET ucs2)) AS c FROM t1;
+c
+xxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxx
+SELECT COUNT(DISTINCT c) FROM (SELECT id, INSERT(uuid_short(), 1, 1, CAST('0' AS CHAR CHARACTER SET ucs2)) AS c FROM t1) AS d1;
+COUNT(DISTINCT c)
+3
+SELECT DISTINCT INSERT(uuid_short(), 1, 1, CAST('0' AS CHAR CHARACTER SET ucs2)) AS c FROM t1;
+c
+xxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxx
+SELECT COUNT(DISTINCT c) FROM (SELECT id, CONCAT(uuid_short(), CAST('0' AS CHAR CHARACTER SET ucs2)) AS c FROM t1) AS d1;
+COUNT(DISTINCT c)
+3
+SELECT DISTINCT CONCAT(uuid_short(), CAST('0' AS CHAR CHARACTER SET ucs2)) AS c FROM t1;
+c
+xxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxx
+DROP TABLE t1;
+#
 # End of 5.5 tests
 #
diff --git a/mysql-test/r/ctype_utf8mb4.result b/mysql-test/r/ctype_utf8mb4.result
index 17a1a2f..2c0bdfb 100644
--- a/mysql-test/r/ctype_utf8mb4.result
+++ b/mysql-test/r/ctype_utf8mb4.result
@@ -2656,6 +2656,29 @@ SELECT LENGTH(data) AS len FROM (SELECT REPEAT('☃', 65536) AS data ) AS sub;
 len
 196608
 #
+# MDEV-15624 Changing the default character set to utf8mb4 changes query evaluation in a very surprising way
+#
+SET NAMES utf8mb4;
+CREATE TABLE t1 (id INT);
+INSERT INTO t1 VALUES (1),(2),(3);
+SELECT COUNT(DISTINCT c) FROM (SELECT id, REPLACE(UUID(), "-", "") AS c FROM t1) AS d1;
+COUNT(DISTINCT c)
+3
+SELECT DISTINCT INSERT(uuid(), 9, 1, "X") AS c FROM t1;
+c
+xxxxxxxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx
+xxxxxxxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx
+xxxxxxxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx
+SELECT COUNT(DISTINCT c) FROM (SELECT id, INSERT(UUID(), 9, 1, "X") AS c FROM t1) AS d1;
+COUNT(DISTINCT c)
+3
+SELECT DISTINCT INSERT(UUID(), 9, 1, "X") AS c FROM t1;
+c
+xxxxxxxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx
+xxxxxxxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx
+xxxxxxxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx
+DROP TABLE t1;
+#
 # End of 5.5 tests
 #
 #
diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
index b3d0be4..6f846ea 100644
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -886,5 +886,27 @@ DROP TABLE t1;
 
 
 --echo #
+--echo # MDEV-15624 Changing the default character set to utf8mb4 changes query evaluation in a very surprising way
+--echo #
+
+SET NAMES utf8;
+CREATE TABLE t1 (id INT);
+INSERT INTO t1 VALUES (1),(2),(3);
+
+SELECT COUNT(DISTINCT c) FROM (SELECT id, REPLACE(uuid_short(), '0', CAST('o' AS CHAR CHARACTER SET ucs2)) AS c FROM t1) AS d1;
+--replace_column 1 xxxxxxxxxxxxxxxxx
+SELECT DISTINCT REPLACE(uuid_short(), '0', CAST('o' AS CHAR CHARACTER SET ucs2)) AS c FROM t1;
+
+SELECT COUNT(DISTINCT c) FROM (SELECT id, INSERT(uuid_short(), 1, 1, CAST('0' AS CHAR CHARACTER SET ucs2)) AS c FROM t1) AS d1;
+--replace_column 1 xxxxxxxxxxxxxxxxx
+SELECT DISTINCT INSERT(uuid_short(), 1, 1, CAST('0' AS CHAR CHARACTER SET ucs2)) AS c FROM t1;
+
+SELECT COUNT(DISTINCT c) FROM (SELECT id, CONCAT(uuid_short(), CAST('0' AS CHAR CHARACTER SET ucs2)) AS c FROM t1) AS d1;
+--replace_column 1 xxxxxxxxxxxxxxxxx
+SELECT DISTINCT CONCAT(uuid_short(), CAST('0' AS CHAR CHARACTER SET ucs2)) AS c FROM t1;
+DROP TABLE t1;
+
+
+--echo #
 --echo # End of 5.5 tests
 --echo #
diff --git a/mysql-test/t/ctype_utf8mb4.test b/mysql-test/t/ctype_utf8mb4.test
index c240f26..551a570 100644
--- a/mysql-test/t/ctype_utf8mb4.test
+++ b/mysql-test/t/ctype_utf8mb4.test
@@ -1859,6 +1859,25 @@ SELECT LENGTH(data) AS len FROM (SELECT REPEAT('☃', 65535) AS data ) AS sub;
 SELECT LENGTH(data) AS len FROM (SELECT REPEAT('☃', 65536) AS data ) AS sub;
 
 --echo #
+--echo # MDEV-15624 Changing the default character set to utf8mb4 changes query evaluation in a very surprising way
+--echo #
+
+SET NAMES utf8mb4;
+CREATE TABLE t1 (id INT);
+INSERT INTO t1 VALUES (1),(2),(3);
+
+SELECT COUNT(DISTINCT c) FROM (SELECT id, REPLACE(UUID(), "-", "") AS c FROM t1) AS d1;
+--replace_column 1 xxxxxxxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx
+SELECT DISTINCT INSERT(uuid(), 9, 1, "X") AS c FROM t1;
+
+SELECT COUNT(DISTINCT c) FROM (SELECT id, INSERT(UUID(), 9, 1, "X") AS c FROM t1) AS d1;
+--replace_column 1 xxxxxxxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx
+SELECT DISTINCT INSERT(UUID(), 9, 1, "X") AS c FROM t1;
+
+DROP TABLE t1;
+
+
+--echo #
 --echo # End of 5.5 tests
 --echo #
 
diff --git a/sql/item_func.h b/sql/item_func.h
index 60122f0..5781822 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -2133,6 +2133,8 @@ class Item_func_uuid_short :public Item_int_func
   Item_func_uuid_short() :Item_int_func() {}
   const char *func_name() const { return "uuid_short"; }
   longlong val_int();
+  bool const_item() const { return false; }
+  table_map used_tables() const { return RAND_TABLE_BIT; }
   void fix_length_and_dec()
   { max_length= 21; unsigned_flag=1; }
   bool check_vcol_func_processor(uchar *int_arg) 
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 006b1b9..c1138c2 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -997,6 +997,7 @@ class Item_func_uuid: public Item_str_func
                   DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
     fix_char_length(MY_UUID_STRING_LENGTH);
   }
+  bool const_item() const { return false; }
   table_map used_tables() const { return RAND_TABLE_BIT; }
   const char *func_name() const{ return "uuid"; }
   String *val_str(String *);


More information about the commits mailing list