[Commits] 6e1450c: Issue#16: Point lookup by reverse column family doesn't work

Sergei Petrunia psergey at askmonty.org
Tue Feb 24 20:22:44 EET 2015


revision-id: 6e1450c46c4269cf9d75e513e5c86a612cda8684
parent(s): 43c740d78ca08b4be04cb5770dc44f9314793da6
committer: Sergei Petrunia
branch nick: mysql-5.6-rocksdb-issue16-splitthefix
timestamp: 2015-02-24 21:22:44 +0300
message:

Issue#16: Point lookup by reverse column family doesn't work

The code that converts MySQL's ha_rkey_function codes into RocksDB's
iterator calls didn't convert correctly for the case of reverse column
family.
Equality lookups on reverse column family may need to call Prev() in
certain cases.

---
 mysql-test/r/rocksdb_range.result        |   72 ++++++++++++++++++++++++++++
 mysql-test/t/rocksdb_range.test          |   51 ++++++++++++++++++++
 storage/rocksdb/ha_rocksdb.cc            |   36 ++++++++++----
 storage/rocksdb/rocksdb-range-access.txt |   76 ++++++++++++++++++++++++++++++
 4 files changed, 227 insertions(+), 8 deletions(-)

diff --git a/mysql-test/r/rocksdb_range.result b/mysql-test/r/rocksdb_range.result
new file mode 100644
index 0000000..2d6f34b
--- /dev/null
+++ b/mysql-test/r/rocksdb_range.result
@@ -0,0 +1,72 @@
+select * from information_schema.engines where engine = 'rocksdb';
+ENGINE	SUPPORT	COMMENT	TRANSACTIONS	XA	SAVEPOINTS
+ROCKSDB	YES	RocksDB storage engine	YES	YES	NO
+drop table if exists t0,t1;
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int not null,
+a  int not null,
+b  int not null,
+primary key(pk),
+key(a) comment 'rev:cf1'
+) engine=rocksdb;
+insert into t2 select A.a, FLOOR(A.a/10), A.a from t1 A;
+#
+# HA_READ_KEY_EXACT tests
+#
+# Original failure was here:
+explain
+select * from t2 force index (a) where a=0;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	ref	a	a	4	const	10	NULL
+select * from t2 force index (a) where a=0;
+pk	a	b
+0	0	0
+1	0	1
+2	0	2
+3	0	3
+4	0	4
+5	0	5
+6	0	6
+7	0	7
+8	0	8
+9	0	9
+# The rest are for code coverage:
+explain
+select * from t2 force index (a) where a=2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	ref	a	a	4	const	10	NULL
+select * from t2 force index (a) where a=2;
+pk	a	b
+20	2	20
+21	2	21
+22	2	22
+23	2	23
+24	2	24
+25	2	25
+26	2	26
+27	2	27
+28	2	28
+29	2	29
+explain 
+select * from t2 force index (a) where a=3 and pk=33;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	const	a	a	8	const,const	10	NULL
+select * from t2 force index (a) where a=3 and pk=33;
+pk	a	b
+33	3	33
+select * from t2 force index (a) where a=99 and pk=99;
+pk	a	b
+select * from t2 force index (a) where a=0 and pk=0;
+pk	a	b
+0	0	0
+select * from t2 force index (a) where a=-1;
+pk	a	b
+select * from t2 force index (a) where a=-1 and pk in (101,102);
+pk	a	b
+select * from t2 force index (a) where a=100 and pk in (101,102);
+pk	a	b
+drop table t0,t1,t2;
diff --git a/mysql-test/t/rocksdb_range.test b/mysql-test/t/rocksdb_range.test
new file mode 100644
index 0000000..61a87d8
--- /dev/null
+++ b/mysql-test/t/rocksdb_range.test
@@ -0,0 +1,51 @@
+#
+# Range access test for RocksDB storage engine
+#
+select * from information_schema.engines where engine = 'rocksdb';
+
+--disable_warnings
+drop table if exists t0,t1;
+--enable_warnings
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+create table t2 (
+  pk int not null,
+  a  int not null,
+  b  int not null,
+  primary key(pk),
+  key(a) comment 'rev:cf1'
+) engine=rocksdb;
+
+# 10 pk values for each value of a...
+insert into t2 select A.a, FLOOR(A.a/10), A.a from t1 A;
+
+--echo #
+--echo # HA_READ_KEY_EXACT tests
+--echo #
+
+--echo # Original failure was here:
+explain
+select * from t2 force index (a) where a=0;
+select * from t2 force index (a) where a=0;
+
+--echo # The rest are for code coverage:
+explain
+select * from t2 force index (a) where a=2;
+select * from t2 force index (a) where a=2;
+
+explain 
+select * from t2 force index (a) where a=3 and pk=33;
+select * from t2 force index (a) where a=3 and pk=33;
+
+select * from t2 force index (a) where a=99 and pk=99;
+select * from t2 force index (a) where a=0 and pk=0;
+select * from t2 force index (a) where a=-1;
+select * from t2 force index (a) where a=-1 and pk in (101,102);
+select * from t2 force index (a) where a=100 and pk in (101,102);
+
+drop table t0,t1,t2;
+
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 3cb3334..2eed302 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -2442,7 +2442,10 @@ int ha_rocksdb::secondary_index_read(int keyno, uchar *buf)
 }
 
 
-//void store_key_image_to_rec(Field *field, uchar *ptr, uint len);
+/*
+   See storage/rocksdb/rocksdb-range-access.txt for description of how MySQL
+   index navigation commands are converted into RocksDB lookup commands.
+*/
 
 int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
                                key_part_map keypart_map,
@@ -2451,16 +2454,17 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
   int rc= 0;
   DBUG_ENTER("ha_rocksdb::index_read_map");
 
-  uint key_parts= table->key_info[active_index].user_defined_key_parts;
   RDBSE_KEYDEF *kd= key_descr[active_index];
+  uint actual_key_parts= table->key_info[active_index].actual_key_parts;
+  bool using_full_key= (keypart_map == HA_WHOLE_KEY) ||
+                       (keypart_map == ((key_part_map(1) << actual_key_parts)
+                                         - 1));
 
   stats.rows_requested++;
   io_perf_start();
 
-  if (active_index == table->s->primary_key && 
-      find_flag == HA_READ_KEY_EXACT &&
-      ((keypart_map == ((key_part_map(1) << key_parts) - 1) ||
-        keypart_map == HA_WHOLE_KEY)))
+  if (active_index == table->s->primary_key &&
+      find_flag == HA_READ_KEY_EXACT && using_full_key)
   {
     /* 
       Equality lookup over primary key, using full tuple. 
@@ -2512,6 +2516,22 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
   switch (find_flag) {
   case HA_READ_KEY_EXACT:
   {
+    /* 
+      We are looking for the first record such that 
+        index_tuple= lookup_tuple.
+      lookup_tuple may be a prefix of the index.
+    */
+    if (key_descr[active_index]->is_reverse_cf)
+    {
+      if (!using_full_key)
+      {
+        if (!scan_it->Valid())
+          scan_it->SeekToLast();
+        else
+          scan_it->Prev();
+      }
+    }
+
     if (!scan_it->Valid())
       rc= HA_ERR_KEY_NOT_FOUND;
     else
@@ -2522,8 +2542,8 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
                              slice.data(), slice.size(), n_used_parts))
       {
         /* 
-          The record we've got is not from this index, or is not equal to the
-          lookup table
+          Got a record that is not equal to the lookup value, or even a record
+          from another table.index.
         */
         rc= HA_ERR_KEY_NOT_FOUND;
       }
diff --git a/storage/rocksdb/rocksdb-range-access.txt b/storage/rocksdb/rocksdb-range-access.txt
new file mode 100644
index 0000000..d86d5c6
--- /dev/null
+++ b/storage/rocksdb/rocksdb-range-access.txt
@@ -0,0 +1,76 @@
+
+This file describes how MySQL index navigation commands are translated into 
+RocksDB index navigation commands.
+
+Index tuples are shown as
+
+  ( kv )-aaa-pkN
+
+here 
+ * '(kv)' is the 4-byte index number. 
+ * '-' is just for readability
+ * everything that follows the '-' is mem-comparable form of the key.
+   In ascii encoding,  aaa < bbb < ccc < xxx.
+
+Tuples that start with '#' do not exist in the database. They are only shown
+to demonstrate where Seek() calls end up with.
+
+== HA_READ_KEY_EXACT, forward CF ==
+
+  (kv-1)-xxx-pk
+# ( kv )-aaa      <-- "kv-aaa" doesn't exist in the database, but it would be
+                       here.
+  ( kv )-aaa-pk   <--- Seek("kv-aaa") will put us here on the next record.
+  ( kv )-aaa-pk2       
+  ( kv )-bbb-...
+
+RocksDB calls:
+
+  it->Seek(kv); 
+  if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
+    return record.
+
+== HA_READ_KEY_EXACT, backward CF ==
+
+When we need to seek to a tuple that is a prefix of a full key:
+
+  (kv+1)-xxx-pk
+  ( kv )-ccc-pk 
+  ( kv )-bbb-pk3
+  ( kv )-bbb-pk2
+  ( kv )-bbb-pk1 < -- We need to be here
+# ( kv )-bbb         <---we call Seek(kv-bbb)
+  ( kv )-aaa-pk      ... and end up here.  Should call it->Prev().  
+
+There is a special case when (kv)-bbb-pk1 is the last record in the CF, and 
+we get invalid iterator. Then, we need to call SeekToLast().
+
+Another kind of special case is when we need to seek to the full value. 
+Suppose, the lookup tuple is kv-bbb-pk1:
+
+  (kv+1)-xxx-pk
+  ( kv )-ccc-pk 
+  ( kv )-bbb-pk3
+  ( kv )-bbb-pk2
+  ( kv )-bbb-pk1 < -- Seek(kv-bbb-pk1)
+  ( kv )-bbb-pk0  
+
+Then, Seek(kv-bbb-pk1) will position us exactly the tuple we need, and we
+won't need to call it->Prev().  If we get an invalid iterator, there is no
+need to call SeekToLast().
+
+RocksDB calls:
+
+  it->Seek(tuple);
+
+  if (!using_full_key)
+  {
+    if (!it->Valid())
+      it->SeekToLast();
+    else
+      it->Prev();
+  }
+
+  if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
+    return record.
+


More information about the commits mailing list