[Commits] b50a976: Issue#16: Point lookup by reverse column family doesn't work

Sergei Petrunia psergey at askmonty.org
Tue Feb 24 20:24:38 EET 2015


revision-id: b50a976519bae83b96a78e13ef42df0560d0a44e
parent(s): 43c740d78ca08b4be04cb5770dc44f9314793da6
committer: Sergei Petrunia
branch nick: mysql-5.6-rocksdb-issue16-splitthefix
timestamp: 2015-02-24 21:24:38 +0300
message:

Issue#16: Point lookup by reverse column family doesn't work

Summary:
The code that converts MySQL's ha_rkey_function codes into RocksDB's
iterator calls didn't convert correctly for the case of reverse column
family.
Equality lookups on reverse column family may need to call Prev() in
certain cases.

Test Plan: mtr

Reviewers: maykov, hermanlee4, yoshinorim, jonahcohen

Subscribers: jtolmer

Differential Revision: https://reviews.facebook.net/D33975

---
 mysql-test/r/rocksdb_range.result        |   72 ++++++++++++++++++++++++++++
 mysql-test/t/rocksdb_range.test          |   51 ++++++++++++++++++++
 storage/rocksdb/ha_rocksdb.cc            |   36 ++++++++++----
 storage/rocksdb/rocksdb-range-access.txt |   76 ++++++++++++++++++++++++++++++
 4 files changed, 227 insertions(+), 8 deletions(-)

diff --git a/mysql-test/r/rocksdb_range.result b/mysql-test/r/rocksdb_range.result
new file mode 100644
index 0000000..2d6f34b
--- /dev/null
+++ b/mysql-test/r/rocksdb_range.result
@@ -0,0 +1,72 @@
+select * from information_schema.engines where engine = 'rocksdb';
+ENGINE	SUPPORT	COMMENT	TRANSACTIONS	XA	SAVEPOINTS
+ROCKSDB	YES	RocksDB storage engine	YES	YES	NO
+drop table if exists t0,t1;
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int not null,
+a  int not null,
+b  int not null,
+primary key(pk),
+key(a) comment 'rev:cf1'
+) engine=rocksdb;
+insert into t2 select A.a, FLOOR(A.a/10), A.a from t1 A;
+#
+# HA_READ_KEY_EXACT tests
+#
+# Original failure was here:
+explain
+select * from t2 force index (a) where a=0;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	ref	a	a	4	const	10	NULL
+select * from t2 force index (a) where a=0;
+pk	a	b
+0	0	0
+1	0	1
+2	0	2
+3	0	3
+4	0	4
+5	0	5
+6	0	6
+7	0	7
+8	0	8
+9	0	9
+# The rest are for code coverage:
+explain
+select * from t2 force index (a) where a=2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	ref	a	a	4	const	10	NULL
+select * from t2 force index (a) where a=2;
+pk	a	b
+20	2	20
+21	2	21
+22	2	22
+23	2	23
+24	2	24
+25	2	25
+26	2	26
+27	2	27
+28	2	28
+29	2	29
+explain 
+select * from t2 force index (a) where a=3 and pk=33;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	const	a	a	8	const,const	10	NULL
+select * from t2 force index (a) where a=3 and pk=33;
+pk	a	b
+33	3	33
+select * from t2 force index (a) where a=99 and pk=99;
+pk	a	b
+select * from t2 force index (a) where a=0 and pk=0;
+pk	a	b
+0	0	0
+select * from t2 force index (a) where a=-1;
+pk	a	b
+select * from t2 force index (a) where a=-1 and pk in (101,102);
+pk	a	b
+select * from t2 force index (a) where a=100 and pk in (101,102);
+pk	a	b
+drop table t0,t1,t2;
diff --git a/mysql-test/t/rocksdb_range.test b/mysql-test/t/rocksdb_range.test
new file mode 100644
index 0000000..61a87d8
--- /dev/null
+++ b/mysql-test/t/rocksdb_range.test
@@ -0,0 +1,51 @@
+#
+# Range access test for RocksDB storage engine
+#
+select * from information_schema.engines where engine = 'rocksdb';
+
+--disable_warnings
+drop table if exists t0,t1;
+--enable_warnings
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+create table t2 (
+  pk int not null,
+  a  int not null,
+  b  int not null,
+  primary key(pk),
+  key(a) comment 'rev:cf1'
+) engine=rocksdb;
+
+# 10 pk values for each value of a...
+insert into t2 select A.a, FLOOR(A.a/10), A.a from t1 A;
+
+--echo #
+--echo # HA_READ_KEY_EXACT tests
+--echo #
+
+--echo # Original failure was here:
+explain
+select * from t2 force index (a) where a=0;
+select * from t2 force index (a) where a=0;
+
+--echo # The rest are for code coverage:
+explain
+select * from t2 force index (a) where a=2;
+select * from t2 force index (a) where a=2;
+
+explain 
+select * from t2 force index (a) where a=3 and pk=33;
+select * from t2 force index (a) where a=3 and pk=33;
+
+select * from t2 force index (a) where a=99 and pk=99;
+select * from t2 force index (a) where a=0 and pk=0;
+select * from t2 force index (a) where a=-1;
+select * from t2 force index (a) where a=-1 and pk in (101,102);
+select * from t2 force index (a) where a=100 and pk in (101,102);
+
+drop table t0,t1,t2;
+
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 3cb3334..2eed302 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -2442,7 +2442,10 @@ int ha_rocksdb::secondary_index_read(int keyno, uchar *buf)
 }
 
 
-//void store_key_image_to_rec(Field *field, uchar *ptr, uint len);
+/*
+   See storage/rocksdb/rocksdb-range-access.txt for description of how MySQL
+   index navigation commands are converted into RocksDB lookup commands.
+*/
 
 int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
                                key_part_map keypart_map,
@@ -2451,16 +2454,17 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
   int rc= 0;
   DBUG_ENTER("ha_rocksdb::index_read_map");
 
-  uint key_parts= table->key_info[active_index].user_defined_key_parts;
   RDBSE_KEYDEF *kd= key_descr[active_index];
+  uint actual_key_parts= table->key_info[active_index].actual_key_parts;
+  bool using_full_key= (keypart_map == HA_WHOLE_KEY) ||
+                       (keypart_map == ((key_part_map(1) << actual_key_parts)
+                                         - 1));
 
   stats.rows_requested++;
   io_perf_start();
 
-  if (active_index == table->s->primary_key && 
-      find_flag == HA_READ_KEY_EXACT &&
-      ((keypart_map == ((key_part_map(1) << key_parts) - 1) ||
-        keypart_map == HA_WHOLE_KEY)))
+  if (active_index == table->s->primary_key &&
+      find_flag == HA_READ_KEY_EXACT && using_full_key)
   {
     /* 
       Equality lookup over primary key, using full tuple. 
@@ -2512,6 +2516,22 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
   switch (find_flag) {
   case HA_READ_KEY_EXACT:
   {
+    /* 
+      We are looking for the first record such that 
+        index_tuple= lookup_tuple.
+      lookup_tuple may be a prefix of the index.
+    */
+    if (key_descr[active_index]->is_reverse_cf)
+    {
+      if (!using_full_key)
+      {
+        if (!scan_it->Valid())
+          scan_it->SeekToLast();
+        else
+          scan_it->Prev();
+      }
+    }
+
     if (!scan_it->Valid())
       rc= HA_ERR_KEY_NOT_FOUND;
     else
@@ -2522,8 +2542,8 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
                              slice.data(), slice.size(), n_used_parts))
       {
         /* 
-          The record we've got is not from this index, or is not equal to the
-          lookup table
+          Got a record that is not equal to the lookup value, or even a record
+          from another table.index.
         */
         rc= HA_ERR_KEY_NOT_FOUND;
       }
diff --git a/storage/rocksdb/rocksdb-range-access.txt b/storage/rocksdb/rocksdb-range-access.txt
new file mode 100644
index 0000000..d86d5c6
--- /dev/null
+++ b/storage/rocksdb/rocksdb-range-access.txt
@@ -0,0 +1,76 @@
+
+This file describes how MySQL index navigation commands are translated into 
+RocksDB index navigation commands.
+
+Index tuples are shown as
+
+  ( kv )-aaa-pkN
+
+here 
+ * '(kv)' is the 4-byte index number. 
+ * '-' is just for readability
+ * everything that follows the '-' is mem-comparable form of the key.
+   In ascii encoding,  aaa < bbb < ccc < xxx.
+
+Tuples that start with '#' do not exist in the database. They are only shown
+to demonstrate where Seek() calls end up with.
+
+== HA_READ_KEY_EXACT, forward CF ==
+
+  (kv-1)-xxx-pk
+# ( kv )-aaa      <-- "kv-aaa" doesn't exist in the database, but it would be
+                       here.
+  ( kv )-aaa-pk   <--- Seek("kv-aaa") will put us here on the next record.
+  ( kv )-aaa-pk2       
+  ( kv )-bbb-...
+
+RocksDB calls:
+
+  it->Seek(kv); 
+  if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
+    return record.
+
+== HA_READ_KEY_EXACT, backward CF ==
+
+When we need to seek to a tuple that is a prefix of a full key:
+
+  (kv+1)-xxx-pk
+  ( kv )-ccc-pk 
+  ( kv )-bbb-pk3
+  ( kv )-bbb-pk2
+  ( kv )-bbb-pk1 < -- We need to be here
+# ( kv )-bbb         <---we call Seek(kv-bbb)
+  ( kv )-aaa-pk      ... and end up here.  Should call it->Prev().  
+
+There is a special case when (kv)-bbb-pk1 is the last record in the CF, and 
+we get invalid iterator. Then, we need to call SeekToLast().
+
+Another kind of special case is when we need to seek to the full value. 
+Suppose, the lookup tuple is kv-bbb-pk1:
+
+  (kv+1)-xxx-pk
+  ( kv )-ccc-pk 
+  ( kv )-bbb-pk3
+  ( kv )-bbb-pk2
+  ( kv )-bbb-pk1 < -- Seek(kv-bbb-pk1)
+  ( kv )-bbb-pk0  
+
+Then, Seek(kv-bbb-pk1) will position us exactly the tuple we need, and we
+won't need to call it->Prev().  If we get an invalid iterator, there is no
+need to call SeekToLast().
+
+RocksDB calls:
+
+  it->Seek(tuple);
+
+  if (!using_full_key)
+  {
+    if (!it->Valid())
+      it->SeekToLast();
+    else
+      it->Prev();
+  }
+
+  if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
+    return record.
+


More information about the commits mailing list