[Commits] ad08c90: Issue#36: Range in form tbl.key >= const doesn't work in reverse column family

Sergei Petrunia psergey at askmonty.org
Wed Mar 11 17:36:11 EET 2015


revision-id: ad08c90b1e462dcfb93139adfc21ca27ed29106d
parent(s): 3172566cf53fa3bf9a05c6a4bb20b4abe1e4b739
committer: Sergei Petrunia
branch nick: mysql-5.6-rocksdb-issue16-splitthefix
timestamp: 2015-03-11 18:36:11 +0300
message:

Issue#36: Range in form tbl.key >= const doesn't work in reverse column family

Summary:
Fix ha_rocksdb::index_read_map() to correctly handle HA_READ_KEY_OR_NEXT
search flag. (ha_rocksdb::is_ascending already handles HA_READ_KEY_OR_NEXT
correctly)

Test Plan: mtr

Reviewers: maykov, hermanlee4, jonahcohen, yoshinorim

Subscribers: jtolmer

Differential Revision: https://reviews.facebook.net/D34647

---
 mysql-test/r/rocksdb_range.result        |   39 +++++++++++++++++-
 mysql-test/t/rocksdb_range.test          |   25 ++++++++++-
 storage/rocksdb/ha_rocksdb.cc            |   30 ++++++++++++--
 storage/rocksdb/rdb_datadic.h            |   22 ++++++++--
 storage/rocksdb/rocksdb-range-access.txt |   66 ++++++++++++++++++++++++++++++
 5 files changed, 174 insertions(+), 8 deletions(-)

diff --git a/mysql-test/r/rocksdb_range.result b/mysql-test/r/rocksdb_range.result
index 2d6f34b..69dae45 100644
--- a/mysql-test/r/rocksdb_range.result
+++ b/mysql-test/r/rocksdb_range.result
@@ -69,4 +69,41 @@ select * from t2 force index (a) where a=-1 and pk in (101,102);
 pk	a	b
 select * from t2 force index (a) where a=100 and pk in (101,102);
 pk	a	b
-drop table t0,t1,t2;
+#
+# #36: Range in form tbl.key >= const doesn't work in reverse column family
+#
+explain
+select count(*) from t2 force index (a) where a>=0 and a <=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	range	a	a	4	NULL	10	Using where; Using index
+select count(*) from t2 force index (a) where a>=0 and a <=1;
+count(*)
+20
+explain
+select count(*) from t2 force index (a) where a>=-1 and a <=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	range	a	a	4	NULL	10	Using where; Using index
+select count(*) from t2 force index (a) where a>=-1 and a <=1;
+count(*)
+20
+explain
+select * from t2 force index (a) where a=0 and pk>=3;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	range	a	a	8	NULL	10	Using where
+select * from t2 force index (a) where a=0 and pk>=3;
+pk	a	b
+3	0	3
+4	0	4
+5	0	5
+6	0	6
+7	0	7
+8	0	8
+9	0	9
+# Try edge cases where we fall over the end of the table
+create table t3 like t2;
+insert into t3 select * from t2;
+select * from t3 where pk>=1000000;
+pk	a	b
+select * from t2 where pk>=1000000;
+pk	a	b
+drop table t0,t1,t2,t3;
diff --git a/mysql-test/t/rocksdb_range.test b/mysql-test/t/rocksdb_range.test
index 61a87d8..4ac4276 100644
--- a/mysql-test/t/rocksdb_range.test
+++ b/mysql-test/t/rocksdb_range.test
@@ -47,5 +47,28 @@ select * from t2 force index (a) where a=-1;
 select * from t2 force index (a) where a=-1 and pk in (101,102);
 select * from t2 force index (a) where a=100 and pk in (101,102);
 
-drop table t0,t1,t2;
+
+--echo #
+--echo # #36: Range in form tbl.key >= const doesn't work in reverse column family
+--echo #
+explain
+select count(*) from t2 force index (a) where a>=0 and a <=1;
+select count(*) from t2 force index (a) where a>=0 and a <=1;
+
+explain
+select count(*) from t2 force index (a) where a>=-1 and a <=1;
+select count(*) from t2 force index (a) where a>=-1 and a <=1;
+
+explain
+select * from t2 force index (a) where a=0 and pk>=3;
+select * from t2 force index (a) where a=0 and pk>=3;
+
+--echo # Try edge cases where we fall over the end of the table
+create table t3 like t2;
+insert into t3 select * from t2;
+
+select * from t3 where pk>=1000000;
+select * from t2 where pk>=1000000;
+
+drop table t0,t1,t2,t3;
 
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 4dbbe3c..163e0f4 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -2647,9 +2647,7 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
     else
     {
       rkey= scan_it->key();
-      if (!kd->covers_key(rkey.data(), rkey.size()) ||
-           kd->cmp_full_keys(rkey.data(), rkey.size(), 
-                             slice.data(), slice.size(), n_used_parts))
+      if (!kd->value_matches_prefix(rkey, slice))
       {
         /* 
           Got a record that is not equal to the lookup value, or even a record
@@ -2662,8 +2660,34 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
   }
   case HA_READ_KEY_OR_NEXT:
   {
+    /* 
+      We are looking for the first record such that 
+        index_tuple >= lookup_tuple.
+    */
+    if (key_descr[active_index]->is_reverse_cf)
+    {
+      if (!scan_it->Valid())
+      {
+        scan_it->SeekToLast();
+      }
+      else
+      {
+        /*
+          When using a full extended key tuple: we should not step back if 
+          we've found an exact match.
+        */
+        rkey= scan_it->key();
+        if (!using_full_key || !kd->value_matches_prefix(rkey, slice))
+        {
+          scan_it->Prev();
+        }
+      }
+    }
+
     if (!scan_it->Valid())
+    {
       rc= HA_ERR_KEY_NOT_FOUND;
+    }
     else
     {
       rkey= scan_it->key();
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 5346cf1..b7371cb 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -195,11 +195,13 @@ public:
   /*
     This can be used to compare prefixes.
     if  X is a prefix of Y, then we consider that X = Y.
+
+    @detail
+      n_parts parameter is not used anymore. TODO: remove it.
   */
-  // psergey-todo: this seems to work for variable-length keys, does it?
   // {pb, b_len} describe the lookup key, which can be a prefix of pa/a_len.
   int cmp_full_keys(const char *pa, uint a_len, const char *pb, uint b_len,
-                    uint n_parts)
+                    uint n_parts=0) const
   {
     DBUG_ASSERT(covers_key(pa, a_len));
     DBUG_ASSERT(covers_key(pb, b_len));
@@ -210,7 +212,7 @@ public:
   }
   
   /* Check if given mem-comparable key belongs to this index */
-  bool covers_key(const char *key, uint keylen)
+  bool covers_key(const char *key, uint keylen) const
   {
     if (keylen < INDEX_NUMBER_SIZE)
       return false;
@@ -220,6 +222,20 @@ public:
       return true;
   }
 
+  /*
+    Return true if the passed mem-comparable key
+    - is from this index, and
+    - it matches the passed key prefix (the prefix is also in mem-comparable
+      form)
+  */
+  bool value_matches_prefix(const rocksdb::Slice &value, 
+                            const rocksdb::Slice &prefix) const
+  {
+    return covers_key(value.data(), value.size()) &&
+           !cmp_full_keys(value.data(), value.size(),
+                          prefix.data(), prefix.size());
+  }
+
   uint32 get_index_number()
   {
     return index_number;
diff --git a/storage/rocksdb/rocksdb-range-access.txt b/storage/rocksdb/rocksdb-range-access.txt
index d86d5c6..4fb5d26 100644
--- a/storage/rocksdb/rocksdb-range-access.txt
+++ b/storage/rocksdb/rocksdb-range-access.txt
@@ -74,3 +74,69 @@ RocksDB calls:
   if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
     return record.
 
+== HA_READ_KEY_OR_NEXT, forward CF ==
+
+This is finding min(key) such that key >= lookup_tuple.
+
+If lookup tuple is kv-bbb:
+
+  ( kv )-aaa-pk
+# ( kv )-bbb      <-- "kv-bbb" doesn't exist in the database, but it would be
+                       here.
+  ( kv )-bbb-pk1  <--- Seek("kv-bbb") will put us here on the next record.
+  ( kv )-bbb-pk2       
+  ( kv )-bbb-...
+
+RocksDB calls:
+
+  Seek(kv);
+  if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
+    return record.
+
+== HA_READ_KEY_OR_NEXT, backward CF ==
+
+When specified key tuple is a key prefix:
+
+  (kv+1)-xxx-pk
+  ( kv )-ccc-pk 
+  ( kv )-bbb-pk3
+  ( kv )-bbb-pk2
+  ( kv )-bbb-pk1 < -- We need to be here (or above)
+# ( kv )-bbb         <---we call Seek(kv-bbb)
+  ( kv )-aaa-pk      ... and end up here.  Should call it->Prev().  
+
+There is a special case when (kv)-bbb-pk1 is the last record in the CF, and 
+we get invalid iterator. Then, we need to call SeekToLast().
+
+Another kind of special case is when we need to seek to the full value. 
+Suppose, the lookup tuple is kv-bbb-pk1:
+
+  (kv+1)-xxx-pk
+  ( kv )-ccc-pk 
+  ( kv )-bbb-pk3
+  ( kv )-bbb-pk2
+  ( kv )-bbb-pk1 < -- Seek(kv-bbb-pk1)
+  ( kv )-bbb-pk0  
+
+Then, Seek(kv-bbb-pk1) may position us exactly at the tuple we need, and we
+won't need to call it->Prev().
+If kv-bbb-pk1 is not present in the database, we will be positioned on
+kv-bbb-pk0, and we will need to call it->Prev().
+If we get an invalid iterator, we DO need to call SeekToLast().
+
+RocksDB calls:
+ 
+  Seek(...);
+
+  if (!it->Valid())
+    it->SeekToLast();
+  else
+  {
+    if (!using_full_key ||
+        !(kd->covers_key(...) || kd->cmp_full_keys(...))
+      it->Prev();
+  }
+
+  if (it->Valid() && kd->covers_key(..))
+    return record.
+


More information about the commits mailing list