[Commits] 8bdfd583ec4: Range Locking: use range endpoints, not rowkeys of range bounds

Sergei Petrunia psergey at askmonty.org
Fri Jan 4 17:37:30 EET 2019


revision-id: 8bdfd583ec47522874a5f3b95b91e1f7e4323a97 (fb-prod201801-180-g8bdfd583ec4)
parent(s): bb04a774561548d159576b3bcaa2f638ffd76829
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-01-04 18:37:29 +0300
message:

Range Locking: use range endpoints, not rowkeys of range bounds

This provides correct range locking for multi-part keys.

---
 mysql-test/suite/rocksdb/r/range_locking.result    |  50 ++++++-
 mysql-test/suite/rocksdb/r/varbinary_format.result |  17 ++-
 mysql-test/suite/rocksdb/t/delete_before_lock.test |   5 +
 mysql-test/suite/rocksdb/t/range_locking.test      |  36 +++++
 mysql-test/suite/rocksdb/t/varbinary_format.test   |  14 +-
 rocksdb                                            |   2 +-
 storage/rocksdb/ha_rocksdb.cc                      | 166 ++++++++++++++++++---
 storage/rocksdb/range_locking/ft/comparator.h      |  16 +-
 8 files changed, 262 insertions(+), 44 deletions(-)

diff --git a/mysql-test/suite/rocksdb/r/range_locking.result b/mysql-test/suite/rocksdb/r/range_locking.result
index 964458b783f..c3271db2730 100644
--- a/mysql-test/suite/rocksdb/r/range_locking.result
+++ b/mysql-test/suite/rocksdb/r/range_locking.result
@@ -90,18 +90,60 @@ pk	a
 10	10
 select * from information_schema.rocksdb_locks;
 COLUMN_FAMILY_ID	TRANSACTION_ID	KEY	MODE
-0	15	000001078000000a	X
+0	15	00000001078000000a	X
 delete from t1 where pk between 25 and 40;
 select * from information_schema.rocksdb_locks;
 COLUMN_FAMILY_ID	TRANSACTION_ID	KEY	MODE
-0	15	0000010780000019 - 0000010780000028	X
-0	15	000001078000000a	X
+0	15	000000010780000019 - 010000010780000028	X
+0	15	00000001078000000a	X
 rollback;
 begin;
+# The following will show a range lock on 2-9 and also a point lock on 10.
+# This is how things currently work.
 select * from t1 where pk between 2 and 9 for update;
 pk	a
 select * from information_schema.rocksdb_locks;
 COLUMN_FAMILY_ID	TRANSACTION_ID	KEY	MODE
-0	16	0000010780000002 - 0000010780000009	X
+0	16	00000001078000000a	X
+0	16	000000010780000002 - 010000010780000009	X
 rollback;
 drop table t1;
+#
+# MDEV-18104: MyRocks-Gap-Lock: range locking bounds are incorrect for multi-part keys
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+kp1 int not null,
+kp2 int not null,
+a int,
+primary key(kp1, kp2)
+) engine=rocksdb;
+insert into t1 select 1, a, 1234 from t0;
+insert into t1 select 2, a, 1234 from t0;
+insert into t1 select 3, a, 1234 from t0;
+connect  con1,localhost,root,,;
+connection con1;
+begin;
+select * from t1 where kp1=2 for update;
+kp1	kp2	a
+2	0	1234
+2	1	1234
+2	2	1234
+2	3	1234
+2	4	1234
+2	5	1234
+2	6	1234
+2	7	1234
+2	8	1234
+2	9	1234
+connection default;
+# The lock on kp1=2 should inhibit the following INSERT:
+insert into t1 values ( 2,5,9999);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
+rollback;
+connection con1;
+rollback;
+connection default;
+disconnect con1;
+drop table t0,t1;
diff --git a/mysql-test/suite/rocksdb/r/varbinary_format.result b/mysql-test/suite/rocksdb/r/varbinary_format.result
index 9362d42515c..e3dde737f9f 100644
--- a/mysql-test/suite/rocksdb/r/varbinary_format.result
+++ b/mysql-test/suite/rocksdb/r/varbinary_format.result
@@ -1,3 +1,4 @@
+set @skip_head=9 + if(@@rocksdb_use_range_locking, 2, 0);
 CREATE TABLE t1(
 vb VARBINARY(64) primary key
 ) ENGINE=rocksdb;
@@ -45,8 +46,8 @@ hex(vb)
 00000000000000000000000000000000000000000000000000000000000000
 0000000000000000000000000000000000000000000000000000000000000000
 000000000000000000000000000000000000000000000000000000000000000000
-SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
-SUBSTRING(a.key,9)
+SELECT SUBSTRING(a.key, at skip_head) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key, at skip_head)
 000000000000000001
 000000000000000002
 000000000000000003
@@ -111,8 +112,8 @@ hex(vb)
 00000000000000000000000000000000000000000000000000000000000000
 0000000000000000000000000000000000000000000000000000000000000000
 000000000000000000000000000000000000000000000000000000000000000000
-SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
-SUBSTRING(a.key,9)
+SELECT SUBSTRING(a.key, at skip_head) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key, at skip_head)
 0000000000000000f8
 0000000000000000f9
 0000000000000000fa
@@ -175,8 +176,8 @@ aaaaaaaaaa
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
-SUBSTRING(a.key,9)
+SELECT SUBSTRING(a.key, at skip_head) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key, at skip_head)
 610000000000000001
 616100000000000002
 616161000000000003
@@ -241,8 +242,8 @@ aaaaaaaaaa
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
-SUBSTRING(a.key,9)
+SELECT SUBSTRING(a.key, at skip_head) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key, at skip_head)
 6100000000000000f8
 6161000000000000f9
 6161610000000000fa
diff --git a/mysql-test/suite/rocksdb/t/delete_before_lock.test b/mysql-test/suite/rocksdb/t/delete_before_lock.test
index 93a9d1adaf9..52d8cff9cb8 100644
--- a/mysql-test/suite/rocksdb/t/delete_before_lock.test
+++ b/mysql-test/suite/rocksdb/t/delete_before_lock.test
@@ -1,6 +1,11 @@
 --source include/have_rocksdb.inc
 --source include/have_debug_sync.inc
 
+# The testcase doesn't work with range locking:
+#   UPDATE where id1=1 sets a lock on [(1;-inf), (1,+inf)]
+#   then DELETE will try to get a lock on (1,1) and cause lock wait timeout
+--source suite/rocksdb/include/not_range_locking.inc
+
 # This is a test case to reproduce https://github.com/facebook/mysql-5.6/issues/162
 # Expected output of the last select for update was (1,2,100) and (1,3,100), but
 # currently it returns (1,2,1) and (1,3,1), which must be fixed.
diff --git a/mysql-test/suite/rocksdb/t/range_locking.test b/mysql-test/suite/rocksdb/t/range_locking.test
index 4cf6f079ebe..b0f5157f304 100644
--- a/mysql-test/suite/rocksdb/t/range_locking.test
+++ b/mysql-test/suite/rocksdb/t/range_locking.test
@@ -114,10 +114,46 @@ select * from information_schema.rocksdb_locks;
 
 rollback;
 begin;
+--echo # The following will show a range lock on 2-9 and also a point lock on 10.
+--echo # This is how things currently work.
 select * from t1 where pk between 2 and 9 for update;
 select * from information_schema.rocksdb_locks;
 rollback;
 
 drop table t1;
 
+--echo #
+--echo # MDEV-18104: MyRocks-Gap-Lock: range locking bounds are incorrect for multi-part keys
+--echo #
+
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+  kp1 int not null,
+  kp2 int not null,
+  a int,
+  primary key(kp1, kp2)
+) engine=rocksdb;
+
+insert into t1 select 1, a, 1234 from t0;
+insert into t1 select 2, a, 1234 from t0;
+insert into t1 select 3, a, 1234 from t0;
+
+connect (con1,localhost,root,,);
+connection con1;
+
+begin;
+select * from t1 where kp1=2 for update;
+
+connection default;
+--echo # The lock on kp1=2 should inhibit the following INSERT:
+--error ER_LOCK_WAIT_TIMEOUT
+insert into t1 values ( 2,5,9999); 
+rollback;
+
+connection con1;
+rollback;
+connection default;
+disconnect con1;
 
+drop table t0,t1;
diff --git a/mysql-test/suite/rocksdb/t/varbinary_format.test b/mysql-test/suite/rocksdb/t/varbinary_format.test
index d00a8b7afbe..cca230614fd 100644
--- a/mysql-test/suite/rocksdb/t/varbinary_format.test
+++ b/mysql-test/suite/rocksdb/t/varbinary_format.test
@@ -1,6 +1,12 @@
 --source include/have_debug.inc
 --source include/have_rocksdb.inc
 
+#
+# In point-locking mode, skip first 4 bytes, as they indexnr.
+# In range-locking mode, there is one more range control byte at the front
+#
+set @skip_head=9 + if(@@rocksdb_use_range_locking, 2, 0);
+
 # Create a table with a varbinary key with the current format and validate
 # that it sorts correctly
 CREATE TABLE t1(
@@ -27,7 +33,7 @@ SELECT hex(vb) FROM t1;
 # validate that the keys were encoded as expected
 BEGIN;
 SELECT hex(vb) FROM t1 FOR UPDATE;
-SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SELECT SUBSTRING(a.key, at skip_head) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
 ROLLBACK;
 
 DROP TABLE t1;
@@ -60,7 +66,7 @@ SELECT hex(vb) FROM t1;
 # validate that the keys were encoded as expected
 BEGIN;
 SELECT hex(vb) FROM t1 FOR UPDATE;
-SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SELECT SUBSTRING(a.key, at skip_head) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
 ROLLBACK;
 
 DROP TABLE t1;
@@ -91,7 +97,7 @@ SELECT * FROM t1;
 # validate that the keys were encoded as expected
 BEGIN;
 SELECT * FROM t1 FOR UPDATE;
-SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SELECT SUBSTRING(a.key, at skip_head) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
 ROLLBACK;
 
 DROP TABLE t1;
@@ -124,7 +130,7 @@ SELECT * FROM t1;
 # validate that the keys were encoded as expected
 BEGIN;
 SELECT * FROM t1 FOR UPDATE;
-SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SELECT SUBSTRING(a.key, at skip_head) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
 ROLLBACK;
 
 DROP TABLE t1;
diff --git a/rocksdb b/rocksdb
index 756b3e44a8b..dd648b42fde 160000
--- a/rocksdb
+++ b/rocksdb
@@ -1 +1 @@
-Subproject commit 756b3e44a8b7173993c0076a76c56c6a31d52a8b
+Subproject commit dd648b42fde54a15675002981c305cf786db8594
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 3ec6b189f8b..72dd31bbb85 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -2275,7 +2275,9 @@ protected:
 
   virtual rocksdb::Status lock_range(rocksdb::ColumnFamilyHandle *const cf,
                                      const rocksdb::Slice &start,
-                                     const rocksdb::Slice &end) = 0;
+                                     bool  start_inf_suffix,
+                                     const rocksdb::Slice &end,
+                                     bool  end_inf_suffix) = 0;
 
   virtual bool prepare(const rocksdb::TransactionName &name) = 0;
 
@@ -2729,11 +2731,33 @@ public:
 
   virtual bool is_writebatch_trx() const override { return false; }
 
+  /*
+    Both start and end endpoint may may be prefixes.
+    Both bounds are inclusive.
+  */
   rocksdb::Status lock_range(rocksdb::ColumnFamilyHandle *const cf,
                              const rocksdb::Slice &start,
-                             const rocksdb::Slice &end) override
+                             bool  start_inf_suffix,
+                             const rocksdb::Slice &end,
+                             bool  end_inf_suffix
+                             ) override
   {
-    return m_rocksdb_tx->GetRangeLock(cf, start, end);
+    const char SUFFIX_INF= 0x0;
+    const char SUFFIX_SUP= 0x1;
+    //  GetRangeLock accepts range endpoints, not keys.
+    // Convert keys to range endpoints here.
+    // See also range_endpoint_convert() below.
+    StringBuffer<64> left;
+    left.append(start_inf_suffix? SUFFIX_SUP :SUFFIX_INF);
+    left.append(start.data(), start.size());
+
+    StringBuffer<64> right;
+    right.append(end_inf_suffix? SUFFIX_SUP :SUFFIX_INF);
+    right.append(end.data(), end.size());
+
+    return m_rocksdb_tx->GetRangeLock(cf,
+                           rocksdb::Slice(left.ptr(),left.length()),
+                           rocksdb::Slice(right.ptr(),right.length()));
   }
 private:
   void release_tx(void) {
@@ -3135,7 +3159,9 @@ public:
 
   rocksdb::Status lock_range(rocksdb::ColumnFamilyHandle *const cf,
                              const rocksdb::Slice &start,
-                             const rocksdb::Slice &end) override
+                             bool  start_inf_suffix,
+                             const rocksdb::Slice &end,
+                             bool  end_inf_suffix) override
   {
     return rocksdb::Status::OK();
   }
@@ -4333,6 +4359,63 @@ void rocksdb_kill_connection(handlerton* hton, THD* thd)
 }
 
 
+void range_endpoint_convert(const rocksdb::Slice &key,
+                            std::string *res)
+{
+  const char SUFFIX_INF= 0x0;
+  res->clear();
+  res->append(&SUFFIX_INF, 1);
+  res->append(key.data(), key.size());
+}
+
+int range_endpoints_compare(const char *a, size_t a_len,
+                            const char *b, size_t b_len)
+{
+  size_t min_len= std::min(a_len, b_len);
+
+  //compare the values. Skip the first byte as it is the endpoint signifier
+  int res= memcmp(a+1, b+1, min_len-1);
+  if (!res)
+  {
+    if (b_len > min_len)
+    {
+      // a is shorter;
+      if (a[0] == 0)
+        return  -1; //"a is smaller"
+      else
+      {
+        // a is considered padded with 0xFF:FF:FF:FF...
+        return 1; // "a" is bigger
+      }
+    }
+    else if (a_len > min_len)
+    {
+      // the opposite of the above: b is shorter.
+      if (b[0] == 0)
+        return  1; //"b is smaller"
+      else
+      {
+        // b is considered padded with 0xFF:FF:FF:FF...
+        return -1; // "b" is bigger
+      }
+    }
+    else
+    {
+      // the lengths are equal (and the key values, too)
+      if (a[0] < b[0])
+        return -1;
+      else if (a[0] > b[0])
+        return 1;
+      else
+        return 0;
+    }
+  }
+  else
+    return res;
+}
+
+
+
 void Rdb_transaction::kill_lock_wait_by_thd(THD *thd)
 {
   /*
@@ -4969,8 +5052,11 @@ static int rocksdb_init_func(void *const p) {
   
   if (rocksdb_use_range_locking)
   {
-    //psergey-todo: this is ugly: 
-    rdb->get_range_lock_manager()->set_max_lock_memory(rocksdb_max_lock_memory);
+    rocksdb::RangeLockMgrControl *mgr= rdb->get_range_lock_manager();
+
+    mgr->set_endpoint_cmp_functions(range_endpoint_convert,
+                                    range_endpoints_compare);
+    mgr->set_max_lock_memory(rocksdb_max_lock_memory);
     sql_print_information("RocksDB: USING NEW RANGE LOCKING");
     sql_print_information("RocksDB: Max lock memory=%lu", rocksdb_max_lock_memory);
   }
@@ -8129,19 +8215,31 @@ void ha_rocksdb::set_range_lock(Rdb_transaction *tx,
                                 const rocksdb::Slice &slice,
                                 const key_range *const end_key)
 {
-  // We have 'slice' with start_slice.
   rocksdb::Slice end_slice;
   uchar end_slice_buf[MAX_KEY_LENGTH];
+  bool start_has_inf_suffix, end_has_inf_suffix;
 
   if (m_lock_rows != RDB_LOCK_WRITE || !rocksdb_use_range_locking) {
     return;
   }
 
+  /*
+    The 'slice' parameter has the left endpoint of the range to lock.
+    Figure out the right endpoint
+  */
+
   if (find_flag == HA_READ_KEY_EXACT) {
+    /* 
+      This is "key_part= const" interval
+    */
+    start_has_inf_suffix= false;
+    end_has_inf_suffix= true;
     end_slice= slice;
   }
-  else
-  if (find_flag == HA_READ_PREFIX_LAST) {
+  else if (find_flag == HA_READ_PREFIX_LAST) {
+    //psergey-todo: swap the bounds? so that the start bounds is the end?
+    //Testscase!
+
     /*
       We have made the kd.successor(m_sk_packed_tuple) call above.
 
@@ -8151,8 +8249,29 @@ void ha_rocksdb::set_range_lock(Rdb_transaction *tx,
     kd.successor(end_slice_buf, slice.size());
     end_slice= rocksdb::Slice((const char*)end_slice_buf, slice.size());
   }
-  else 
-  if (end_key) {
+  else if (end_key) {
+    // Known start range bounds: HA_READ_KEY_OR_NEXT, HA_READ_AFTER_KEY
+    if (find_flag == HA_READ_KEY_OR_NEXT)
+      start_has_inf_suffix= false;
+    else if (find_flag == HA_READ_AFTER_KEY)
+      start_has_inf_suffix= true;
+    else
+      DBUG_ASSERT(0);
+
+    // Known end range bounds: HA_READ_AFTER_KEY, HA_READ_BEFORE_KEY
+    if (end_key->flag == HA_READ_AFTER_KEY)
+    {
+      // this is "key_part <= const".
+      end_has_inf_suffix= true;
+    }
+    else if (end_key->flag == HA_READ_BEFORE_KEY)
+    {
+      // this is "key_part < const", non-inclusive.
+      end_has_inf_suffix= false;
+    }
+    else
+      DBUG_ASSERT(0);
+
     uchar pack_buffer[MAX_KEY_LENGTH];
     uint end_slice_size=
         kd.pack_index_tuple(table, pack_buffer, end_slice_buf,
@@ -8163,18 +8282,22 @@ void ha_rocksdb::set_range_lock(Rdb_transaction *tx,
   }
   else
   {
-    /*
-      On range scan without any end key condition, there is no
-      eq cond, and eq cond length is the same as index_id size (4 bytes).
-      Example1: id1 BIGINT, id2 INT, id3 BIGINT, PRIMARY KEY (id1, id2, id3)
-       WHERE id1>=1 AND id2 >= 2 and id2 <= 5 => eq_cond_len= 4
-    */
+    // Known start range bounds: HA_READ_KEY_OR_NEXT, HA_READ_AFTER_KEY
+    if (find_flag == HA_READ_KEY_OR_NEXT)
+      start_has_inf_suffix= false;
+    else if (find_flag == HA_READ_AFTER_KEY)
+      start_has_inf_suffix= true;
+    else
+      DBUG_ASSERT(0);
+
     uint end_slice_size;
-    // TODO: or this should be kd.get_index_number() ? 
     kd.get_infimum_key(end_slice_buf, &end_slice_size);
     end_slice= rocksdb::Slice((char*)end_slice_buf, end_slice_size);
+    end_has_inf_suffix= true;
   }
-  tx->lock_range(kd.get_cf(), slice, end_slice);
+  tx->lock_range(kd.get_cf(),
+                 slice,     start_has_inf_suffix,
+                 end_slice, end_has_inf_suffix);
 }
 
 /*
@@ -8296,11 +8419,12 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
 
   Rdb_transaction *const tx = get_or_create_tx(table->in_use);
   const bool is_new_snapshot = !tx->has_snapshot();
+
+  set_range_lock(tx, kd, find_flag, slice, end_key);
+
   // Loop as long as we get a deadlock error AND we end up creating the
   // snapshot here (i.e. it did not exist prior to this)
   for (;;) {
-    //psergey:
-    set_range_lock(tx, kd, find_flag, slice, end_key);
 
     /*
       This will open the iterator and position it at a record that's equal or
diff --git a/storage/rocksdb/range_locking/ft/comparator.h b/storage/rocksdb/range_locking/ft/comparator.h
index 9775cc13387..8d7fe6a0f90 100644
--- a/storage/rocksdb/range_locking/ft/comparator.h
+++ b/storage/rocksdb/range_locking/ft/comparator.h
@@ -44,7 +44,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #include "util/dbt.h"
 
-typedef int (*ft_compare_func)(DB *db, const DBT *a, const DBT *b);
+typedef int (*ft_compare_func)(DB *db, void *arg, const DBT *a, const DBT *b);
 
 int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len);
 
@@ -57,8 +57,9 @@ namespace toku {
     // that points may be positive or negative infinity.
 
     class comparator {
-        void init(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic) {
+        void init(ft_compare_func cmp, void *cmp_arg, DESCRIPTOR desc, uint8_t memcmp_magic) {
             _cmp = cmp;
+            _cmp_arg= cmp_arg;
             _fake_db->cmp_descriptor = desc;
             _memcmp_magic = memcmp_magic;
         }
@@ -67,9 +68,9 @@ namespace toku {
         // This magic value is reserved to mean that the magic has not been set.
         static const uint8_t MEMCMP_MAGIC_NONE = 0;
 
-        void create(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) {
+        void create(ft_compare_func cmp, void *cmp_arg, DESCRIPTOR desc, uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) {
             XCALLOC(_fake_db);
-            init(cmp, desc, memcmp_magic);
+            init(cmp, cmp_arg, desc, memcmp_magic);
         }
 
         // inherit the attributes of another comparator, but keep our own
@@ -78,7 +79,7 @@ namespace toku {
             invariant_notnull(_fake_db);
             invariant_notnull(cmp._cmp);
             invariant_notnull(cmp._fake_db);
-            init(cmp._cmp, cmp._fake_db->cmp_descriptor, cmp._memcmp_magic);
+            init(cmp._cmp, cmp._cmp_arg, cmp._fake_db->cmp_descriptor, cmp._memcmp_magic);
         }
 
         // like inherit, but doesn't require that the this comparator
@@ -120,16 +121,19 @@ namespace toku {
                        && dbt_has_memcmp_magic(a)
                        // ..then we expect `b' to also have the memcmp magic
                        && __builtin_expect(dbt_has_memcmp_magic(b), 1)) {
+                assert(0); // psergey: this branch should not be taken.
                 return toku_builtin_compare_fun(nullptr, a, b);
             } else {
                 // yikes, const sadness here
-                return _cmp(const_cast<DB *>(_fake_db), a, b);
+                return _cmp(const_cast<DB *>(_fake_db), _cmp_arg, a, b);
             }
         }
 
     private:
         DB *_fake_db;
         ft_compare_func _cmp;
+        void *_cmp_arg;
+
         uint8_t _memcmp_magic;
     };
 


More information about the commits mailing list