[Commits] 1b423e395: Range Locking: clean up comments

Sergei Petrunia psergey at askmonty.org
Mon Apr 29 23:29:02 EEST 2019


revision-id: 1b423e3954d932c4624337308e3e1cd98481a495 (v5.8-1041-g1b423e395)
parent(s): bcc7923688446c58a21f4f0ab287b147f406abc2
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-04-29 23:29:02 +0300
message:

Range Locking: clean up comments

---
 include/rocksdb/utilities/transaction.h           | 62 ++++++++++++-----------
 include/rocksdb/utilities/transaction_db.h        | 13 +++--
 utilities/transactions/pessimistic_transaction.cc |  1 -
 utilities/transactions/transaction_lock_mgr.cc    | 21 +++-----
 utilities/transactions/transaction_lock_mgr.h     | 14 +++--
 5 files changed, 52 insertions(+), 59 deletions(-)

diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h
index b0f1308ed..6b84d4a44 100644
--- a/include/rocksdb/utilities/transaction.h
+++ b/include/rocksdb/utilities/transaction.h
@@ -31,50 +31,52 @@ using TransactionID = uint64_t;
   Prefix ranges are introduced below.
 
   == Basic Ranges ==
-
-  Basic ranges can be defined over rowkeys. Key Comparator defines ordering of
-  rowkeys, a finite closed range is the same as range over numbers:
+  Let's start with basic ranges. The Key Comparator defines the ordering of
+  rowkeys. One can then specify a finite closed range just by providing the
+  rowkeys of its endpoints:
 
     lower_endpoint <= X <= upper_endpoint
 
-  The endpoints here are possible rowkey values.
-
-  == Lexicographic-like ordering ==
-
-  A lexicographic-like ordering satisfies these criteria:
-
-  1.The ordering is prefix-based. If there are two keys in form
+  However, our goal is to provide a richer set of endpoints. Read on.
 
-    key_a = {prefix_a suffix_a}
-    key_b = {prefix_b suffix_b}
+  == Lexicographic ordering ==
+  A lexicographic (or dictionary) ordering satisfies this criterion: if there
+  are two keys of the form
+    key_a = {prefix_a, suffix_a}
+    key_b = {prefix_b, suffix_b}
   and
     prefix_a < prefix_b
   then
     key_a < key_b.
 
-  2. An empty string is less than any other value. From this it follows that
-  for any prefix and suffix, {prefix, suffix} > {prefix}.
+  == Prefix ranges ==
+  With lexicographic ordering, one may want to define ranges of the form
 
-  3. The row comparison function is able to compare key prefixes. If the data
-  domain includes keys A and B, then the comparison function is able to compare
-  equal-length prefixes:
+     "prefix is $PREFIX"
 
-    min_len= min(byte_length(A), byte_length(B));
-    cmp(Slice(A, min_len), Slice(B, min_len))  // this call is valid
+  which translates to a range of the form
 
-  == Prefix ranges ==
+    {$PREFIX, -infinity} < X < {$PREFIX, +infinity}
+
+  where -infinity compares as less than any possible suffix, and +infinity
+  compares as greater than any possible suffix.
 
-  With lexicographic-like ordering, one may wish to construct ranges from a
-  restriction in form prefix=P:
-   - the left endpoint would would be {P, inf_suffix=false}
-   - the right endpoint would be {P, inf_suffix=true}.
+  The Endpoint class allows one to define these kinds of ranges.
 
-  == Supported comparators ==
-  BytewiseComparator and ReverseBytweiseComparator meet the lexicographic-like
-  ordering requirements.
+  == Notes ==
+  BytewiseComparator and ReverseBytewiseComparator produce lexicographic
+  ordering.
+
+  The row comparison function is able to compare key prefixes. If the data
+  domain includes keys A and B, then the comparison function is able to compare
+  equal-length prefixes:
+
+    min_len= min(byte_length(A), byte_length(B));
+    cmp(Slice(A, min_len), Slice(B, min_len));  // this call is valid
 
-  TODO: RangeLocking will refuse to work if any other comparator is used,
-  although other comparators meeting this property could be used as well.
+  == Other options ==
+  As far as MyRocks is concerned, the alternative to prefix ranges would be to
+  support both open (non-inclusive) and closed (inclusive) range endpoints.
 */
 
 class Endpoint {
@@ -82,7 +84,7 @@ class Endpoint {
   Slice slice;
 
   /*
-    true  : the key has an "infinity" suffix. A suffix that would compare as
+    true  : the key has a "+infinity" suffix, i.e. a suffix that compares as
             greater than any other suffix
     false : otherwise
   */
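
To make the prefix-range comment above concrete, here is a minimal sketch of how a
"prefix is $PREFIX" restriction could be expressed with the Endpoint class. Only the
Slice member and the "+infinity suffix" flag are documented in this hunk; the
inf_suffix member name (taken from the old comment text) and the way Endpoint is
initialized here are assumptions, so treat this as pseudocode against this branch,
not the final API:

    #include "rocksdb/utilities/transaction.h"

    // Hypothetical sketch: describe the range "all keys starting with 'user123-'".
    rocksdb::Endpoint start_ep;                     // assumption: default-constructible
    start_ep.slice = rocksdb::Slice("user123-");
    start_ep.inf_suffix = false;  // {PREFIX, -infinity}: sorts below any real suffix

    rocksdb::Endpoint end_ep;
    end_ep.slice = rocksdb::Slice("user123-");
    end_ep.inf_suffix = true;     // {PREFIX, +infinity}: sorts above any real suffix

    // With lexicographic ordering, every key of the form {"user123-", suffix}
    // falls strictly between start_ep and end_ep.
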
diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h
index 8f0e018f2..7ebac3e06 100644
--- a/include/rocksdb/utilities/transaction_db.h
+++ b/include/rocksdb/utilities/transaction_db.h
@@ -33,7 +33,8 @@ enum TxnDBWritePolicy {
 
 const uint32_t kInitialMaxDeadlocks = 5;
 
-// A handle to control RangeLockMgr
+// A handle to control RangeLockMgr (Range-based lock manager) from outside
+// RocksDB
 class RangeLockMgrHandle {
  public:
   virtual int set_max_lock_memory(size_t max_lock_memory) = 0;
@@ -41,12 +42,15 @@ class RangeLockMgrHandle {
   virtual ~RangeLockMgrHandle() {};
 };
 
-// A factory function to create a Range Lock Manager
+// A factory function to create a Range Lock Manager. The created object should
+// be:
+//  1. Passed in TransactionDBOptions::range_lock_mgr to open the database in
+//     range-locking mode
+//  2. Used to control the lock manager when the DB is already open.
 RangeLockMgrHandle* NewRangeLockManager(
   std::shared_ptr<TransactionDBMutexFactory> mutex_factory
 );
 
-
 struct TransactionDBOptions {
   // Specifies the maximum number of keys that can be locked at the same time
   // per column family.
@@ -210,7 +214,6 @@ struct DeadlockPath {
   bool empty() { return path.empty() && !limit_exceeded; }
 };
 
-
 class TransactionDB : public StackableDB {
  public:
   // Optimized version of ::Write that receives more optimization request such
@@ -284,7 +287,7 @@ class TransactionDB : public StackableDB {
   GetLockStatusData() = 0;
   virtual std::vector<DeadlockPath> GetDeadlockInfoBuffer() = 0;
   virtual void SetDeadlockInfoBufferSize(uint32_t target_size) = 0;
-  
+
  protected:
   // To Create an TransactionDB, call Open()
   // The ownership of db is transferred to the base StackableDB
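
A minimal usage sketch for the NewRangeLockManager() factory commented above: create
the handle, pass it via TransactionDBOptions to open the DB in range-locking mode,
then keep using the same handle to control the lock manager. The factory signature,
RangeLockMgrHandle::set_max_lock_memory() and the range_lock_mgr option name come
from this patch; the member's exact type and whether a null mutex factory selects a
default implementation are assumptions:

    #include "rocksdb/utilities/transaction_db.h"

    rocksdb::Status OpenWithRangeLocking(const std::string& path,
                                         rocksdb::TransactionDB** db) {
      // 1. Create the range lock manager (assumption: nullptr => default mutexes).
      rocksdb::RangeLockMgrHandle* mgr = rocksdb::NewRangeLockManager(nullptr);

      rocksdb::Options options;
      options.create_if_missing = true;

      // 2. Pass it in TransactionDBOptions to open the DB in range-locking mode.
      rocksdb::TransactionDBOptions txn_db_options;
      txn_db_options.range_lock_mgr = mgr;  // assumption: raw-pointer member

      rocksdb::Status s =
          rocksdb::TransactionDB::Open(options, txn_db_options, path, db);

      // 3. The same handle controls the lock manager while the DB is open.
      if (s.ok()) {
        mgr->set_max_lock_memory(64 * 1024 * 1024);  // cap lock memory at 64MB
      }
      return s;
    }
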
diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc
index ff653fa0e..082934e75 100644
--- a/utilities/transactions/pessimistic_transaction.cc
+++ b/utilities/transactions/pessimistic_transaction.cc
@@ -133,7 +133,6 @@ WriteCommittedTxn::WriteCommittedTxn(TransactionDB* txn_db,
 
 Status PessimisticTransaction::CommitBatch(WriteBatch* batch) {
   TransactionKeyMap keys_to_unlock;
-
   Status s = LockBatch(batch, &keys_to_unlock);
 
   if (!s.ok()) {
diff --git a/utilities/transactions/transaction_lock_mgr.cc b/utilities/transactions/transaction_lock_mgr.cc
index f837f6a5a..b6c91393d 100644
--- a/utilities/transactions/transaction_lock_mgr.cc
+++ b/utilities/transactions/transaction_lock_mgr.cc
@@ -1180,18 +1180,6 @@ void RangeLockMgr::AddColumnFamily(const ColumnFamilyHandle *cfh) {
   InstrumentedMutexLock l(&ltree_map_mutex_);
   if (ltree_map_.find(column_family_id) == ltree_map_.end()) {
     DICTIONARY_ID dict_id = { .dictid = column_family_id };
-    // "RocksDB_SE_v3.10" // BytewiseComparator() ,ReverseBytewiseComparator()
-    /*
-    toku::comparator *ltree_cmp;
-    if (!strcmp(cfh->GetComparator()->Name(), "RocksDB_SE_v3.10"))
-      ltree_cmp = &fw_cmp_;
-    else if (!strcmp(cfh->GetComparator()->Name(),"rev:RocksDB_SE_v3.10"))
-      ltree_cmp = &bw_cmp_;
-    else {
-      assert(false);
-      ltree_cmp= nullptr;
-    }
-    */
     toku::comparator cmp;
     cmp.create(compare_dbt_endpoints, (void*)cfh->GetComparator(), NULL);
     toku::locktree *ltree= ltm_.get_lt(dict_id, cmp,
@@ -1210,6 +1198,10 @@ void RangeLockMgr::RemoveColumnFamily(const ColumnFamilyHandle *cfh) {
   // Remove lock_map for this column family.  Since the lock map is stored
   // as a shared ptr, concurrent transactions can still keep using it
   // until they release their references to it.
+
+  // TODO^ if one can drop a column family while a transaction is holding a
+  // lock in it, is the column family's comparator guaranteed to survive until
+  // all locks are released? (we depend on this).
   {
     InstrumentedMutexLock l(&ltree_map_mutex_);
 
@@ -1263,9 +1255,8 @@ toku::locktree *RangeLockMgr::get_locktree_by_cfid(uint32_t column_family_id) {
 }
 
 struct LOCK_PRINT_CONTEXT {
-  BaseLockMgr::LockStatusData *data;
-  // this will not be needed when locks are per-column-family:
-  uint32_t cfh_id;
+  BaseLockMgr::LockStatusData *data; // Save locks here
+  uint32_t cfh_id; // Column Family whose tree we are traversing
 };
 
 static 
diff --git a/utilities/transactions/transaction_lock_mgr.h b/utilities/transactions/transaction_lock_mgr.h
index 1f5dbc634..608d6f34f 100644
--- a/utilities/transactions/transaction_lock_mgr.h
+++ b/utilities/transactions/transaction_lock_mgr.h
@@ -212,8 +212,8 @@ class RangeLockMgr :
   };
 
   // Get a lock on a range
-  //  (TODO: this allows to acquire exclusive range locks although they are not
-  //  used ATM)
+  //  @note only exclusive locks are currently supported (requesting a
+  //  non-exclusive lock will get an exclusive one)
   Status TryRangeLock(PessimisticTransaction* txn,
                       uint32_t column_family_id,
                       const Endpoint &start_endp,
@@ -222,10 +222,7 @@ class RangeLockMgr :
   
   void UnLock(const PessimisticTransaction* txn, const TransactionKeyMap* keys,
               Env* env) override ;
-  /*
-    Same as above, but *keys is guaranteed to hold all the locks obtained by
-    the transaction.
-  */
+  // Release all locks the transaction is holding
   void UnLockAll(const PessimisticTransaction* txn, Env* env);
   void UnLock(PessimisticTransaction* txn, uint32_t column_family_id,
               const std::string& key, Env* env) override ;
@@ -238,8 +235,7 @@ class RangeLockMgr :
 
   ~RangeLockMgr();
 
-  int set_max_lock_memory(size_t max_lock_memory) override
-  {
+  int set_max_lock_memory(size_t max_lock_memory) override {
     return ltm_.set_max_lock_memory(max_lock_memory);
   }
 
@@ -261,8 +257,10 @@ class RangeLockMgr :
   InstrumentedMutex ltree_map_mutex_;
 
   // Per-thread cache of ltree_map_.
+  // (uses the same approach as TransactionLockMgr::lock_maps_cache_)
   std::unique_ptr<ThreadLocalPtr> ltree_lookup_cache_;
 
+  // Get the lock tree that stores locks for the Column Family with the given cf_id
   toku::locktree *get_locktree_by_cfid(uint32_t cf_id);
 
   static int compare_dbt_endpoints(__toku_db*, void *arg, const DBT *a_key, const DBT *b_key);
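
To make the endpoint ordering rules from transaction.h concrete, here is a small
self-contained sketch of how a comparison with -infinity/+infinity suffixes can be
layered on top of a plain bytewise prefix comparison. It only illustrates the rule
"{PREFIX, -infinity} < {PREFIX, suffix} < {PREFIX, +infinity}" (treating
inf_suffix=false as the "-infinity / no suffix" case); it is not the actual
compare_dbt_endpoints() implementation, which operates on DBTs and the column
family's comparator:

    #include <algorithm>
    #include <cstring>
    #include <string>

    struct EndpointSketch {
      std::string prefix;
      bool inf_suffix;  // false = -infinity suffix, true = +infinity suffix
    };

    // Returns <0, 0, >0, like memcmp.
    int CompareEndpoints(const EndpointSketch& a, const EndpointSketch& b) {
      // Compare the common-length part of the prefixes bytewise first.
      size_t min_len = std::min(a.prefix.size(), b.prefix.size());
      int c = std::memcmp(a.prefix.data(), b.prefix.data(), min_len);
      if (c != 0) return c;
      if (a.prefix.size() == b.prefix.size()) {
        // Same prefix: -infinity sorts below +infinity.
        if (a.inf_suffix == b.inf_suffix) return 0;
        return a.inf_suffix ? 1 : -1;
      }
      // One prefix extends the other: the shorter endpoint's suffix decides,
      // e.g. {"user", +inf} > {"user123", anything} but {"user", -inf} < it.
      if (a.prefix.size() < b.prefix.size())
        return a.inf_suffix ? 1 : -1;
      return b.inf_suffix ? -1 : 1;
    }
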

