[Commits] 8bb3a6e5bd4: Range Locking: make LockingIterator not lock the same range twice.

psergey sergey at mariadb.com
Mon Jul 26 21:33:56 EEST 2021


revision-id: 8bb3a6e5bd499f81d704ed1f764c43517f14d390 (percona-202103-72-g8bb3a6e5bd4)
parent(s): 7c8da0555e60c4eb5bdfda1d77016fa283eb8e49
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-07-26 21:33:56 +0300
message:

Range Locking: make LockingIterator not lock the same range twice.

LockingIterator may attempt to lock the same range multiple times
if another transaction is inserting records into the range it is scanning
through. Make the code avoid this.

---
 .../r/range_locking_seek_for_update2.result        | 42 ++++++++++++++++
 .../r/range_locking_seek_for_update2_rev_cf.result | 42 ++++++++++++++++
 .../rocksdb/t/range_locking_seek_for_update2.inc   | 50 +++++++++++++++++++
 .../rocksdb/t/range_locking_seek_for_update2.test  |  4 ++
 .../t/range_locking_seek_for_update2_rev_cf.test   |  4 ++
 storage/rocksdb/rdb_locking_iter.cc                |  2 +
 storage/rocksdb/rdb_locking_iter.h                 | 57 ++++++++++++++++------
 7 files changed, 185 insertions(+), 16 deletions(-)

diff --git a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update2.result b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update2.result
new file mode 100644
index 00000000000..329fd0063fc
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update2.result
@@ -0,0 +1,42 @@
+show variables like 'rocksdb_use_range_locking';
+Variable_name	Value
+rocksdb_use_range_locking	ON
+create table t0(a int primary key);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+pk int,
+a int,
+primary key (pk) comment 'rlsfu_test'
+) engine=rocksdb;
+insert into t1 (pk)
+select
+A.a + B.a*10 + C.a*100
+from 
+t0 A, t0 B, t0 C;
+delete from t1 where pk<100;
+connect  con1,localhost,root,,;
+connection con1;
+begin;
+set debug_sync='rocksdb.locking_iter_scan SIGNAL about_to_lock_range WAIT_FOR spoiler_inserted';
+select * from t1 where pk >=5 order by pk limit 5 for update;
+connection default;
+set debug_sync='now WAIT_FOR about_to_lock_range';
+insert into t1 (pk) values 
+(10),(20),(30),(40),(50);
+set debug_sync='now SIGNAL spoiler_inserted';
+connection con1;
+pk	a
+10	NULL
+20	NULL
+30	NULL
+40	NULL
+50	NULL
+# This must return 1, no 5:
+select lock_count from information_schema.rocksdb_trx
+where thread_id=CONNECTION_ID();
+lock_count
+1
+rollback;
+disconnect con1;
+connection default;
+drop table t0, t1;
diff --git a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update2_rev_cf.result b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update2_rev_cf.result
new file mode 100644
index 00000000000..3177f94821b
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update2_rev_cf.result
@@ -0,0 +1,42 @@
+show variables like 'rocksdb_use_range_locking';
+Variable_name	Value
+rocksdb_use_range_locking	ON
+create table t0(a int primary key);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (
+pk int,
+a int,
+primary key (pk) comment 'rev:rlsfu_test'
+) engine=rocksdb;
+insert into t1 (pk)
+select
+A.a + B.a*10 + C.a*100
+from 
+t0 A, t0 B, t0 C;
+delete from t1 where pk<100;
+connect  con1,localhost,root,,;
+connection con1;
+begin;
+set debug_sync='rocksdb.locking_iter_scan SIGNAL about_to_lock_range WAIT_FOR spoiler_inserted';
+select * from t1 where pk >=5 order by pk limit 5 for update;
+connection default;
+set debug_sync='now WAIT_FOR about_to_lock_range';
+insert into t1 (pk) values 
+(10),(20),(30),(40),(50);
+set debug_sync='now SIGNAL spoiler_inserted';
+connection con1;
+pk	a
+10	NULL
+20	NULL
+30	NULL
+40	NULL
+50	NULL
+# This must return 1, no 5:
+select lock_count from information_schema.rocksdb_trx
+where thread_id=CONNECTION_ID();
+lock_count
+1
+rollback;
+disconnect con1;
+connection default;
+drop table t0, t1;
diff --git a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2.inc b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2.inc
new file mode 100644
index 00000000000..8287cd59030
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2.inc
@@ -0,0 +1,50 @@
+
+
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+--source suite/rocksdb/include/have_range_locking.inc
+--enable_connect_log
+show variables like 'rocksdb_use_range_locking';
+
+
+create table t0(a int primary key);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+eval create table t1 (
+  pk int,
+  a int,
+  primary key (pk) comment '$cf'
+) engine=rocksdb;
+
+insert into t1 (pk)
+select
+  A.a + B.a*10 + C.a*100
+from 
+  t0 A, t0 B, t0 C;
+delete from t1 where pk<100;
+
+connect (con1,localhost,root,,);
+connection con1;
+
+begin;
+set debug_sync='rocksdb.locking_iter_scan SIGNAL about_to_lock_range WAIT_FOR spoiler_inserted';
+send
+select * from t1 where pk >=5 order by pk limit 5 for update;
+
+connection default;
+set debug_sync='now WAIT_FOR about_to_lock_range';
+insert into t1 (pk) values 
+(10),(20),(30),(40),(50);
+set debug_sync='now SIGNAL spoiler_inserted';
+
+connection con1;
+reap;
+--echo # This must return 1, no 5:
+select lock_count from information_schema.rocksdb_trx
+where thread_id=CONNECTION_ID();
+
+rollback;
+disconnect con1;
+connection default;
+drop table t0, t1;
+
diff --git a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2.test b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2.test
new file mode 100644
index 00000000000..703331cab9a
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2.test
@@ -0,0 +1,4 @@
+
+--let cf=rlsfu_test
+--source range_locking_seek_for_update2.inc
+
diff --git a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2_rev_cf.test b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2_rev_cf.test
new file mode 100644
index 00000000000..0620593b4e7
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update2_rev_cf.test
@@ -0,0 +1,4 @@
+
+--let cf=rev:rlsfu_test
+
+--source range_locking_seek_for_update2.inc
diff --git a/storage/rocksdb/rdb_locking_iter.cc b/storage/rocksdb/rdb_locking_iter.cc
index 1378de6f655..39858bd6f35 100644
--- a/storage/rocksdb/rdb_locking_iter.cc
+++ b/storage/rocksdb/rdb_locking_iter.cc
@@ -27,12 +27,14 @@ rocksdb::Iterator* GetLockingIterator(
 */
 
 void LockingIterator::Seek(const rocksdb::Slice& target) {
+  m_have_locked_until = false;
   m_iter = m_txn->GetIterator(m_read_opts, m_cfh);
   m_iter->Seek(target);
   ScanForward(target, false);
 }
 
 void LockingIterator::SeekForPrev(const rocksdb::Slice& target) {
+  m_have_locked_until = false;
   m_iter = m_txn->GetIterator(m_read_opts, m_cfh);
   m_iter->SeekForPrev(target);
   ScanBackward(target, false);
diff --git a/storage/rocksdb/rdb_locking_iter.h b/storage/rocksdb/rdb_locking_iter.h
index 433e2230c7d..25ad4b7fd5a 100644
--- a/storage/rocksdb/rdb_locking_iter.h
+++ b/storage/rocksdb/rdb_locking_iter.h
@@ -44,6 +44,14 @@ class LockingIterator : public rocksdb::Iterator {
   bool  m_valid;
 
   ulonglong *m_lock_count;
+
+  // If true, m_locked_until has a valid key value.
+  bool m_have_locked_until;
+
+  // The key value until we've locked the range. That is, we have a range lock
+  // on [current_position ... m_locked_until].
+  // This is used to avoid making extra GetRangeLock() calls.
+  std::string m_locked_until;
  public:
   LockingIterator(rocksdb::Transaction *txn,
                   rocksdb::ColumnFamilyHandle *cfh,
@@ -53,7 +61,7 @@ class LockingIterator : public rocksdb::Iterator {
                   ) :
     m_txn(txn), m_cfh(cfh), m_is_rev_cf(is_rev_cf), m_read_opts(opts), m_iter(nullptr),
     m_status(rocksdb::Status::InvalidArgument()), m_valid(false),
-    m_lock_count(lock_count) {}
+    m_lock_count(lock_count), m_have_locked_until(false) {}
 
   ~LockingIterator() {
     delete m_iter;
@@ -111,24 +119,43 @@ class LockingIterator : public rocksdb::Iterator {
         return;
       }
 
+      const int inv = forward ? 1 : -1;
+      auto cmp= m_cfh->GetComparator();
+
       auto end_key = m_iter->key();
       bool endp_arg= m_is_rev_cf;
-      if (forward) {
-        m_status = m_txn->GetRangeLock(m_cfh,
-                                     rocksdb::Endpoint(target, endp_arg),
-                                     rocksdb::Endpoint(end_key, endp_arg));
+
+      if (m_have_locked_until &&
+          cmp->Compare(end_key, rocksdb::Slice(m_locked_until))*inv <= 0) {
+        // We've already locked this range. The following has happened:
+        // - m_iter->key() returned $KEY
+        // - we got a range lock on [range_start, $KEY]
+        // - other transaction(s) have inserted row $ROW before the $KEY.
+        // - we've read $ROW and returned.
+        // Now, we're looking to lock [$ROW, $KEY] but we don't need to,
+        // we already have a lock on this range.
       } else {
-        m_status = m_txn->GetRangeLock(m_cfh,
-                                     rocksdb::Endpoint(end_key, endp_arg),
-                                     rocksdb::Endpoint(target, endp_arg));
-      }
+        if (forward) {
+          m_status = m_txn->GetRangeLock(m_cfh,
+                                       rocksdb::Endpoint(target, endp_arg),
+                                       rocksdb::Endpoint(end_key, endp_arg));
+        } else {
+          m_status = m_txn->GetRangeLock(m_cfh,
+                                       rocksdb::Endpoint(end_key, endp_arg),
+                                       rocksdb::Endpoint(target, endp_arg));
+        }
 
-      if (!m_status.ok()) {
-        // Failed to get a lock (most likely lock wait timeout)
-        m_valid = false;
-        return;
+        // Save the bound where we locked until:
+        m_have_locked_until= true;
+        m_locked_until.assign(end_key.data(), end_key.size());
+        if (!m_status.ok()) {
+          // Failed to get a lock (most likely lock wait timeout)
+          m_valid = false;
+          return;
+        }
+        if (m_lock_count)  (*m_lock_count)++;
       }
-      if (m_lock_count)  (*m_lock_count)++;
+
       std::string end_key_copy= end_key.ToString();
 
       //Ok, now we have a lock which is inhibiting modifications in the range
@@ -146,7 +173,6 @@ class LockingIterator : public rocksdb::Iterator {
       else
         m_iter->SeekForPrev(target);
 
-      auto cmp= m_cfh->GetComparator();
 
       if (call_next && m_iter->Valid() && !cmp->Compare(m_iter->key(), target)) {
         if (forward)
@@ -156,7 +182,6 @@ class LockingIterator : public rocksdb::Iterator {
       }
 
       if (m_iter->Valid()) {
-        int inv = forward ? 1 : -1;
         if (cmp->Compare(m_iter->key(), rocksdb::Slice(end_key_copy))*inv <= 0) {
           // Ok, the found key is within the range.
           m_status = rocksdb::Status::OK();


More information about the commits mailing list