[Commits] 0f4980afd8f: Apply patch: Use Get for secondary key point lookups

psergey sergey at mariadb.com
Mon May 17 17:37:11 EEST 2021


revision-id: 0f4980afd8faa61b23a3008d3cba726881072174 (percona-202102-51-g0f4980afd8f)
parent(s): d89e160443823d0f07691e99067e15b06d532ccc
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-05-17 17:37:01 +0300
message:

Apply patch: Use Get for secondary key point lookups

Summary: Today, for secondary key lookups where we have the full key, we still use an iterator to read the key even though a point query (Get) would suffice. Switching to Get is more efficient and also makes the bloom filter code cleaner.

Test Plan: mtr

Reviewers: luqun, herman, yzha, #mysql_eng

Subscribers: pgl

Differential Revision: https://phabricator.intern.facebook.com/D25906968

---
 mysql-test/suite/rocksdb/r/bloomfilter.result      | 120 +++++++-------
 mysql-test/suite/rocksdb/r/rocksdb_range.result    |  12 ++
 .../suite/rocksdb/t/bloomfilter_load_select.inc    |   2 +
 mysql-test/suite/rocksdb/t/rocksdb_range.test      |   8 +
 storage/rocksdb/ha_rocksdb.cc                      | 179 +++++++++++++++------
 storage/rocksdb/ha_rocksdb.h                       |  20 ++-
 storage/rocksdb/nosql_access.cc                    |   5 +-
 7 files changed, 226 insertions(+), 120 deletions(-)

diff --git a/mysql-test/suite/rocksdb/r/bloomfilter.result b/mysql-test/suite/rocksdb/r/bloomfilter.result
index 7c5b479da2e..cd2d2b671f9 100644
--- a/mysql-test/suite/rocksdb/r/bloomfilter.result
+++ b/mysql-test/suite/rocksdb/r/bloomfilter.result
@@ -121,28 +121,28 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=1;
 count(*)
@@ -240,14 +240,14 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
 count(*)
@@ -331,42 +331,42 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=23 and id4=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=500 and id4=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=601 and id4=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2) where id2=1;
 count(*)
@@ -519,28 +519,28 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=1;
 count(*)
@@ -638,14 +638,14 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
 count(*)
@@ -729,42 +729,42 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=23 and id4=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=500 and id4=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=601 and id4=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2) where id2=1;
 count(*)
@@ -917,28 +917,28 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=1;
 count(*)
@@ -1036,14 +1036,14 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
 count(*)
@@ -1127,42 +1127,42 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=23 and id4=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=500 and id4=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=601 and id4=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2) where id2=1;
 count(*)
@@ -1315,28 +1315,28 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=1;
 count(*)
@@ -1434,14 +1434,14 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
 count(*)
@@ -1525,42 +1525,42 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=23 and id4=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=500 and id4=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=601 and id4=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2) where id2=1;
 count(*)
@@ -1713,28 +1713,28 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=1;
 count(*)
@@ -1832,14 +1832,14 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
 count(*)
@@ -1923,42 +1923,42 @@ count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=23 and id4=115;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=500 and id4=2500;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id2) where id2=601 and id4=3005;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
 count(*)
 1
 call bloom_end();
 checked
-true
+false
 call bloom_start();
 select count(*) from t1 force index (id2) where id2=1;
 count(*)
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_range.result b/mysql-test/suite/rocksdb/r/rocksdb_range.result
index a3d7839712d..4352f579759 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb_range.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb_range.result
@@ -1,6 +1,7 @@
 select * from information_schema.engines where engine = 'rocksdb';
 ENGINE	SUPPORT	COMMENT	TRANSACTIONS	XA	SAVEPOINTS
 ROCKSDB	DEFAULT	RocksDB storage engine	YES	YES	YES
+set optimizer_force_index_for_range = on;
 drop table if exists t0,t1,t2,t3,t4,t5;
 create table t0 (a int) engine=myisam;
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
@@ -64,6 +65,16 @@ Note	1003	/* select#1 */ select `test`.`t2`.`pk` AS `pk`,`test`.`t2`.`a` AS `a`,
 select * from t2 force index (a) where a=3 and pk=33;
 pk	a	b
 33	3	33
+explain 
+select * from t2 force index (a) where a=3 and pk in (33, 34);
+id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t2	NULL	range	a	a	8	NULL	#	#	Using index condition
+Warnings:
+Note	1003	/* select#1 */ select `test`.`t2`.`pk` AS `pk`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` FORCE INDEX (`a`) where ((`test`.`t2`.`a` = 3) and (`test`.`t2`.`pk` in (33,34)))
+select * from t2 force index (a) where a=3 and pk in (33, 34);
+pk	a	b
+33	3	33
+34	3	34
 select * from t2 force index (a) where a=99 and pk=99;
 pk	a	b
 select * from t2 force index (a) where a=0 and pk=0;
@@ -316,3 +327,4 @@ Note	1003	/* select#1 */ select `test`.`t5`.`pk` AS `pk`,`test`.`t5`.`a` AS `a`,
 select * from t5 where a=5 and b in (4) order by c desc;
 pk	a	b	c
 drop table t0,t1,t2,t3,t4,t5;
+set optimizer_force_index_for_range = off;
diff --git a/mysql-test/suite/rocksdb/t/bloomfilter_load_select.inc b/mysql-test/suite/rocksdb/t/bloomfilter_load_select.inc
index 1f1a4b9810f..504507f9a35 100644
--- a/mysql-test/suite/rocksdb/t/bloomfilter_load_select.inc
+++ b/mysql-test/suite/rocksdb/t/bloomfilter_load_select.inc
@@ -187,3 +187,5 @@ call bloom_start();
 select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
 call bloom_end();
 
+--remove_file $MYSQL_TMP_DIR/insert_t1.sql
+--remove_file $MYSQL_TMP_DIR/insert_t2.sql
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_range.test b/mysql-test/suite/rocksdb/t/rocksdb_range.test
index 2f5928e6351..d9a39d67de4 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb_range.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb_range.test
@@ -5,6 +5,8 @@
 #
 select * from information_schema.engines where engine = 'rocksdb';
 
+set optimizer_force_index_for_range = on;
+
 --disable_warnings
 drop table if exists t0,t1,t2,t3,t4,t5;
 --enable_warnings
@@ -46,6 +48,11 @@ explain
 select * from t2 force index (a) where a=3 and pk=33;
 select * from t2 force index (a) where a=3 and pk=33;
 
+--replace_column 10 # 11 #
+explain 
+select * from t2 force index (a) where a=3 and pk in (33, 34);
+select * from t2 force index (a) where a=3 and pk in (33, 34);
+
 select * from t2 force index (a) where a=99 and pk=99;
 select * from t2 force index (a) where a=0 and pk=0;
 select * from t2 force index (a) where a=-1;
@@ -191,3 +198,4 @@ select * from t5 where a=5 and b in (4) order by c desc;
 select * from t5 where a=5 and b in (4) order by c desc;
 
 drop table t0,t1,t2,t3,t4,t5;
+set optimizer_force_index_for_range = off;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 91791b337af..2f41ba40a17 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -8974,7 +8974,8 @@ ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const {
   m_scan_it points at the index key-value pair that we should read the (pk,row)
   pair for.
 */
-int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
+int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf,
+                                     const rocksdb::Slice *value) {
   DBUG_ASSERT(table != nullptr);
 
   if (m_iteration_only) {
@@ -8983,7 +8984,6 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
   }
 
   int rc = 0;
-  rocksdb::Slice value = m_scan_it->value();
 
 #ifndef DBUG_OFF
   bool save_keyread_only = m_keyread_only;
@@ -8992,7 +8992,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
   bool covered_lookup =
       (m_keyread_only && m_key_descr_arr[keyno]->can_cover_lookup()) ||
           m_key_descr_arr[keyno]->covers_lookup(
-              &value, m_converter->get_lookup_bitmap());
+              value, m_converter->get_lookup_bitmap());
 #ifndef DBUG_OFF
   m_keyread_only = save_keyread_only;
 #endif
@@ -9045,6 +9045,7 @@ int ha_rocksdb::index_read_intern(uchar *const buf, const uchar *const key,
 
   const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
   bool using_full_key = false;
+  m_full_key_lookup = false;
 
   uint packed_size;
 
@@ -9060,6 +9061,7 @@ int ha_rocksdb::index_read_intern(uchar *const buf, const uchar *const key,
       Handle some special cases when we do exact key lookups.
     */
     if (find_flag == HA_READ_KEY_EXACT && using_full_key) {
+      m_full_key_lookup = true;
       if (active_index == table->s->primary_key) {
         /*
           Equality lookup over primary key, using full tuple.
@@ -9122,6 +9124,26 @@ int ha_rocksdb::index_read_intern(uchar *const buf, const uchar *const key,
 
           rc = get_row_by_rowid(buf, m_last_rowkey.ptr(),
                                 m_last_rowkey.length());
+          release_scan_iterator();
+          DBUG_RETURN(rc);
+        }
+
+        if (using_full_key) {
+          packed_size = kd.pack_index_tuple(
+              table, m_pack_buffer, m_sk_packed_tuple, key, keypart_map);
+          rocksdb::Slice key_slice(
+              reinterpret_cast<const char *>(m_sk_packed_tuple), packed_size);
+
+          rc = get_row_by_sk(buf, kd, &key_slice);
+
+          if (!rc) {
+            /* TODO(yzha) - row stats are gone in 8.0
+              stats.rows_read++;
+              stats.rows_index_first++; */
+            update_row_stats(ROWS_READ);
+          }
+
+          release_scan_iterator();
           DBUG_RETURN(rc);
         }
       }
@@ -9165,9 +9187,7 @@ int ha_rocksdb::index_read_intern(uchar *const buf, const uchar *const key,
       This will open the iterator and position it at a record that's equal or
       greater than the lookup tuple.
     */
-    setup_scan_iterator(kd, &slice,
-                        using_full_key && (find_flag == HA_READ_KEY_EXACT),
-                        eq_cond_len);
+    setup_scan_iterator(kd, &slice, eq_cond_len);
 
     /*
       Once we are positioned on from above, move to the position we really
@@ -9584,6 +9604,50 @@ int ha_rocksdb::records_from_index(ha_rows *num_rows, uint index) {
   return count;
 }
 
+/*
+  The analogous function to ha_rocksdb::get_row_by_rowid for performing
+  secondary key lookups.
+
+  @return
+    HA_EXIT_SUCCESS  OK
+    other            HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::get_row_by_sk(uchar *buf, const Rdb_key_def &kd,
+                              const rocksdb::Slice *key) {
+  DBUG_ENTER_FUNC();
+  Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+
+  auto s = tx->get(kd.get_cf(), *key, &m_retrieved_record);
+
+  if (!s.IsNotFound() && !s.ok()) {
+    DBUG_RETURN(
+        tx->set_status_error(table->in_use, s, kd, m_tbl_def, m_table_handler));
+  }
+
+  if (s.IsNotFound()) {
+    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
+  }
+
+  if (kd.has_ttl() &&
+      should_hide_ttl_rec(kd, m_retrieved_record, tx->m_snapshot_timestamp)) {
+    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
+  }
+
+  const uint size =
+      kd.get_primary_key_tuple(table, *m_pk_descr, key, m_pk_packed_tuple);
+  if (size == RDB_INVALID_KEY_LEN) {
+    DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA);
+  }
+
+  m_last_rowkey.copy((const char *)m_pk_packed_tuple, size, &my_charset_bin);
+
+  int rc = secondary_index_read(active_index, buf, &m_retrieved_record);
+  if (!rc) {
+    table->m_status = 0;
+  }
+  DBUG_RETURN(rc);
+}
+
 /**
   @return
     HA_EXIT_SUCCESS  OK
@@ -9596,6 +9660,26 @@ int ha_rocksdb::index_next(uchar *const buf) {
   DBUG_RETURN(index_next_with_direction_intern(buf, true, false));
 }
 
+/**
+  @return
+    HA_EXIT_SUCCESS  OK
+    other            HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::index_next_same(uchar *const buf,
+                                const uchar *key MY_ATTRIBUTE((unused)),
+                                uint keylen MY_ATTRIBUTE((unused))) {
+  DBUG_ENTER_FUNC();
+
+  if (m_full_key_lookup) {
+#ifndef DBUG_OFF
+    uint len = calculate_key_len(table, active_index, HA_WHOLE_KEY);
+    DBUG_ASSERT(len == keylen);
+#endif
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  }
+  DBUG_RETURN(index_next(buf));
+}
+
 /**
   @return
     HA_EXIT_SUCCESS  OK
@@ -9706,7 +9790,7 @@ int ha_rocksdb::index_next_with_direction_intern(uchar *const buf,
       m_last_rowkey.copy((const char *)m_pk_packed_tuple, size,
                          &my_charset_bin);
 
-      rc = secondary_index_read(active_index, buf);
+      rc = secondary_index_read(active_index, buf, &value);
     }
 
     if (!should_skip_invalidated_record(rc)) {
@@ -10362,35 +10446,48 @@ int ha_rocksdb::check_and_lock_sk(
   rocksdb::Slice lower_bound_slice;
   rocksdb::Slice upper_bound_slice;
 
-  const bool total_order_seek = !check_bloom_and_set_bounds(
-      ha_thd(), kd, new_slice, all_parts_used, Rdb_key_def::INDEX_ID_SIZE,
-      lower_bound_buf, upper_bound_buf, &lower_bound_slice, &upper_bound_slice);
-  const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache);
-
-  const rocksdb::Status s = get_for_update(row_info.tx, kd, new_slice, nullptr);
+  const rocksdb::Status s =
+      get_for_update(row_info.tx, kd, new_slice,
+                     all_parts_used ? &m_retrieved_record : nullptr);
   if (!s.ok() && !s.IsNotFound()) {
     return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
                                          m_table_handler);
   }
 
-  rocksdb::Iterator *const iter = row_info.tx->get_iterator(
-      kd.get_cf(), total_order_seek, fill_cache, lower_bound_slice,
-      upper_bound_slice, true /* read current data */,
-      false /* acquire snapshot */);
-  /*
-    Need to scan the transaction to see if there is a duplicate key.
-    Also need to scan RocksDB and verify the key has not been deleted
-    in the transaction.
-  */
-  DBUG_ASSERT(row_info.tx->has_snapshot() &&
-              row_info.tx->m_snapshot_timestamp != 0);
-  *found =
-      !read_key_exact(kd, iter, new_slice, row_info.tx->m_snapshot_timestamp);
+  rocksdb::Iterator *iter = nullptr;
+
+  if (all_parts_used) {
+    *found = !s.IsNotFound();
+    if (*found && kd.has_ttl() &&
+        should_hide_ttl_rec(kd, m_retrieved_record,
+                            row_info.tx->m_snapshot_timestamp)) {
+      *found = false;
+    }
+  } else {
+    const bool total_order_seek = !check_bloom_and_set_bounds(
+        ha_thd(), kd, new_slice, Rdb_key_def::INDEX_ID_SIZE, lower_bound_buf,
+        upper_bound_buf, &lower_bound_slice, &upper_bound_slice);
+    const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache);
+
+    iter = row_info.tx->get_iterator(kd.get_cf(), total_order_seek, fill_cache,
+                                     lower_bound_slice, upper_bound_slice,
+                                     true /* read current data */,
+                                     false /* acquire snapshot */);
+    /*
+      Need to scan the transaction to see if there is a duplicate key.
+      Also need to scan RocksDB and verify the key has not been deleted
+      in the transaction.
+    */
+    DBUG_ASSERT(row_info.tx->has_snapshot() &&
+                row_info.tx->m_snapshot_timestamp != 0);
+    *found =
+        !read_key_exact(kd, iter, new_slice, row_info.tx->m_snapshot_timestamp);
+  }
 
   int rc = HA_EXIT_SUCCESS;
 
   if (*found && m_insert_with_update) {
-    const rocksdb::Slice &rkey = iter->key();
+    const rocksdb::Slice &rkey = all_parts_used ? new_slice : iter->key();
     uint pk_size =
         kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
     if (pk_size == RDB_INVALID_KEY_LEN) {
@@ -10979,7 +11076,6 @@ void ha_rocksdb::setup_iterator_bounds(
 
 void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
                                      rocksdb::Slice *const slice,
-                                     const bool use_all_keys,
                                      const uint eq_cond_len) {
   DBUG_ASSERT(slice->size() >= eq_cond_len);
 
@@ -10997,7 +11093,7 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
   // See ha_rocksdb::setup_iterator_bounds on how the bound_len parameter is
   // used.
   if (check_bloom_and_set_bounds(
-          ha_thd(), kd, eq_cond, use_all_keys,
+          ha_thd(), kd, eq_cond,
           std::max(eq_cond_len, (uint)Rdb_key_def::INDEX_ID_SIZE),
           m_scan_it_lower_bound, m_scan_it_upper_bound,
           &m_scan_it_lower_bound_slice, &m_scan_it_upper_bound_slice)) {
@@ -14955,10 +15051,9 @@ bool ha_rocksdb::can_assume_tracked(THD *thd) {
 
 bool ha_rocksdb::check_bloom_and_set_bounds(
     THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond,
-    const bool use_all_keys, size_t bound_len, uchar *const lower_bound,
-    uchar *const upper_bound, rocksdb::Slice *lower_bound_slice,
-    rocksdb::Slice *upper_bound_slice) {
-  bool can_use_bloom = can_use_bloom_filter(thd, kd, eq_cond, use_all_keys);
+    size_t bound_len, uchar *const lower_bound, uchar *const upper_bound,
+    rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice) {
+  bool can_use_bloom = can_use_bloom_filter(thd, kd, eq_cond);
   if (!can_use_bloom && (THDVAR(thd, enable_iterate_bounds))) {
     setup_iterator_bounds(kd, eq_cond, bound_len, lower_bound, upper_bound,
                           lower_bound_slice, upper_bound_slice);
@@ -14981,12 +15076,9 @@ bool ha_rocksdb::check_bloom_and_set_bounds(
   @param kd
   @param eq_cond      Equal condition part of the key. This always includes
                       system index id (4 bytes).
-  @param use_all_keys True if all key parts are set with equal conditions.
-                      This is aware of extended keys.
 */
 bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
-                                      const rocksdb::Slice &eq_cond,
-                                      const bool use_all_keys) {
+                                      const rocksdb::Slice &eq_cond) {
   bool can_use = false;
 
   if (THDVAR(thd, skip_bloom_filter_on_read)) {
@@ -15013,18 +15105,7 @@ bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
       shorter require all parts of the key to be available
       for the short key match.
     */
-    if ((use_all_keys && prefix_extractor->InRange(eq_cond)) ||
-        prefix_extractor->SameResultWhenAppended(eq_cond)) {
-      can_use = true;
-    } else {
-      can_use = false;
-    }
-  } else {
-    /*
-      if prefix extractor is not defined, all key parts have to be
-      used by eq_cond.
-    */
-    if (use_all_keys) {
+    if (prefix_extractor->SameResultWhenAppended(eq_cond)) {
       can_use = true;
     } else {
       can_use = false;
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 11913a3aec6..9c3c3927498 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -278,6 +278,8 @@ class ha_rocksdb : public my_core::handler {
   bool m_iteration_only;
   bool m_rnd_scan_started;
 
+  bool m_full_key_lookup = false;
+
   /*
     true means INSERT ON DUPLICATE KEY UPDATE. In such case we can optimize by
     remember the failed attempt (if there is one that violates uniqueness check)
@@ -322,7 +324,8 @@ class ha_rocksdb : public my_core::handler {
                       const TABLE *const old_table_arg = nullptr,
                       const Rdb_tbl_def *const old_tbl_def_arg = nullptr) const
       MY_ATTRIBUTE((__nonnull__(2, 3), __warn_unused_result__));
-  int secondary_index_read(const int keyno, uchar *const buf)
+  int secondary_index_read(const int keyno, uchar *const buf,
+                           const rocksdb::Slice *value)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
   static void setup_iterator_bounds(const Rdb_key_def &kd,
                                     const rocksdb::Slice &eq_cond,
@@ -331,11 +334,9 @@ class ha_rocksdb : public my_core::handler {
                                     rocksdb::Slice *lower_bound_slice,
                                     rocksdb::Slice *upper_bound_slice);
   static bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
-                                   const rocksdb::Slice &eq_cond,
-                                   const bool use_all_keys);
+                                   const rocksdb::Slice &eq_cond);
   void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice,
-                           const bool use_all_keys, const uint eq_cond_len)
-      MY_ATTRIBUTE((__nonnull__));
+                           const uint eq_cond_len) MY_ATTRIBUTE((__nonnull__));
   void release_scan_iterator(void);
 
   rocksdb::Status get_for_update(Rdb_transaction *const tx,
@@ -354,6 +355,8 @@ class ha_rocksdb : public my_core::handler {
     return get_row_by_rowid(buf, reinterpret_cast<const char *>(rowid),
                             rowid_size, skip_lookup, skip_ttl_check);
   }
+  int get_row_by_sk(uchar *buf, const Rdb_key_def &kd,
+                    const rocksdb::Slice *key);
 
   void load_auto_incr_value();
   ulonglong load_auto_incr_value_from_index();
@@ -628,6 +631,8 @@ class ha_rocksdb : public my_core::handler {
 
   int index_next(uchar *const buf) override
       MY_ATTRIBUTE((__warn_unused_result__));
+  int index_next_same(uchar *const buf, const uchar *key, uint keylen) override
+      MY_ATTRIBUTE((__warn_unused_result__));
   int index_prev(uchar *const buf) override
       MY_ATTRIBUTE((__warn_unused_result__));
 
@@ -659,9 +664,8 @@ class ha_rocksdb : public my_core::handler {
 
   static bool check_bloom_and_set_bounds(
       THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond,
-      const bool use_all_keys, size_t bound_len, uchar *const lower_bound,
-      uchar *const upper_bound, rocksdb::Slice *lower_bound_slice,
-      rocksdb::Slice *upper_bound_slice);
+      size_t bound_len, uchar *const lower_bound, uchar *const upper_bound,
+      rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice);
 
  private:
   // true <=> The scan uses the default MRR implementation, just redirect all
diff --git a/storage/rocksdb/nosql_access.cc b/storage/rocksdb/nosql_access.cc
index b01100b41e6..1788486ee9f 100644
--- a/storage/rocksdb/nosql_access.cc
+++ b/storage/rocksdb/nosql_access.cc
@@ -1506,9 +1506,8 @@ bool INLINE_ATTR select_exec::setup_iterator(txn_wrapper *txn,
   m_lower_bound_buf.reserve(bound_len);
   m_upper_bound_buf.reserve(bound_len);
   bool use_bloom = ha_rocksdb::check_bloom_and_set_bounds(
-      m_thd, *m_key_def, eq_slice, m_use_full_key, bound_len,
-      m_lower_bound_buf.data(), m_upper_bound_buf.data(), &m_lower_bound_slice,
-      &m_upper_bound_slice);
+      m_thd, *m_key_def, eq_slice, bound_len, m_lower_bound_buf.data(),
+      m_upper_bound_buf.data(), &m_lower_bound_slice, &m_upper_bound_slice);
   rocksdb::Iterator *it = txn->get_iterator(
       m_key_def->get_cf(), use_bloom, m_lower_bound_slice, m_upper_bound_slice);
   if (it == nullptr) {


More information about the commits mailing list