[Commits] db9ec0cdafc: MDEV-26886: Estimation for filtered rows less precise with JSON_HB, part 2

psergey sergey at mariadb.com
Mon Nov 29 15:05:23 EET 2021


revision-id: db9ec0cdafcd9e4f36b836166dcbdbb8e1fc34ec (mariadb-10.6.1-319-gdb9ec0cdafc)
parent(s): 5b3f38986662d293fa228bc814e660a43d7063b6
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-11-29 16:05:23 +0300
message:

MDEV-26886: Estimation for filtered rows less precise with JSON_HB, part 2

Part #2: fix typos and add ASSERTs to check that Histogram_json_hb::
find_bucket has returned the right bucket.

---
 sql/opt_histogram_json.cc | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc
index e1a4efc6b28..82828a35f44 100644
--- a/sql/opt_histogram_json.cc
+++ b/sql/opt_histogram_json.cc
@@ -802,7 +802,8 @@ int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
     if (!res)
     {
       *equal= true;
-      return middle;
+      low= middle;
+      goto end;
     }
     else if (res < 0)
       low= middle;
@@ -812,24 +813,24 @@ int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
 
   /*
     If low and high were assigned a value in the above loop and we got here,
-    then they are not equal to the lookup value:
+    then the following holds:
 
       bucket[low].start_value < lookup_val < bucket[high].start_value
 
-    But there are two special cases: low=0 and high=last_bucket. Handle them
-    below.
+    Besides that, there are two special cases: low=0 and high=last_bucket.
+    Handle them below.
   */
   if (low == 0)
   {
     res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
     if (!res)
       *equal= true;
-    else if (res < 0)
+    else if (res < 0) //  buckets[0] < lookup_val
     {
       res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
       if (!res)
         *equal= true;
-      if (res >= 0)
+      if (res <= 0) // buckets[high] <= lookup_val
         low= high;
     }
   }
@@ -838,9 +839,19 @@ int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
     res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
     if (!res)
       *equal= true;
-    if (res >= 0)
+    if (res <= 0)
       low= high;
   }
 
+end:
+  // Verification: *equal==TRUE <=> lookup value equals the found bucket's start value.
+  DBUG_ASSERT(*equal == !(field->key_cmp((uchar*)buckets[low].start_value.data(),
+                                         lookup_val)));
+  // buckets[low] <= lookup_val, with the one exception of the first bucket.
+  DBUG_ASSERT(low == 0 ||
+              field->key_cmp((uchar*)buckets[low].start_value.data(), lookup_val)<= 0);
+  // buckets[low+1] > lookup_val, with the one exception of the last bucket.
+  DBUG_ASSERT(low == (int)buckets.size()-1 ||
+              field->key_cmp((uchar*)buckets[low+1].start_value.data(), lookup_val)> 0);
   return low;
 }


More information about the commits mailing list