[Commits] Rev 2832: DS-MRR improvements: better comments, use symbolic name instead of +1/-1 constants. in file:///home/psergey/dev2/maria-5.3-dsmrr-cpk-r5/

Sergey Petrunya psergey at askmonty.org
Sun Sep 19 00:05:52 EEST 2010


At file:///home/psergey/dev2/maria-5.3-dsmrr-cpk-r5/

------------------------------------------------------------
revno: 2832
revision-id: psergey at askmonty.org-20100918210547-x14bc3qm72q0b7f8
parent: psergey at askmonty.org-20100915165838-6hmbpg09t4rkphaw
committer: Sergey Petrunya <psergey at askmonty.org>
branch nick: maria-5.3-dsmrr-cpk-r5
timestamp: Sun 2010-09-19 01:05:47 +0400
message:
  DS-MRR improvements: better comments, use symbolic name instead of +1/-1 constants.
=== modified file 'sql/multi_range_read.cc'
--- a/sql/multi_range_read.cc	2010-09-15 12:58:01 +0000
+++ b/sql/multi_range_read.cc	2010-09-18 21:05:47 +0000
@@ -338,6 +338,7 @@
     return (write_pos - bytes >= start);
 }
 
+
 size_t SimpleBuffer::used_size()
 {
   return (direction == 1)? write_pos - read_pos : read_pos - write_pos;
@@ -354,16 +355,18 @@
   read_size2= len2;
 }
 
+
 bool SimpleBuffer::read()
 {
   if (!have_data(read_size1 + (read_ptr2? read_size2 : 0)))
     return TRUE;
-  *read_ptr1 =read(read_size1);
+  *read_ptr1= read(read_size1);
   if (read_ptr2)
     *read_ptr2= read(read_size2);
   return FALSE;
 }
 
+
 uchar *SimpleBuffer::read(size_t bytes)
 {
   DBUG_ASSERT(have_data(bytes));
@@ -381,12 +384,14 @@
   }
 }
 
+
 bool SimpleBuffer::have_data(size_t bytes)
 {
   return (direction == 1)? (write_pos - read_pos >= (ptrdiff_t)bytes) : 
                            (read_pos - write_pos >= (ptrdiff_t)bytes);
 }
 
+
 void SimpleBuffer::reset_for_writing()
 {
   if (direction == 1)
@@ -493,7 +498,7 @@
   */
   full_buf= buf->buffer;
   full_buf_end= buf->buffer_end;
-  rowid_buffer.set_buffer_space(full_buf, full_buf_end, 1);
+  rowid_buffer.set_buffer_space(full_buf, full_buf_end, SimpleBuffer::FORWARD);
   
   if (do_sort_keys)
   {
@@ -819,10 +824,11 @@
   if (!do_rowid_fetch)
   {
     /* Give all space to key buffer. */
-    key_buffer.set_buffer_space(full_buf, full_buf_end, 1);
+    key_buffer.set_buffer_space(full_buf, full_buf_end, SimpleBuffer::FORWARD);
 
     /* Just in case, tell rowid buffer that it has zero size: */
-    rowid_buffer.set_buffer_space(full_buf_end, full_buf_end, 1);
+    rowid_buffer.set_buffer_space(full_buf_end, full_buf_end, 
+                                  SimpleBuffer::FORWARD);
     return;
   }
   
@@ -865,8 +871,10 @@
   }
 
   rowid_buffer_end= full_buf + bytes_for_rowids;
-  rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end, 1);
-  key_buffer.set_buffer_space(rowid_buffer_end, full_buf_end, -1); 
+  rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end, 
+                                SimpleBuffer::FORWARD);
+  key_buffer.set_buffer_space(rowid_buffer_end, full_buf_end, 
+                              SimpleBuffer::BACKWARD); 
 }
 
 
@@ -906,8 +914,10 @@
         We're using two buffers and both of them are empty now. Restore the
         original sizes
       */
-      rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end, 1);
-      key_buffer.set_buffer_space(rowid_buffer_end, full_buf_end, -1);
+      rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end,
+                                    SimpleBuffer::FORWARD);
+      key_buffer.set_buffer_space(rowid_buffer_end, full_buf_end,
+                                  SimpleBuffer::BACKWARD);
     }
     key_buffer.reset_for_writing();
     key_buffer.setup_writing(&key_ptr, key_size_in_keybuf,

=== modified file 'sql/multi_range_read.h'
--- a/sql/multi_range_read.h	2010-09-15 12:58:01 +0000
+++ b/sql/multi_range_read.h	2010-09-18 21:05:47 +0000
@@ -6,9 +6,14 @@
 /**
   A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
 
-  This is a "plugin"(*) for storage engines that allows make index scans 
-  read table rows in rowid order. For disk-based storage engines, this is
-  faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
+  This is a "plugin"(*) for storage engines that allows to
+    1. When doing index scans, read table rows in rowid order;
+    2. when making many index lookups, do them in key order and don't
+       lookup the same key value multiple times;
+    3. Do both #1 and #2, when applicable.
+  These changes are expected to speed up query execution for disk-based 
+  storage engines running io-bound loads and "big" queries (ie. queries that
+  do joins and enumerate lots of records).
 
   (*) - only conceptually. No dynamic loading or binary compatibility of any
         kind.
@@ -17,20 +22,25 @@
    
       SQL Layer code
        |   |   |
-      -v---v---v---- handler->multi_range_read_XXX() function calls
+       v   v   v 
+      -|---|---|---- handler->multi_range_read_XXX() function calls
        |   |   |
-      ____________________________________
-     / DS-MRR module                      \
-     |  (scan indexes, order rowids, do    |
-     |   full record reads in rowid order) |
-     \____________________________________/
+      _____________________________________
+     / DS-MRR module                       \
+     | (order/de-duplicate lookup keys,    |
+     | scan indexes in key order,          |
+     | order/de-duplicate rowids,          |
+     | retrieve full record reads in rowid |
+     | order)                              |
+     \_____________________________________/
        |   |   |
       -|---|---|----- handler->read_range_first()/read_range_next(), 
        |   |   |      handler->index_read(), handler->rnd_pos() calls.
        |   |   |
        v   v   v
       Storage engine internals
-   
+
+
   Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
   Potentially it can be used with any table handler that has disk-based data
   storage and has better performance when reading data in rowid order.
@@ -38,77 +48,104 @@
 
 
 /*
-  A simple memory buffer for reading and writing.
-
-  when writing, there is no user-visible "current" position, although
-  internally 'pos' points to just after the end of used area  (or at the 
-  start of it for reverse buffer).
-
-  When reading, there is current position pointing at start (for reverse
-  buffer, end) of the element that will be read next.
-   ^^ why end for reverse? it's more logical to point at start 
+  An in-memory buffer used by DS-MRR implementation. 
+  - The buffer contains fixed-size elements. The elements are either atomic
+    byte sequences or pairs.
+  - The buffer resides in memory provided by the user. It is possible to
+     = dynamically (ie. between write operations) add ajacent memory space to
+       the buffer
+     = dynamically remove unused space from the buffer.
+  - Buffer can be set to be either "forward" or "backward". 
+
+  The intent of the last two properties is to allow to have two buffers on
+  adjacent memory space, one is being read from (and so its space shrinks)
+  while the other is being written to (and so it needs more and more space).
+
+  Illustration of forward buffer operation:
+
+                         +-- next read will read from here
+                         |
+                         |               +-- next write will write to here
+                         v               v
+        *--------------*===============*----------------*
+        |       ^      |          ^    |                |
+        |       |      read_pos   |    write_pos        |
+        start   |                 |                     end
+                |                 |            
+              usused space         user data
+
 */
 
 class SimpleBuffer
 {
-  uchar *start;
-  uchar *end;
+public:
+
+  enum enum_direction {
+    BACKWARD=-1, /* buffer is filled/read from bigger to smaller memory addresses */
+    FORWARD=1  /* buffer is filled/read from smaller to bigger memory addresses */
+  };
+
+private:
+  enum_direction direction;
+
+  uchar *start; /* points to start of buffer space */
+  uchar *end;   /* points to just beyond the end of buffer space */
+  /*
+    Forward buffer: points to the start of the data that will be read next
+    Backward buffer: points to just beyond the end of the data that will be 
+    read next.
+  */
   uchar *read_pos;
+  /*
+    Forward buffer: points to just after the end of the used area.
+    Backward buffer: points to the start of used area.
+  */
   uchar *write_pos;
-  
-  /*
-     1 <=> buffer grows/is filled/is read from start to end
-    -1 <=> everthing is done from end to start instead.
+
+  /* 
+    Data to be written. write() call will assume that (*write_ptr1) points to 
+    write_size1 bytes of data to be written.
+    If write_ptr2!=NULL then the buffer stores pairs, and (*write_ptr2) points
+    to write_size2 bytes of data that form the second component.
   */
-  int direction;
-  
-  /* Pointers to read data from */
   uchar **write_ptr1;
   size_t write_size1;
-  /* Same as above, but may be NULL */
   uchar **write_ptr2;
   size_t write_size2;
 
-  /* Pointers to write data to */
+  /*
+    read() will do reading by storing pointer to read data into *read_ptr1 (if
+    the buffer stores atomic elements), or into {*read_ptr1, *read_ptr2} (if
+    the buffer stores pairs).
+  */
+  //TODO if write_size1 == read_size1 why have two variables??
   uchar **read_ptr1;
   size_t read_size1;
-  /* Same as above, but may be NULL */
   uchar **read_ptr2;
   size_t read_size2;
 
-  bool have_space_for(size_t bytes);
-  uchar *used_area() { return (direction == 1)? read_pos : write_pos; }
-  size_t used_size();
-
-  void write(const uchar *data, size_t bytes);
-  uchar *read(size_t bytes);
-
 public:
-  /* Set up writing*/
+  /* Write-mode functions */
   void setup_writing(uchar **data1, size_t len1, 
                      uchar **data2, size_t len2);
-
-  void sort(qsort2_cmp cmp_func, void *cmp_func_arg);
-
-  /* Write-mode functions */
   void reset_for_writing();
+  bool can_write();
   void write();
-  bool can_write();
 
+  /* Read-mode functions */
   bool is_empty() { return used_size() == 0; }
-
-  /* Read-mode functions */
   void reset_for_reading();
-  // todo: join with setup-writing? (but what for?)
   void setup_reading(uchar **data1, size_t len1, 
                      uchar **data2, size_t len2);
   bool read();
 
-  bool have_data(size_t bytes);
+  /* Misc functions */
+  void sort(qsort2_cmp cmp_func, void *cmp_func_arg);
+  bool is_reverse() { return direction == BACKWARD; }
   uchar *end_of_space();
 
-  /* Control functions */
-  void set_buffer_space(uchar *start_arg, uchar *end_arg, int direction_arg) 
+  /* Buffer space control functions */
+  void set_buffer_space(uchar *start_arg, uchar *end_arg, enum_direction direction_arg) 
   {
     start= start_arg;
     end= end_arg;
@@ -116,10 +153,10 @@
     TRASH(start, end - start);
     reset_for_writing();
   }
-  
+
   /*
-    Stop/return the unneded space (the one that we have wrote to and have read
-    from.
+    Stop using/return the unneded space (the one that we have already wrote 
+    to read from).
   */
   void remove_unused_space(uchar **unused_start, uchar **unused_end)
   {
@@ -142,9 +179,8 @@
     uchar *tmp= read_pos;
     read_pos= write_pos;
     write_pos= tmp;
-    direction= -direction;
+    direction= (direction == FORWARD)? BACKWARD: FORWARD;
   }
-  bool is_reverse() { return direction == -1; }
 
   void grow(uchar *unused_start, uchar *unused_end)
   {
@@ -173,10 +209,13 @@
   */
   class PeekIterator
   {
-    // if direction==1 : pointer to what to return next
-    // if direction==-1: pointer to the end of what is to be returned next
+    /*
+      if sb->direction==1 : pointer to what to return next
+      if sb->direction==-1: pointer to the end of what is to be returned next
+    */
     uchar *pos;
     SimpleBuffer *sb;
+    
   public:
     void init(SimpleBuffer *sb_arg)
     {
@@ -190,8 +229,10 @@
     */
     bool read_next()
     {
-      // Always read the first component first? (because we do inverted-writes
-      // if needed, so no measures need to be taken here).
+      /* 
+        Always read the first component first (if the buffer is backwards, we
+        have written the second component first).
+      */
       uchar *res;
       if ((res= get_next(sb->read_size1)))
       {
@@ -223,6 +264,16 @@
       }
     }
   };
+
+private:
+  bool have_space_for(size_t bytes);
+  /* Return pointer to start of the memory area that is occupied by the data */
+  uchar *used_area() { return (direction == FORWARD)? read_pos : write_pos; }
+  size_t used_size();
+
+  void write(const uchar *data, size_t bytes);
+  uchar *read(size_t bytes);
+  bool have_data(size_t bytes);
 };
 
 
@@ -231,11 +282,11 @@
   each ha_{myisam/innobase/etc} object. That object will be further referred to
   as "the handler"
 
-  There are actually three strategies
-   S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
+  DsMrr_impl has the following execution strategies:
+   S1. Bypass DS-MRR, pass all calls to default MRR implementation (i.e. to
       MRR-to-non-MRR calls converter)
-   S2. Regular DS-MRR 
-   S3. DS-MRR/CPK for doing scans on clustered primary keys.
+   S2. Sort Keys
+   S3. Sort Rowids
 
   S1 is used for cases which DS-MRR is unable to handle for some reason.
 
@@ -294,9 +345,13 @@
   handler *h;
   TABLE *table; /* Always equal to h->table */
 
-  /* Secondary handler object.  It is used for scanning the index */
+  /*
+    Secondary handler object, if needed (we need it when we need to both scan
+    the index and return rows).
+  */
   handler *h2;
-
+  
+  /* Full buffer that we're using (the buffer is obtained from SQL layer) */
   uchar *full_buf;
   uchar *full_buf_end;
   



More information about the commits mailing list