[Commits] Rev 2881: use bulk insert and repair by sort for unique keys in in http://bazaar.launchpad.net/~maria-captains/maria/5.3/

serg at askmonty.org serg at askmonty.org
Fri Jan 14 13:03:45 EET 2011


At http://bazaar.launchpad.net/~maria-captains/maria/5.3/

------------------------------------------------------------
revno: 2881
revision-id: sergii at pisem.net-20110114110341-mh8yex14gkwp5bgd
parent: sergii at pisem.net-20110114105845-ol0sfi01g52sw8y3
committer: Sergei Golubchik <sergii at pisem.net>
branch nick: 5.3-monty
timestamp: Fri 2011-01-14 12:03:41 +0100
message:
  use bulk insert and repair by sort for unique keys in
  Aria and MyISAM in create_internal_tmp_table_from_heap()
  (safe, as duplicates are impossible).
  This gives a HUGE speed boost!
  
  sql/opt_subselect.cc:
    Fixed problem with wrong recinfo in create_duplicate_weedout_tmp_table()
    Tagged the table with 'no_rows' so that when we create the table on disk,
    we only store the index data. This gave us a major speedup!
  sql/sql_select.cc:
    create_internal_tmp_table_from_heap() now uses bulk_insert + repair_by_sort
    when creating Aria/MyISAM tables from HEAP tables.
    This gives a HUGE speed boost!
  storage/maria/ha_maria.cc:
    Extended bulk_insert() to recreate UNIQUE keys for
    internal temporary tables
  storage/maria/ma_open.c:
    Initialize m_info->lock.type properly for temporary tables
    (needed for start_bulk_insert())
  storage/maria/ma_write.c:
    Don't check uniques that are disabled
  storage/myisam/ha_myisam.cc:
    Extended bulk_insert() to recreate UNIQUE keys for
    internal temporary tables.
-------------- next part --------------
=== modified file 'sql/opt_subselect.cc'
--- a/sql/opt_subselect.cc	2011-01-14 10:58:45 +0000
+++ b/sql/opt_subselect.cc	2011-01-14 11:03:41 +0000
@@ -2786,12 +2786,15 @@ TABLE *create_duplicate_weedout_tmp_tabl
     }
   }
 
-  if (thd->is_fatal_error)                              // If end of memory
+  if (thd->is_fatal_error)                      // If end of memory
     goto err;
   share->db_record_offset= 1;
+  table->no_rows= 1;                            // We don't need the data
+
+  // recinfo must point after last field
+  recinfo++;
   if (share->db_type() == TMP_ENGINE_HTON)
   {
-    recinfo++;
     if (create_internal_tmp_table(table, keyinfo, start_recinfo, &recinfo, 0))
       goto err;
   }

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2011-01-14 10:58:45 +0000
+++ b/sql/sql_select.cc	2011-01-14 11:03:41 +0000
@@ -12720,6 +12720,7 @@ create_internal_tmp_table_from_heap2(THD
   save_proc_info=thd->proc_info;
   thd_proc_info(thd, proc_info);
 
+  new_table.no_rows= table->no_rows;
   if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
                                 recinfo, thd->lex->select_lex.options | 
                                 thd->options))
@@ -12731,24 +12732,15 @@ create_internal_tmp_table_from_heap2(THD
   table->file->ha_index_or_rnd_end();
   if (table->file->ha_rnd_init_with_error(1))
     DBUG_RETURN(1);
-  if (table->no_rows)
-  {
+  if (new_table.no_rows)
     new_table.file->extra(HA_EXTRA_NO_ROWS);
-    new_table.no_rows=1;
+  else
+  {
+    /* update table->file->stats.records */
+    table->file->info(HA_STATUS_VARIABLE);
+    new_table.file->ha_start_bulk_insert(table->file->stats.records);
   }
 
-#ifdef TO_BE_DONE_LATER_IN_4_1
-  /*
-    To use start_bulk_insert() (which is new in 4.1) we need to find
-    all places where a corresponding end_bulk_insert() should be put.
-  */
-  table->file->info(HA_STATUS_VARIABLE); /* update table->file->stats.records */
-  new_table.file->ha_start_bulk_insert(table->file->stats.records);
-#else
-  /* HA_EXTRA_WRITE_CACHE can stay until close, no need to disable it */
-  new_table.file->extra(HA_EXTRA_WRITE_CACHE);
-#endif
-
   /*
     copy all old rows from heap table to MyISAM table
     This is the only code that uses record[1] to read/write but this
@@ -12762,6 +12754,8 @@ create_internal_tmp_table_from_heap2(THD
     if (write_err)
       goto err;
   }
+  if (!new_table.no_rows && new_table.file->ha_end_bulk_insert())
+    goto err;
   /* copy row that filled HEAP table */
   if ((write_err=new_table.file->ha_write_tmp_row(table->record[0])))
   {

=== modified file 'storage/maria/ha_maria.cc'
--- a/storage/maria/ha_maria.cc	2010-12-13 10:42:40 +0000
+++ b/storage/maria/ha_maria.cc	2011-01-14 11:03:41 +0000
@@ -1998,7 +1998,14 @@ void ha_maria::start_bulk_insert(ha_rows
          @todo for a single-row INSERT SELECT, we will go into repair, which
          is more costly (flushes, syncs) than a row write.
       */
-      maria_disable_non_unique_index(file, rows);
+      if (file->open_flags & HA_OPEN_INTERNAL_TABLE)
+      {
+        /* Internal table; If we get a duplicate something is very wrong */
+        file->update|= HA_STATE_CHANGED;
+        maria_clear_all_keys_active(file->s->state.key_map);
+      }
+      else
+        maria_disable_non_unique_index(file, rows);
       if (share->now_transactional)
       {
         bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;

=== modified file 'storage/maria/ma_open.c'
--- a/storage/maria/ma_open.c	2011-01-14 10:54:39 +0000
+++ b/storage/maria/ma_open.c	2011-01-14 11:03:41 +0000
@@ -203,6 +203,9 @@ static MARIA_HA *maria_clone_internal(MA
 #ifdef THREAD
   thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
 #endif
+  if (share->options & HA_OPTION_TMP_TABLE)
+    m_info->lock.type= TL_WRITE;
+
   m_info->open_list.data=(void*) m_info;
   maria_open_list=list_add(maria_open_list,&m_info->open_list);
 
@@ -935,6 +938,8 @@ MARIA_HA *maria_open(const char *name, i
                            share->state.changed));
 
   pthread_mutex_unlock(&THR_LOCK_maria);
+
+  m_info->open_flags= open_flags;
   DBUG_RETURN(m_info);
 
 err:

=== modified file 'storage/maria/ma_write.c'
--- a/storage/maria/ma_write.c	2010-12-10 15:15:18 +0000
+++ b/storage/maria/ma_write.c	2011-01-14 11:03:41 +0000
@@ -124,12 +124,23 @@ int maria_write(MARIA_HA *info, uchar *r
     goto err2;
 
   /* Calculate and check all unique constraints */
-  for (i=0 ; i < share->state.header.uniques ; i++)
+
+  if (share->state.header.uniques)
   {
-    if (_ma_check_unique(info,share->uniqueinfo+i,record,
-                         _ma_unique_hash(share->uniqueinfo+i,record),
-                         HA_OFFSET_ERROR))
-      goto err2;
+    for (i=0 ; i < share->state.header.uniques ; i++)
+    {
+      MARIA_UNIQUEDEF *def= share->uniqueinfo + i;
+      ha_checksum unique_hash= _ma_unique_hash(share->uniqueinfo+i,record);
+      if (maria_is_key_active(share->state.key_map, def->key))
+      {
+        if (_ma_check_unique(info, def, record,
+                             unique_hash, HA_OFFSET_ERROR))
+          goto err2;
+      }
+      else
+        maria_unique_store(record+ share->keyinfo[def->key].seg->start,
+                           unique_hash);
+    }
   }
 
   /* Ensure we don't try to restore auto_increment if it doesn't change */

=== modified file 'storage/maria/maria_def.h'
--- a/storage/maria/maria_def.h	2011-01-14 10:54:39 +0000
+++ b/storage/maria/maria_def.h	2011-01-14 11:03:41 +0000
@@ -550,6 +550,7 @@ struct st_maria_handler
   ulong row_base_length;                /* Length of row header */
   uint row_flag;                        /* Flag to store in row header */
   uint opt_flag;                        /* Optim. for space/speed */
+  uint open_flags;                      /* Flags used in open() */
   uint update;                          /* If file changed since open */
   int lastinx;                          /* Last used index */
   uint last_rkey_length;                /* Last length in maria_rkey() */

=== modified file 'storage/myisam/ha_myisam.cc'
--- a/storage/myisam/ha_myisam.cc	2010-12-13 10:42:40 +0000
+++ b/storage/myisam/ha_myisam.cc	2011-01-14 11:03:41 +0000
@@ -1596,7 +1596,15 @@ void ha_myisam::start_bulk_insert(ha_row
     */
     if (file->state->records == 0 && can_enable_indexes &&
         (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES))
-      mi_disable_non_unique_index(file,rows);
+    {
+      if (file->open_flag & HA_OPEN_INTERNAL_TABLE)
+      {
+        file->update|= HA_STATE_CHANGED;
+        mi_clear_all_keys_active(file->s->state.key_map);
+      }
+      else
+        mi_disable_non_unique_index(file,rows);
+    }
     else
     if (!file->bulk_insert &&
         (!rows || rows >= MI_MIN_ROWS_TO_USE_BULK_INSERT))



More information about the commits mailing list