[Commits] Rev 3492: fix string charset conversion in file:///home/bell/maria/bzr/work-maria-5.5-dnames/

sanja at askmonty.org sanja at askmonty.org
Mon Sep 24 17:12:22 EEST 2012


At file:///home/bell/maria/bzr/work-maria-5.5-dnames/

------------------------------------------------------------
revno: 3492
revision-id: sanja at askmonty.org-20120924141218-rxxkg9trqayzd43z
parent: sanja at askmonty.org-20120922185847-5uwcrnfwkmlby24z
committer: sanja at askmonty.org
branch nick: work-maria-5.5-dnames
timestamp: Mon 2012-09-24 17:12:18 +0300
message:
  fix string charset conversion
-------------- next part --------------
=== modified file 'include/my_sys.h'
--- a/include/my_sys.h	2012-09-22 18:58:47 +0000
+++ b/include/my_sys.h	2012-09-24 14:12:18 +0000
@@ -801,6 +801,10 @@ extern my_bool dynstr_trunc(DYNAMIC_STRI
 extern void dynstr_free(DYNAMIC_STRING *str);
 extern void dynstr_reassociate(DYNAMIC_STRING *str, char **res, size_t *length,
                                size_t *alloc_length);
+extern uint32 copy_and_convert_extended(char *to, uint32 to_length,
+                                        CHARSET_INFO *to_cs,
+                                        const char *from, uint32 from_length,
+                                        CHARSET_INFO *from_cs, uint *errors);
 #ifdef HAVE_MLOCK
 extern void *my_malloc_lock(size_t length,myf flags);
 extern void my_free_lock(void *ptr);

=== modified file 'mysys/ma_dyncol.c'
--- a/mysys/ma_dyncol.c	2012-09-22 18:58:47 +0000
+++ b/mysys/ma_dyncol.c	2012-09-24 14:12:18 +0000
@@ -31,6 +31,9 @@
 #include <ma_dyncol.h>
 #include <my_time.h>
 
+uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
+			const char *from, uint32 from_length,
+			CHARSET_INFO *from_cs, uint *errors);
 /*
   Flag byte bits
 
@@ -3434,7 +3437,8 @@ end:
 
 
 enum enum_dyncol_func_result
-dynamic_column_val_str(DYNAMIC_STRING *str, DYNAMIC_COLUMN_VALUE *val)
+dynamic_column_val_str(DYNAMIC_STRING *str, DYNAMIC_COLUMN_VALUE *val,
+                       my_bool quote)
 {
   char buff[40];
   int len;
@@ -3451,17 +3455,64 @@ dynamic_column_val_str(DYNAMIC_STRING *s
       break;
     case DYN_COL_DOUBLE:
       len= snprintf(buff, sizeof(buff), "%lg", val->x.double_value);
-      if (dynstr_realloc(str, len + 2))
+      if (dynstr_realloc(str, len + (quote ? 2 : 0)))
         return ER_DYNCOL_RESOURCE;
-      str->str[str->length++]= '"';
+      if (quote)
+        str->str[str->length++]= '"';
       dynstr_append_mem(str, buff, len);
-      str->str[str->length++]= '"';
+      if (quote)
+        str->str[str->length++]= '"';
       break;
     case DYN_COL_STRING:
-      if (dynstr_append_quoted(str, val->x.string.value.str,
-                               val->x.string.value.length))
-        return ER_DYNCOL_RESOURCE;
-      break;
+      {
+        char *alloc= NULL;
+        char *from= val->x.string.value.str;
+        uint bufflen;
+        my_bool conv= !my_charset_same(val->x.string.charset,
+                                       &my_charset_utf8_general_ci);
+        my_bool rc;
+        len= val->x.string.value.length;
+        bufflen= (len * (conv ? my_charset_utf8_general_ci.mbmaxlen : 1));
+        if (dynstr_realloc(str, bufflen))
+            return ER_DYNCOL_RESOURCE;
+
+        // guarantee a UTF-8 string for the value
+        if (!my_charset_same(val->x.string.charset,
+                             &my_charset_utf8_general_ci))
+        {
+          uint dummy_errors;
+          if (!quote)
+          {
+            /* convert to the destination */
+            str->length+= copy_and_convert_extended(str->str, bufflen,
+                                                    &my_charset_utf8_general_ci,
+                                                    from, len,
+                                                    val->x.string.charset,
+                                                    &dummy_errors);
+            return ER_DYNCOL_OK;
+          }
+          if ((alloc= (char *)my_malloc(bufflen, MYF(0))))
+          {
+            len=
+              copy_and_convert_extended(alloc, bufflen,
+                                        &my_charset_utf8_general_ci,
+                                        from, len, val->x.string.charset,
+                                        &dummy_errors);
+            from= alloc;
+          }
+          else
+            return ER_DYNCOL_RESOURCE;
+        }
+        if (quote)
+          rc= dynstr_append_quoted(str, from, len);
+        else
+          rc= dynstr_append_mem(str, from, len);
+        if (alloc)
+          my_free(alloc);
+        if (rc)
+          return ER_DYNCOL_RESOURCE;
+        break;
+      }
     case DYN_COL_DECIMAL:
       len= sizeof(buff);
       decimal2string(&val->x.decimal.value, buff, &len,
@@ -3474,11 +3525,13 @@ dynamic_column_val_str(DYNAMIC_STRING *s
     case DYN_COL_DATE:
     case DYN_COL_TIME:
       len= my_TIME_to_str(&val->x.time_value, buff, AUTO_SEC_PART_DIGITS);
-      if (dynstr_realloc(str, len + 2))
+      if (dynstr_realloc(str, len + (quote ? 2 : 0)))
         return ER_DYNCOL_RESOURCE;
-      str->str[str->length++]= '"';
+      if (quote)
+        str->str[str->length++]= '"';
       dynstr_append_mem(str, buff, len);
-      str->str[str->length++]= '"';
+      if (quote)
+        str->str[str->length++]= '"';
       break;
     case DYN_COL_NULL:
       if (dynstr_append_mem(str, "null", 4))
@@ -3566,7 +3619,7 @@ dynamic_column_json(DYNAMIC_COLUMN *str,
     }
     json->str[json->length++]= '"';
     json->str[json->length++]= ':';
-    if ((rc= dynamic_column_val_str(json, &val)) < 0 ||
+    if ((rc= dynamic_column_val_str(json, &val, TRUE)) < 0 ||
         dynstr_append_mem(json, "}", 1))
       goto err;
   }

=== modified file 'mysys/string.c'
--- a/mysys/string.c	2012-09-22 18:58:47 +0000
+++ b/mysys/string.c	2012-09-24 14:12:18 +0000
@@ -221,3 +221,77 @@ void dynstr_reassociate(DYNAMIC_STRING *
   *alloc_length= str->max_length;
   str->str=0;
 }
+
+
+/*
+  Copy a string from one character set to another
+
+  SYNOPSIS
+    copy_and_convert_extended()
+    to			Store result here
+    to_length		Size of the 'to' buffer in bytes
+    to_cs		Character set of result string
+    from		Copy from here
+    from_length		Length of from string
+    from_cs		From character set
+    errors		Set to the number of characters replaced by '?'
+  NOTES
+    'to' must be big enough as from_length * to_cs->mbmaxlen
+  RETURN
+    Number of bytes written to 'to'
+*/
+
+uint32
+copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                          const char *from, uint32 from_length,
+                          CHARSET_INFO *from_cs,
+                          uint *errors)
+{
+  int         cnvres;
+  my_wc_t     wc;
+  const uchar *from_end= (const uchar*) from+from_length;
+  char *to_start= to;  /* kept to compute the returned byte count */
+  uchar *to_end= (uchar*) to+to_length;
+  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
+  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
+  uint error_count= 0;
+
+  while (1)
+  {
+    if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
+				      from_end)) > 0)
+      from+= cnvres;
+    else if (cnvres == MY_CS_ILSEQ)
+    {
+      error_count++;
+      from++;  /* skip a single byte of the illegal sequence */
+      wc= '?';
+    }
+    else if (cnvres > MY_CS_TOOSMALL)
+    {
+      /*
+        A well-formed multibyte sequence was detected,
+        but it has no Unicode mapping: emit '?' instead.
+      */
+      error_count++;
+      from+= (-cnvres);  /* the negated result is the number of bytes to skip */
+      wc= '?';
+    }
+    else
+      break;  /* Not enough characters left in the input */
+
+outp:
+    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+      to+= cnvres;
+    else if (cnvres == MY_CS_ILUNI && wc != '?')
+    {
+      error_count++;
+      wc= '?';
+      goto outp;  /* retry the store with the '?' replacement */
+    }
+    else
+      break;  /* store failed and no '?' retry applies: stop */
+  }
+  *errors= error_count;
+  return (uint32) (to - to_start);
+}

=== modified file 'sql/item_strfunc.cc'
--- a/sql/item_strfunc.cc	2012-09-22 18:58:47 +0000
+++ b/sql/item_strfunc.cc	2012-09-24 14:12:18 +0000
@@ -4121,7 +4121,7 @@ String *Item_func_dyncol_json::val_str(S
     size_t length, alloc_length;
     dynstr_reassociate(&json, &ptr, &length, &alloc_length);
     str->reassociate(ptr, (uint32) length, (uint32) alloc_length,
-                     &my_charset_bin);
+                     &my_charset_utf8_general_ci);
     null_value= FALSE;
   }
   return str;

=== modified file 'sql/sql_string.cc'
--- a/sql/sql_string.cc	2012-04-07 13:58:46 +0000
+++ b/sql/sql_string.cc	2012-09-24 14:12:18 +0000
@@ -750,79 +750,6 @@ String *copy_if_not_alloced(String *to,S
   Help functions
 ****************************************************************************/
 
-/*
-  copy a string from one character set to another
-  
-  SYNOPSIS
-    copy_and_convert()
-    to			Store result here
-    to_cs		Character set of result string
-    from		Copy from here
-    from_length		Length of from string
-    from_cs		From character set
-
-  NOTES
-    'to' must be big enough as form_length * to_cs->mbmaxlen
-
-  RETURN
-    length of bytes copied to 'to'
-*/
-
-
-static uint32
-copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
-                          const char *from, uint32 from_length,
-                          CHARSET_INFO *from_cs,
-                          uint *errors)
-{
-  int         cnvres;
-  my_wc_t     wc;
-  const uchar *from_end= (const uchar*) from+from_length;
-  char *to_start= to;
-  uchar *to_end= (uchar*) to+to_length;
-  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
-  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
-  uint error_count= 0;
-
-  while (1)
-  {
-    if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
-				      from_end)) > 0)
-      from+= cnvres;
-    else if (cnvres == MY_CS_ILSEQ)
-    {
-      error_count++;
-      from++;
-      wc= '?';
-    }
-    else if (cnvres > MY_CS_TOOSMALL)
-    {
-      /*
-        A correct multibyte sequence detected
-        But it doesn't have Unicode mapping.
-      */
-      error_count++;
-      from+= (-cnvres);
-      wc= '?';
-    }
-    else
-      break;  // Not enough characters
-
-outp:
-    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
-      to+= cnvres;
-    else if (cnvres == MY_CS_ILUNI && wc != '?')
-    {
-      error_count++;
-      wc= '?';
-      goto outp;
-    }
-    else
-      break;
-  }
-  *errors= error_count;
-  return (uint32) (to - to_start);
-}
 
 
 /*



More information about the commits mailing list