[Commits] 66b4459: MDEV-9143 JSON_xxx functions.

Alexey Botchkov holyfoot at askmonty.org
Mon Sep 5 13:05:09 EEST 2016


revision-id: 66b4459e38f5df4913ecce1c9e3d71c7afa7860d (mariadb-10.2.1-21-g66b4459)
parent(s): 31a8cf54c8a7913338480a0571feaf32143b5f64
committer: Alexey Botchkov
timestamp: 2016-09-05 14:03:33 +0400
message:

MDEV-9143 JSON_xxx functions.

        Library with JSON-related functions added.

---
 include/CMakeLists.txt |    1 +
 include/json_lib.h     |  330 +++++++++++++
 strings/json_lib.c     | 1253 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1584 insertions(+)

diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index d0c4768..3b2c2c6 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -60,6 +60,7 @@ SET(HEADERS
   my_compiler.h
   handler_state.h
   handler_ername.h
+  json_lib.h
 )
 
 INSTALL(FILES ${HEADERS} DESTINATION ${INSTALL_INCLUDEDIR} COMPONENT Development)
diff --git a/include/json_lib.h b/include/json_lib.h
new file mode 100644
index 0000000..ceb78a0
--- /dev/null
+++ b/include/json_lib.h
@@ -0,0 +1,330 @@
+#ifndef JSON_LIB_INCLUDED
+#define JSON_LIB_INCLUDED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define JSON_DEPTH_LIMIT 32
+
+/*
+  When error happens, the c_next of the JSON engine contains the
+  character that caused the error, and the c_str is the position
+  in string where the error occurs.
+*/
+enum json_errors {
+  JE_BAD_CHR= -1,      /* Invalid character, charset handler cannot read it. */
+
+  JE_NOT_JSON_CHR= -2, /* Character met not used in JSON. */
+                       /* ASCII 00-08 for instance.       */
+
+  JE_EOS= -3,          /* Unexpected end of string. */
+
+  JE_SYN= -4,          /* The next character breaks the JSON syntax. */
+
+  JE_STRING_CONST= -5, /* Character disallowed in string constant. */
+
+  JE_ESCAPING= -6,     /* Error in the escaping. */
+
+  JE_DEPTH= -7,        /* The limit on the JSON depth was overrun. */
+};
+
+
+typedef struct st_json_string_t
+{
+  const uchar *c_str;    /* Current position in JSON string */
+  const uchar *str_end;  /* The end on the string. */
+  my_wc_t c_next;        /* UNICODE of the last read character */
+  int error;             /* error code. */
+
+  CHARSET_INFO *cs;      /* Character set of the JSON string. */
+
+  my_charset_conv_mb_wc wc; /* UNICODE conversion function. */
+                            /* It's taken out of the cs just to speed calls. */
+} json_string_t;
+
+
+void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs);
+void json_string_set_str(json_string_t *s,
+                         const uchar *str, const uchar *end);
+/*
+  json_next_char() decodes the character at (j)->c_str into (j)->c_next
+  using the conversion function cached in (j)->wc, and evaluates to that
+  function's return value. The library treats a value > 0 as the length
+  of the character in bytes and a value <= 0 as a failure.
+  It does not advance (j)->c_str. The argument is evaluated several times,
+  so it must not have side effects.
+*/
+#define json_next_char(j) \
+  ((j)->wc((j)->cs, &(j)->c_next, (j)->c_str, (j)->str_end))
+/* Non-zero when the current position has reached the end of the string. */
+#define json_eos(j) ((j)->c_str >= (j)->str_end)
+/*
+  json_read_string_const_chr() reads the next character of the string constant
+  and saves it to the js->c_next.
+  It takes into account possible escapings, so if for instance
+  the string is '\b', the json_read_string_const_chr() sets js->c_next to 8.
+*/
+int json_read_string_const_chr(json_string_t *js);
+
+
+/*
+  Various JSON-related operations expect JSON path as a parameter.
+  The path is a string like this "$.keyA[2].*"
+  The path itself is a number of steps specifying either a key or a position
+  in an array. Some of them can be wildcards.
+  So the representation of the JSON path is the json_path_t class
+  containing an array of json_path_step_t objects.
+*/
+
+
+enum json_path_step_types
+{
+  JSON_PATH_KEY=0,
+  JSON_PATH_ARRAY=1
+};
+
+
+typedef struct st_json_path_step_t
+{
+  enum json_path_step_types type;  /* The type of the step - KEY or ARRAY */
+  int wild;         /* If the step is a wildcard */
+  const uchar *key; /* Pointer to the beginning of the key. */
+  const uchar *key_end;  /* Pointer to the end of the key. */
+  uint n_item;      /* Item number in an array. No meaning for the key step. */
+} json_path_step_t;
+
+
+typedef struct st_json_path_t
+{
+  json_string_t s;  /* The string to be parsed. */
+  json_path_step_t steps[JSON_DEPTH_LIMIT]; /* Steps of the path. */
+  json_path_step_t *last_step; /* Points to the last step. */
+
+  int mode_strict; /* TRUE if the path specified as 'strict' */
+} json_path_t;
+
+
+int json_path_setup(json_path_t *p,
+                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
+
+
+/*
+  The set of functions and structures below provides interface
+  to the JSON text parser.
+  Running the parser normally goes like this:
+
+    json_engine_t j_eng;   // structure keeps parser's data
+    json_scan_start(&j_eng, cs, str, end); // begin the parsing
+
+    do
+    {
+      // The parser has read next piece of JSON
+      // and set fields of j_eng structure accordingly.
+      // So let's see what we have:
+      switch (j_eng.state)
+      {
+        case JST_KEY:
+           // Handle key name. See the json_read_keyname_chr()
+           // Probably compare it with the keyname we're looking for
+        case JST_VALUE:
+           // Handle value. It is either value of the key or an array item.
+           // see the json_read_value()
+        case JST_OBJ_START:
+          // parser found an object (the '{' in JSON)
+        case JST_OBJ_END:
+          // parser found the end of the object (the '}' in JSON)
+        case JST_ARRAY_START:
+          // parser found an array (the '[' in JSON)
+        case JST_ARRAY_END:
+          // parser found the end of the array (the ']' in JSON)
+
+      };
+    } while (json_scan_next(&j_eng) == 0);  // parse next structure
+
+    
+    if (j_eng.s.error)  // we need to check why the loop ended.
+                        // Did we get to the end of JSON, or came upon error.
+    {
+       signal_error_in_JSON()
+    }
+
+
+  Parts of JSON can be quickly skipped. If we are not interested
+  in a particular key, we can just skip it with json_skip_key() call.
+  Similarly json_skip_level() goes right to the end of an object
+  or an array.
+*/
+
+
+/* These are JSON parser states that user can expect and handle.  */
+enum json_states {
+  JST_VALUE,       /* value found      */
+  JST_KEY,         /* key found        */
+  JST_OBJ_START,   /* object           */
+  JST_OBJ_END,     /* object ended     */
+  JST_ARRAY_START, /* array            */
+  JST_ARRAY_END,   /* array ended      */
+  NR_JSON_USER_STATES
+};
+
+
+enum json_value_types
+{
+  JSON_VALUE_OBJECT=0,
+  JSON_VALUE_ARRAY=1,
+  JSON_VALUE_STRING,
+  JSON_VALUE_NUMBER,
+  JSON_VALUE_TRUE,
+  JSON_VALUE_FALSE,
+  JSON_VALUE_NULL
+};
+
+
+typedef struct st_json_engine_t
+{
+  json_string_t s;  /* String to parse. */
+  int sav_c_len;    /* Length of the current character.
+                       Can be more than 1 for multibyte charsets */
+
+  int state; /* The state of the parser. One of 'enum json_states'.
+                It tells us what construction of JSON we've just read. */
+
+  /* These values are only set after the json_read_value() call. */
+  enum json_value_types value_type; /* type of the value.*/
+  const uchar *value;      /* Points to the value. */
+  const uchar *value_begin;/* Points to where the value starts in the JSON. */
+
+  /*
+    In most cases the 'value' and 'value_begin' are equal.
+    They only differ if the value is a string constants. Then 'value_begin'
+    points to the starting quotation mark, while the 'value' - to
+    the first character of the string.
+  */
+
+  const uchar *value_end; /* Points to the next character after the value. */
+  int value_len; /* The length of the value. Does not count quotations for */
+                 /* string constants. */
+
+  int stack[JSON_DEPTH_LIMIT]; /* Keeps the stack of nested JSON structures. */
+  int *stack_p;                /* The 'stack' pointer. */
+} json_engine_t;
+
+
+int json_scan_start(json_engine_t *je,
+                        CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
+int json_scan_next(json_engine_t *j);
+
+
+/*
+  json_read_keyname_chr() function assists parsing the name of a JSON key.
+  It can only be called when the json_engine is in the JST_KEY state.
+  The json_read_keyname_chr() reads one character of the name of the key,
+  and puts it in j_eng.s.c_next.
+  Typical usage is like this:
+
+  if (j_eng.state == JST_KEY)
+  {
+    while (json_read_keyname_chr(&j_eng) == 0)
+    {
+      //handle next character i.e. match it against the pattern
+    }
+  }
+*/
+
+int json_read_keyname_chr(json_engine_t *j);
+
+
+/*
+  json_read_value() function parses the JSON value syntax,
+  so that we can handle the value of a key or an array item.
+  It only returns meaningful result when the engine is in
+  the JST_VALUE state.
+
+  Typical usage is like this:
+
+  if (j_eng.state ==  JST_VALUE)
+  {
+    json_read_value(&j_eng);
+    switch(j_eng.value_type)
+    {
+      case JSON_VALUE_STRING:
+        // get the string
+        str= j_eng.value;
+        str_length= j_eng.value_len;
+        break;
+      case JSON_VALUE_NUMBER:
+        // get the number
+        break;
+      ... etc
+    }
+  }
+*/
+int json_read_value(json_engine_t *j);
+
+
+/*
+  json_skip_key() makes parser skip the content of the current
+  JSON key quickly.
+  It can be called only when the json_engine state is JST_KEY.
+  Typical usage is:
+
+  if (j_eng.state == JST_KEY)
+  {
+    if (key_does_not_match(j_eng))
+      json_skip_key(&j_eng);
+  }
+*/
+
+int json_skip_key(json_engine_t *j);
+
+
+/*
+  json_skip_level() makes parser quickly skip the JSON content
+  to the end of the current object or array.
+  It is used when we're not interested in the rest of an array
+  or the rest of the keys of an object.
+*/
+int json_skip_level(json_engine_t *j);
+
+
+#define json_skip_array_item json_skip_key
+
+/*
+  Checks if the current value is of scalar type -
+  not an OBJECT nor ARRAY.
+*/
+#define json_value_scalar(je)  ((je)->value_type > JSON_VALUE_ARRAY)
+
+/*
+  Look for the JSON PATH in the json string.
+  Function can be called several times with same JSON/PATH to
+  find multiple matches.
+  On the first call, the json_engine_t parameter should be
+  initialized with the JSON string, and the json_path_t with the JSON path
+  appropriately. The 'p_cur_step' should point at the first
+  step of the path.
+  If function returns 0, it means it found the match. The position of
+  the match is je->s.c_str. Then we can call the json_find_value()
+  with same engine/path/p_cur_step to get the next match.
+  Non-zero return means no matches found.
+  Check je->s.error to see if there was an error in JSON.
+*/
+int json_find_value(json_engine_t *je,
+                    json_path_t *p, json_path_step_t **p_cur_step);
+
+
+/*
+  Converts a JSON string constant into an ordinary string constant,
+  which can involve unpacking JSON escapes and changing the character set.
+  Returns a negative integer in the case of an error,
+  the length of the result otherwise.
+*/
+int json_unescape(CHARSET_INFO *json_cs,
+                  const uchar *json_str, const uchar *json_end,
+                  CHARSET_INFO *res_cs,
+                  uchar *res, const uchar *res_end);
+
+/*
+  Converts an ordinary string constant into a JSON string constant,
+  which can involve appropriate escaping and changing the character set.
+  Returns a negative integer in the case of an error,
+  the length of the result otherwise.
+*/
+int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end,
+                CHARSET_INFO *json_cs, uchar *json, const uchar *json_end);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* JSON_LIB_INCLUDED */
+
diff --git a/strings/json_lib.c b/strings/json_lib.c
new file mode 100644
index 0000000..fc4de68
--- /dev/null
+++ b/strings/json_lib.c
@@ -0,0 +1,1253 @@
+#include <my_global.h>
+#include <m_ctype.h>
+
+
+#include "json_lib.h"
+
+/*
+  JSON escaping lets user specify UTF16 codes of characters.
+  So we're going to need the UTF16 charset capabilities. Let's import
+  them from the utf16 charset.
+*/
+int my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
+             my_wc_t *pwc, const uchar *s, const uchar *e);
+
+
+/*
+  Point the JSON string at the buffer that starts at 'str' and ends
+  right before 'end'. Only the position and the end pointer are updated;
+  the character set and the error code are left as they are.
+*/
+void json_string_set_str(json_string_t *s,
+                         const uchar *str, const uchar *end)
+{
+  s->str_end= end;
+  s->c_str= str;
+}
+
+
+/*
+  Attach the character set to the JSON string and reset the error code.
+  The charset's mb_wc conversion function is cached in s->wc, which is
+  what the json_next_char() macro calls.
+*/
+void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
+{
+  s->error= 0;
+  s->wc= i_cs->cset->mb_wc;
+  s->cs= i_cs;
+}
+
+
+/*
+  Fully initialize the JSON string: set the buffer boundaries first,
+  then the character set (which also clears the error code).
+*/
+static void json_string_setup(json_string_t *s,
+                              CHARSET_INFO *i_cs, const uchar *str,
+                              const uchar *end)
+{
+  json_string_set_str(s, str, end);
+  json_string_set_cs(s, i_cs);
+}
+
+
+enum json_char_classes {
+  C_EOS,    /* end of string */
+  C_LCURB,  /* {  */
+  C_RCURB,  /* } */
+  C_LSQRB,  /* [ */
+  C_RSQRB,  /* ] */
+  C_COLON,  /* : */
+  C_COMMA,  /* , */
+  C_QUOTE,  /* " */
+  C_DIGIT,  /* -0123456789 */
+  C_LOW_F,  /* 'f' (for "false") */
+  C_LOW_N,  /* 'n' (for "null") */
+  C_LOW_T,  /* 't' (for "true") */
+  C_ETC,    /* everything else */
+  C_ERR,    /* character disallowed in JSON */
+  C_BAD,    /* invalid character, charset handler cannot read it */
+  NR_C_CLASSES, /* Counter for classes that handled with functions. */
+  C_SPACE   /* space. Doesn't need specific handlers, so after the counter.*/
+};
+
+
+/*
+  This array maps first 128 Unicode Code Points into classes.
+  The remaining Unicode characters should be mapped to C_ETC.
+*/
+
+static enum json_char_classes json_chr_map[128] = {
+  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
+  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
+  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
+  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
+
+  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
+  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
+  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
+  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
+
+  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
+  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
+  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
+  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,
+
+  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
+  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
+  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
+  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
+};
+
+
+/*
+  JSON parser actually has more states than the 'enum json_states'
+  declares. But the rest of the states aren't seen to the user so let's
+  specify them here to avoid confusion.
+*/
+
+enum json_all_states {
+  JST_DONE= NR_JSON_USER_STATES,         /* ok to finish     */
+  JST_OBJ_CONT= NR_JSON_USER_STATES+1,   /* object continues */
+  JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues  */
+  JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
+  NR_JSON_STATES= NR_JSON_USER_STATES+4
+};
+
+
+typedef int (*json_state_handler)(json_engine_t *);
+
+
+/*
+  The string is broken: it ended where more JSON was expected.
+  Records JE_EOS and returns 1 to stop the scan.
+*/
+static int unexpected_eos(json_engine_t *j)
+{
+  json_string_t *js= &j->s;
+
+  js->error= JE_EOS;
+  return 1;
+}
+
+
+/*
+  The character just read breaks the JSON syntax.
+  Records JE_SYN and returns 1 to stop the scan.
+*/
+static int syntax_error(json_engine_t *j)
+{
+  json_string_t *js= &j->s;
+
+  js->error= JE_SYN;
+  return 1;
+}
+
+
+/*
+  Value of object: the '{' was met where a value is expected.
+  Sets the state to JST_OBJ_START and pushes JST_OBJ_CONT on the stack.
+  Returns 0 on success. If the push would not fit in the stack of
+  JSON_DEPTH_LIMIT entries, sets the JE_DEPTH error and returns 1,
+  leaving the stack untouched.
+*/
+static int mark_object(json_engine_t *j)
+{
+  /* stack_p points at the top entry, so the last usable index is LIMIT-1. */
+  if (j->stack_p - j->stack >= JSON_DEPTH_LIMIT - 1)
+  {
+    j->s.error= JE_DEPTH;
+    return 1;
+  }
+
+  j->state= JST_OBJ_START;
+  *(++j->stack_p)= JST_OBJ_CONT;
+  return 0;
+}
+
+
+/*
+  Read value of object: like mark_object(), but also fills in the value
+  description (value_type and value) of the engine.
+  Returns 0 on success. If the push would not fit in the stack of
+  JSON_DEPTH_LIMIT entries, sets the JE_DEPTH error and returns 1,
+  leaving the stack untouched.
+*/
+static int read_obj(json_engine_t *j)
+{
+  /* stack_p points at the top entry, so the last usable index is LIMIT-1. */
+  if (j->stack_p - j->stack >= JSON_DEPTH_LIMIT - 1)
+  {
+    j->s.error= JE_DEPTH;
+    return 1;
+  }
+
+  j->state= JST_OBJ_START;
+  j->value_type= JSON_VALUE_OBJECT;
+  j->value= j->value_begin;
+  *(++j->stack_p)= JST_OBJ_CONT;
+  return 0;
+}
+
+
+/*
+  Value of array: the '[' was met where a value is expected.
+  Sets the state to JST_ARRAY_START, pushes JST_ARRAY_CONT on the stack
+  and makes 'value' point at 'value_begin'.
+  Returns 0 on success. If the push would not fit in the stack of
+  JSON_DEPTH_LIMIT entries, sets the JE_DEPTH error and returns 1,
+  leaving the stack untouched.
+*/
+static int mark_array(json_engine_t *j)
+{
+  /* stack_p points at the top entry, so the last usable index is LIMIT-1. */
+  if (j->stack_p - j->stack >= JSON_DEPTH_LIMIT - 1)
+  {
+    j->s.error= JE_DEPTH;
+    return 1;
+  }
+
+  j->state= JST_ARRAY_START;
+  *(++j->stack_p)= JST_ARRAY_CONT;
+  j->value= j->value_begin;
+  return 0;
+}
+
+/*
+  Read value of array: like mark_array(), but also sets value_type
+  to JSON_VALUE_ARRAY.
+  Returns 0 on success. If the push would not fit in the stack of
+  JSON_DEPTH_LIMIT entries, sets the JE_DEPTH error and returns 1,
+  leaving the stack untouched.
+*/
+static int read_array(json_engine_t *j)
+{
+  /* stack_p points at the top entry, so the last usable index is LIMIT-1. */
+  if (j->stack_p - j->stack >= JSON_DEPTH_LIMIT - 1)
+  {
+    j->s.error= JE_DEPTH;
+    return 1;
+  }
+
+  j->state= JST_ARRAY_START;
+  j->value_type= JSON_VALUE_ARRAY;
+  j->value= j->value_begin;
+  *(++j->stack_p)= JST_ARRAY_CONT;
+  return 0;
+}
+
+
+
+/*
+  Character classes inside the JSON string constant.
+  We mostly need this to parse escaping properly.
+  Escapings available in JSON are:
+  \" - quotation mark
+  \\ - backslash
+  \b - backspace UNICODE 8
+  \f - formfeed UNICODE 12
+  \n - newline UNICODE 10
+  \r - carriage return UNICODE 13
+  \t - horizontal tab UNICODE 9
+  \u{four-hex-digits} - code unit in the UTF-16 encoding
+*/
+enum json_string_char_classes {
+  S_0= 0,
+  S_1= 1,
+  S_2= 2,
+  S_3= 3,
+  S_4= 4,
+  S_5= 5,
+  S_6= 6,
+  S_7= 7,
+  S_8= 8,
+  S_9= 9,
+  S_A= 10,
+  S_B= 11,
+  S_C= 12,
+  S_D= 13,
+  S_E= 14,
+  S_F= 15,
+  S_ETC= 36,    /* rest of characters. */
+  S_QUOTE= 37,
+  S_BKSL= 38, /* \ */
+  S_ERR= 100,   /* disallowed */
+};
+
+
+/* This maps characters to their types inside a string constant. */
+static enum json_string_char_classes json_instr_chr_map[128] = {
+  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
+  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
+  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
+  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
+
+  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
+  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
+  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
+  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
+
+  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
+  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
+  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
+  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,
+
+  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
+  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
+  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
+  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
+};
+
+
+/*
+  Read four hexadecimal digits from the string and pack them into two
+  bytes: dest[0] gets the first pair of digits, dest[1] the second one.
+  The digits are added to dest[], so the caller has to zero it beforehand.
+  The position is advanced past every digit accepted.
+  Returns 0 on success, otherwise sets s->error (JE_EOS, JE_BAD_CHR or
+  JE_SYN) and returns it.
+*/
+static int read_4_hexdigits(json_string_t *s, uchar *dest)
+{
+  int i, t, c_len;
+  for (i=0; i<4; i++)
+  {
+    if ((c_len= json_next_char(s)) <= 0)
+      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
+
+    /* S_0..S_F are the hex digit classes; 'F'/'f' (S_F) is a valid digit. */
+    if (s->c_next >= 128 || (t= json_instr_chr_map[s->c_next]) > S_F)
+      return s->error= JE_SYN;
+
+    s->c_str+= c_len;
+    dest[i/2]+= (i % 2) ? t : t*16;
+  }
+  return 0;
+}
+
+
+/*
+  Handle the escape sequence whose backslash has already been consumed
+  by the caller. On success the unescaped character is left in s->c_next,
+  the position is moved past the whole sequence and 0 is returned.
+  On failure s->error is set and a non-zero value is returned.
+
+  \b \f \n \r \t are translated to their control codes, \uXXXX is
+  decoded as UTF-16 (a high surrogate must be followed by a second
+  \uXXXX carrying the low surrogate), and any other escaped character
+  that is allowed inside a string constant stands for itself.
+*/
+static int json_handle_esc(json_string_t *s)
+{
+  int c_len;
+
+  if ((c_len= json_next_char(s)) <= 0)
+    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
+
+  s->c_str+= c_len;
+  switch (s->c_next)
+  {
+    case 'b':
+      s->c_next= 8;
+      return 0;
+    case 'f':
+      s->c_next= 12;
+      return 0;
+    case 'n':
+      s->c_next= 10;
+      return 0;
+    case 'r':
+      s->c_next= 13;
+      return 0;
+    case 't':
+      s->c_next= 9;
+      return 0;
+  }
+
+  if (s->c_next < 128 && json_instr_chr_map[s->c_next] == S_ERR)
+  {
+    /* Step back so that c_str points at the offending character. */
+    s->c_str-= c_len;
+    return s->error= JE_ESCAPING;
+  }
+
+  if (s->c_next != 'u')
+    return 0;
+
+  {
+    /*
+      Read the four-hex-digits code.
+      If symbol is not in the Basic Multilingual Plane, we're reading
+      the string for the next four digits to compose the UTF-16 surrogate pair.
+    */
+    uchar code[4]= {0,0,0,0};
+
+    if (read_4_hexdigits(s, code))
+      return 1;
+
+    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+2)) == 2)
+      return 0;
+
+    if (c_len != MY_CS_TOOSMALL4)
+      return s->error= JE_BAD_CHR;
+
+    /* The second half of the pair has to be escaped as \uXXXX too. */
+    if ((c_len= json_next_char(s)) <= 0)
+      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
+    if (s->c_next != '\\')
+      return s->error= JE_SYN;
+    s->c_str+= c_len;  /* Step over the backslash. */
+
+    if ((c_len= json_next_char(s)) <= 0)
+      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
+    if (s->c_next != 'u')
+      return s->error= JE_SYN;
+    s->c_str+= c_len;  /* Step over the 'u'. */
+
+    if (read_4_hexdigits(s, code+2))
+      return 1;
+
+    /* A complete surrogate pair takes all four bytes of code[]. */
+    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+4)) == 4)
+      return 0;
+  }
+  return s->error= JE_BAD_CHR;
+}
+
+
+/*
+  Read one character of a string constant into js->c_next and advance
+  the position. A backslash starts an escape sequence, which is resolved
+  by json_handle_esc(). Returns 0 on success; when the character cannot
+  be read, js->error is set to JE_EOS or JE_BAD_CHR and 1 is returned.
+*/
+int json_read_string_const_chr(json_string_t *js)
+{
+  int c_len= json_next_char(js);
+
+  if (c_len <= 0)
+  {
+    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
+    return 1;
+  }
+
+  js->c_str+= c_len;
+  if (js->c_next != '\\')
+    return 0;
+
+  return json_handle_esc(js);
+}
+
+
+/*
+  Skip the rest of a string constant up to and including the closing
+  quotation mark, resolving escape sequences on the way.
+  On success the state is restored from the top of the stack and
+  0 is returned. Otherwise j->s.error is set and non-zero is returned.
+*/
+static int skip_str_constant(json_engine_t *j)
+{
+  json_string_t *js= &j->s;
+  int c_len;
+
+  while ((c_len= json_next_char(js)) > 0)
+  {
+    js->c_str+= c_len;
+
+    /* Ordinary characters, hex digits included, are just skipped. */
+    if (js->c_next >= 128 || json_instr_chr_map[js->c_next] <= S_ETC)
+      continue;
+
+    if (js->c_next == '"')
+    {
+      j->state= *j->stack_p;
+      return 0;
+    }
+
+    /* Symbol not allowed in JSON. */
+    if (js->c_next != '\\')
+      return js->error= JE_NOT_JSON_CHR;
+
+    if (json_handle_esc(js))
+      return 1;
+  }
+
+  return js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
+}
+
+
+/*
+  Scalar string: skip the string constant, then go on scanning.
+  Returns 0 on success, 1 otherwise.
+*/
+static int v_string(json_engine_t *j)
+{
+  if (skip_str_constant(j))
+    return 1;
+
+  return json_scan_next(j) != 0;
+}
+
+
+/*
+  Read scalar string: remember where the string body starts, skip the
+  constant and describe it in the engine (value, value_type, value_len).
+  value_len does not include the closing quotation mark, which is
+  counted here as one byte.
+  NOTE(review): for charsets where '"' takes more than one byte this
+  looks off by the extra bytes - confirm.
+*/
+static int read_strn(json_engine_t *j)
+{
+  const uchar *body_start= j->s.c_str;
+
+  j->value= body_start;
+  if (skip_str_constant(j))
+    return 1;
+
+  j->value_type= JSON_VALUE_STRING;
+  j->state= *j->stack_p;
+  j->value_len= (j->s.c_str - body_start) - 1;
+  return 0;
+}
+
+
+/*
+  We have dedicated parser for numeric constants. It's similar
+  to the main JSON parser, we similarly define character classes,
+  map characters to classes and implement the state-per-class
+  table. Though we don't create functions that handle
+  particular classes, just specify what new state should parser
+  get in this case.
+*/
+enum json_num_char_classes {
+  N_MINUS,
+  N_PLUS,
+  N_ZERO,
+  N_DIGIT,
+  N_POINT,
+  N_E,
+  N_END,
+  N_EEND,
+  N_ERR,
+  N_NUM_CLASSES
+};
+
+
+static enum json_num_char_classes json_num_chr_map[128] = {
+  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
+  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
+  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
+  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
+
+  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
+  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
+  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
+  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
+
+  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
+  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
+  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
+  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,
+
+  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
+  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
+  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
+  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,   N_END,   N_EEND,  N_EEND,
+};
+
+
+enum json_num_states {
+  NS_OK,  /* Number ended. */
+  NS_GO,  /* Initial state. */
+  NS_GO1, /* If the number starts with '-'. */
+  NS_Z,   /* If the number starts with '0'. */
+  NS_Z1,  /* If the numbers starts with '-0'. */
+  NS_INT, /* Integer part. */
+  NS_FRAC,/* Fractional part. */
+  NS_EX,  /* Exponential part begins. */
+  NS_EX1, /* Exponential part continues. */
+  NS_NUM_STATES
+};
+
+
+/*
+  Transition table of the numeric constant parser:
+  json_num_states[current state][class of the next character].
+  Positive values are states to continue with, NS_OK (0) means that the
+  number has ended, negative values are 'enum json_errors' codes.
+
+  The N_END column is also what skip_num_constant() consults when the
+  string ends right after the number, so every state in which the number
+  read so far is complete has NS_OK there. That includes ZE1 ("-0") and
+  EX1 ("1e5"), which are valid JSON numbers.
+
+  NOTE(review): each row lists 8 initializers while N_NUM_CLASSES is 9,
+  so the JE_BAD_CHR values sit in the N_EEND column and the N_ERR column
+  is implicitly 0 (NS_OK). Confirm this is intended.
+*/
+static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
+{
+/*         -        +       0        1..9    POINT    E       END     EEND */
+/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
+/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
+/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
+/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
+/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
+/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
+/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
+/*EX*/   { NS_EX1,  NS_EX1, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
+/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
+};
+
+
+/*
+  Skip a numeric constant. The first character of the number is expected
+  to be in j->s.c_next already, with the position right after it.
+  Characters are consumed while the json_num_states table yields a
+  positive state; the first character that does not continue the number
+  is left unread for the main parser to handle.
+  On success the state is restored from the top of the stack and
+  0 is returned. Otherwise j->s.error is set and non-zero is returned.
+*/
+static int skip_num_constant(json_engine_t *j)
+{
+  int state, c_len;
+
+  /*
+    json_num_chr_map only covers the first 128 code points, and a
+    non-positive state must never be used as a row index below.
+  */
+  if (j->s.c_next >= 128 ||
+      (state= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]]) <= 0)
+    return j->s.error= JE_SYN;
+
+  for (;;)
+  {
+    if ((c_len= json_next_char(&j->s)) > 0)
+    {
+      /* Characters above 127 can't be part of a number: same as N_EEND. */
+      int chr_class= (j->s.c_next < 128) ?
+                     (int) json_num_chr_map[j->s.c_next] : (int) N_EEND;
+
+      if ((state= json_num_states[state][chr_class]) > 0)
+      {
+        j->s.c_str+= c_len;
+        continue;
+      }
+      break;
+    }
+
+    /* At the end of the string the N_END column tells if we can stop. */
+    if ((j->s.error=
+          json_eos(&j->s) ? json_num_states[state][N_END] : JE_BAD_CHR) < 0)
+      return 1;
+    else
+      break;
+  }
+
+  j->state= *j->stack_p;
+  return 0;
+}
+
+
+/*
+  Scalar numeric: skip the numeric constant, then go on scanning.
+  Returns 0 on success, 1 otherwise.
+*/
+static int v_number(json_engine_t *j)
+{
+  if (skip_num_constant(j))
+    return 1;
+
+  return json_scan_next(j) != 0;
+}
+
+
+/*
+  Read numeric constant: skip it and describe it in the engine.
+  'value' is set to 'value_begin' in any case; value_type and value_len
+  are only set when the constant was skipped successfully.
+  Returns 0 on success, 1 otherwise.
+*/
+static int read_num(json_engine_t *j)
+{
+  j->value= j->value_begin;
+
+  if (skip_num_constant(j) != 0)
+    return 1;
+
+  j->value_type= JSON_VALUE_NUMBER;
+  j->value_len= j->s.c_str - j->value_begin;
+  return 0;
+}
+
+
+/*
+  Check that the JSON string matches the argument and skip it.
+  'str' is a NUL-terminated string compared character by character with
+  what follows the current position; matched characters are consumed.
+  Returns 0 if everything matched. Otherwise stores the error code
+  (JE_SYN on a mismatch, JE_EOS or JE_BAD_CHR if a character cannot be
+  read) in s->error, so that callers checking the engine's error see why
+  the scan stopped, and returns that code.
+*/
+static int skip_string_verbatim(json_string_t *s, const char *str)
+{
+  int c_len;
+  while (*str)
+  {
+    if ((c_len= json_next_char(s)) > 0)
+    {
+      if (s->c_next == (my_wc_t) *(str++))
+      {
+        s->c_str+= c_len;
+        continue;
+      }
+      return s->error= JE_SYN;
+    }
+    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
+  }
+
+  return 0;
+}
+
+
+/*
+  Scalar false: the 'f' has been read, check the remaining "alse",
+  restore the state from the stack and go on scanning.
+*/
+static int v_false(json_engine_t *j)
+{
+  if (skip_string_verbatim(&j->s, "alse") == 0)
+  {
+    j->state= *j->stack_p;
+    return json_scan_next(j);
+  }
+  return 1;
+}
+
+
+/*
+  Scalar null: the 'n' has been read, check the remaining "ull",
+  restore the state from the stack and go on scanning.
+*/
+static int v_null(json_engine_t *j)
+{
+  if (skip_string_verbatim(&j->s, "ull") == 0)
+  {
+    j->state= *j->stack_p;
+    return json_scan_next(j);
+  }
+  return 1;
+}
+
+
+/*
+  Scalar true: the 't' has been read, check the remaining "rue",
+  restore the state from the stack and go on scanning.
+*/
+static int v_true(json_engine_t *j)
+{
+  if (skip_string_verbatim(&j->s, "rue") == 0)
+  {
+    j->state= *j->stack_p;
+    return json_scan_next(j);
+  }
+  return 1;
+}
+
+
+/*
+  Read false: describe the value in the engine (the fields are set
+  before the text is verified), then check the remaining "alse".
+  Returns what skip_string_verbatim() returns: 0 on a match.
+*/
+static int read_false(json_engine_t *j)
+{
+  j->value= j->value_begin;
+  j->value_len= 5;
+  j->value_type= JSON_VALUE_FALSE;
+  j->state= *j->stack_p;
+
+  return skip_string_verbatim(&j->s, "alse");
+}
+
+
+/*
+  Read null: describe the value in the engine (the fields are set
+  before the text is verified), then check the remaining "ull".
+  Returns what skip_string_verbatim() returns: 0 on a match.
+*/
+static int read_null(json_engine_t *j)
+{
+  j->value= j->value_begin;
+  j->value_len= 4;
+  j->value_type= JSON_VALUE_NULL;
+  j->state= *j->stack_p;
+
+  return skip_string_verbatim(&j->s, "ull");
+}
+
+
+/*
+  Read true: describe the value in the engine (the fields are set
+  before the text is verified), then check the remaining "rue".
+  Returns what skip_string_verbatim() returns: 0 on a match.
+*/
+static int read_true(json_engine_t *j)
+{
+  j->value= j->value_begin;
+  j->value_len= 4;
+  j->value_type= JSON_VALUE_TRUE;
+  j->state= *j->stack_p;
+
+  return skip_string_verbatim(&j->s, "rue");
+}
+
+
+/*
+  Disallowed character: it was decoded fine but is not used in JSON.
+  Records JE_NOT_JSON_CHR and returns 1 to stop the scan.
+*/
+static int not_json_chr(json_engine_t *j)
+{
+  json_string_t *js= &j->s;
+
+  js->error= JE_NOT_JSON_CHR;
+  return 1;
+}
+
+
+/*
+  Bad character: the charset handler could not read it.
+  Records JE_BAD_CHR and returns 1 to stop the scan.
+*/
+static int bad_chr(json_engine_t *j)
+{
+  json_string_t *js= &j->s;
+
+  js->error= JE_BAD_CHR;
+  return 1;
+}
+
+
+/*
+  Correct finish: the end of the string was met in the JST_DONE state.
+  Returns 1 like the error handlers do, but leaves the error code
+  untouched, so a caller tells a normal end from a failure by checking
+  the engine's s.error.
+*/
+static int done(json_engine_t *j  __attribute__((unused)))
+{
+  return 1;
+}
+
+
+/*
+  End of the object: report JST_OBJ_END and drop the object's entry
+  from the stack.
+*/
+static int end_object(json_engine_t *j)
+{
+  j->state= JST_OBJ_END;
+  --j->stack_p;
+  return 0;
+}
+
+
+/*
+  End of the array: report JST_ARRAY_END and drop the array's entry
+  from the stack.
+*/
+static int end_array(json_engine_t *j)
+{
+  j->state= JST_ARRAY_END;
+  --j->stack_p;
+  return 0;
+}
+
+
+/*
+  Start reading key name: the opening quotation mark of the first key
+  of an object was met, so switch the engine to JST_KEY.
+*/
+static int read_keyname(json_engine_t *j)
+{
+  int key_state= JST_KEY;
+
+  j->state= key_state;
+  return 0;
+}
+
+
+/*
+  Read characters until one that is not a space is found.
+  On return *t_next holds the class of that character (C_EOS or C_BAD
+  if no character could be read) and *c_len holds the value returned
+  by the last json_next_char() call. Every character that was read,
+  the non-space one included, is consumed.
+*/
+static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
+{
+  for (;;)
+  {
+    *c_len= json_next_char(js);
+    if (*c_len <= 0)
+    {
+      *t_next= json_eos(js) ? C_EOS : C_BAD;
+      return;
+    }
+
+    js->c_str+= *c_len;
+    *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC;
+    if (*t_next != C_SPACE)
+      return;
+  }
+}
+
+
+/*
+  Next key name: after a comma inside an object the next non-space
+  character has to be the quotation mark that opens a key.
+  Returns 0 and sets JST_KEY in that case; otherwise sets the error
+  (JE_EOS, JE_BAD_CHR or JE_SYN) and returns 1.
+*/
+static int next_key(json_engine_t *j)
+{
+  int t_next, c_len;
+
+  get_first_nonspace(&j->s, &t_next, &c_len);
+
+  switch (t_next)
+  {
+    case C_QUOTE:
+      j->state= JST_KEY;
+      return 0;
+    case C_EOS:
+      j->s.error= JE_EOS;
+      break;
+    case C_BAD:
+      j->s.error= JE_BAD_CHR;
+      break;
+    default:
+      j->s.error= JE_SYN;
+      break;
+  }
+  return 1;
+}
+
+
+/* Forward declarations. */
+static int skip_colon(json_engine_t *j);
+static int skip_key(json_engine_t *j);
+static int struct_end_cb(json_engine_t *j);
+static int struct_end_qb(json_engine_t *j);
+static int struct_end_cm(json_engine_t *j);
+static int struct_end_eos(json_engine_t *j);
+
+
+/* Next array item: after a comma inside an array a value is expected. */
+static int next_item(json_engine_t *j)
+{
+  int value_state= JST_VALUE;
+
+  j->state= value_state;
+  return 0;
+}
+
+
+/*
+  First item of an array: a value is expected. The character that
+  starts the value has been consumed already, so step back by its saved
+  length (sav_c_len) to let the value handler read it again.
+*/
+static int array_item(json_engine_t *j)
+{
+  j->s.c_str-= j->sav_c_len;
+  j->state= JST_VALUE;
+  return 0;
+}
+
+
+/*
+  The main table of the parser: json_actions[state][character class]
+  is the handler to call when a character of that class is met in that
+  state. Rows follow 'enum json_states' and then 'enum json_all_states';
+  columns follow 'enum json_char_classes' up to NR_C_CLASSES.
+
+  In the OBJ_CONT row the ']' is a syntax error: an object can only be
+  closed with '}', just as an array (ARRAY_CONT) can only be closed
+  with ']'.
+*/
+static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
+/*
+   EOS              {            }             [             ]
+   :                ,            "             -0..9         f
+   n                t              ETC          ERR           BAD
+*/
+{
+  {/*VALUE*/
+    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
+    syntax_error,   syntax_error,v_string,     v_number,     v_false,
+    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
+  {/*KEY*/
+    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
+    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
+    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
+  {/*OBJ_START*/
+    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
+    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
+    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
+  {/*OBJ_END*/
+    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
+    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
+    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
+  {/*ARRAY_START*/
+    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
+    syntax_error,   syntax_error, array_item,  array_item,  array_item,
+    array_item,    array_item,    syntax_error,    not_json_chr, bad_chr},
+  {/*ARRAY_END*/
+    struct_end_eos, syntax_error, struct_end_cb, syntax_error,  struct_end_qb,
+    syntax_error,   struct_end_cm, syntax_error, syntax_error,  syntax_error,
+    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
+  {/*DONE*/
+    done,           syntax_error, syntax_error, syntax_error, syntax_error,
+    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
+    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
+  {/*OBJ_CONT*/
+    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
+    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
+    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
+  {/*ARRAY_CONT*/
+    unexpected_eos, syntax_error, syntax_error,  syntax_error, end_array,
+    syntax_error,   next_item,    syntax_error,  syntax_error, syntax_error,
+    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
+  {/*READ_VALUE*/
+    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
+    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
+    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
+};
+
+
+
+/*
+  Prepare the engine to scan the JSON text delimited by str and end.
+
+  The string reader je->s is set up for the text and charset, the state
+  stack is reset so that its only entry is JST_DONE, and the scanner is
+  told to expect a value first.  Always returns 0.
+*/
+int json_scan_start(json_engine_t *je,
+                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
+{
+  json_string_setup(&je->s, i_cs, str, end);
+
+  je->state= JST_VALUE;
+  je->stack_p= je->stack;
+  *je->stack_p= JST_DONE;
+
+  return 0;
+}
+
+
+/*
+  Expect ':' as the next non-space character and, when it is there,
+  dispatch the first non-space character after it through the
+  JST_VALUE row of the action table.
+
+  Returns the result of that action, or 1 with j->s.error set to
+  JE_EOS, JE_BAD_CHR or JE_SYN when the ':' is missing.
+*/
+static int skip_colon(json_engine_t *j)
+{
+  int chr_class, chr_len;
+
+  get_first_nonspace(&j->s, &chr_class, &chr_len);
+
+  if (chr_class != C_COLON)
+  {
+    if (chr_class == C_EOS)
+      j->s.error= JE_EOS;
+    else if (chr_class == C_BAD)
+      j->s.error= JE_BAD_CHR;
+    else
+      j->s.error= JE_SYN;
+
+    return 1;
+  }
+
+  get_first_nonspace(&j->s, &chr_class, &chr_len);
+  return json_actions[JST_VALUE][chr_class](j);
+}
+
+
+/*
+  Skip the rest of the key name, then dispatch the first non-space
+  character that follows through the JST_VALUE row of the action table.
+
+  Returns 1 if reading the key name left an error in j->s.error,
+  otherwise the result of the dispatched action.
+*/
+static int skip_key(json_engine_t *j)
+{
+  int chr_class, chr_len;
+
+  /* Keep reading key characters until the reader returns non-zero. */
+  do {} while (json_read_keyname_chr(j) == 0);
+
+  if (j->s.error)
+    return 1;
+
+  get_first_nonspace(&j->s, &chr_class, &chr_len);
+  return json_actions[JST_VALUE][chr_class](j);
+}
+
+
+/*
+  End of the string met right after an object or array was closed.
+  The state on top of the stack (it is only read, not popped here)
+  tells whether we are inside an object or an array; the C_EOS action
+  of that state is run.
+*/
+static int struct_end_eos(json_engine_t *j)
+{
+  const int outer_state= *j->stack_p;
+
+  return json_actions[outer_state][C_EOS](j);
+}
+
+
+/*
+  '}' met right after an object or array was closed.
+  The state on top of the stack (it is only read, not popped here)
+  tells whether we are inside an object or an array; the C_RCURB
+  action of that state is run.
+*/
+static int struct_end_cb(json_engine_t *j)
+{
+  const int outer_state= *j->stack_p;
+
+  return json_actions[outer_state][C_RCURB](j);
+}
+
+
+/*
+  ']' met right after an object or array was closed.
+  The state on top of the stack (it is only read, not popped here)
+  tells whether we are inside an object or an array; the C_RSQRB
+  action of that state is run.
+*/
+static int struct_end_qb(json_engine_t *j)
+{
+  const int outer_state= *j->stack_p;
+
+  return json_actions[outer_state][C_RSQRB](j);
+}
+
+
+/*
+  ',' met right after an object or array was closed.
+  The state on top of the stack (it is only read, not popped here)
+  tells whether we are inside an object or an array; the C_COMMA
+  action of that state is run.
+*/
+static int struct_end_cm(json_engine_t *j)
+{
+  const int outer_state= *j->stack_p;
+
+  return json_actions[outer_state][C_COMMA](j);
+}
+
+
+/*
+  Read the next character of a key name.
+
+  Returns 0 when an ordinary character of the name was read; the
+  character is left in j->s.c_next.  Returns 1 in every other case:
+    - the closing quote was met and the ':' after it (spaces between
+      them are skipped) was consumed; j->state becomes JST_VALUE and
+      j->s.error is not touched;
+    - an error was met; j->s.error is set to JE_EOS, JE_BAD_CHR,
+      JE_SYN or JE_STRING_CONST.
+  For a backslash the result of json_handle_esc() is returned as is.
+*/
+int json_read_keyname_chr(json_engine_t *j)
+{
+  int c_len, t;
+
+  if ((c_len= json_next_char(&j->s)) > 0)
+  {
+    j->s.c_str+= c_len;
+    if (j->s.c_next>= 128 || (t= json_instr_chr_map[j->s.c_next]) <= S_ETC)
+      return 0;
+
+    switch (t)
+    {
+    case S_QUOTE:
+      for (;;)  /* Skip spaces until ':'. */
+      {
+        /*
+          The comparison has to stay outside of the assignment: c_len
+          must hold the length of the character, not the 0/1 result of
+          the test, or c_str is advanced by one byte only whenever the
+          character occupies more than one byte.
+        */
+        if ((c_len= json_next_char(&j->s)) > 0)
+        {
+          if (j->s.c_next == ':')
+          {
+            j->s.c_str+= c_len;
+            j->state= JST_VALUE;
+            return 1;
+          }
+
+          if (j->s.c_next < 128 && json_chr_map[j->s.c_next] == C_SPACE)
+          {
+            j->s.c_str+= c_len;
+            continue;
+          }
+          /* Anything but a space or ':' after the key name. */
+          j->s.error= JE_SYN;
+          break;
+        }
+        j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
+        break;
+      }
+      return 1;
+    case S_BKSL:
+      return json_handle_esc(&j->s);
+    case S_ERR:
+      /* Step back so that c_str points at the offending character. */
+      j->s.c_str-= c_len;
+      j->s.error= JE_STRING_CONST;
+      return 1;
+    }
+  }
+  j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
+  return 1;
+}
+
+
+/*
+  Read the value the engine is positioned at.
+
+  When the engine is still in the JST_KEY state the rest of the key
+  name is skipped first.  The first non-space character is then
+  dispatched through the JST_READ_VALUE row of the action table, and
+  j->value_begin / j->value_end are set around what that action
+  consumed.
+
+  Returns 1 if skipping the key name left an error in j->s.error,
+  otherwise the result of the dispatched action.
+*/
+int json_read_value(json_engine_t *j)
+{
+  int chr_class, chr_len, rc;
+
+  if (j->state == JST_KEY)
+  {
+    do {} while (json_read_keyname_chr(j) == 0);
+
+    if (j->s.error)
+      return 1;
+  }
+
+  get_first_nonspace(&j->s, &chr_class, &chr_len);
+
+  /* The value starts chr_len bytes before the current position. */
+  j->value_begin= j->s.c_str - chr_len;
+  rc= json_actions[JST_READ_VALUE][chr_class](j);
+  j->value_end= j->s.c_str;
+
+  return rc;
+}
+
+
+/*
+  Make one step of the scan: fetch the class of the next non-space
+  character (its length is stored in j->sav_c_len) and run the action
+  registered for that class in the current state.
+  Returns whatever the action returns.
+*/
+int json_scan_next(json_engine_t *j)
+{
+  int chr_class;
+
+  get_first_nonspace(&j->s, &chr_class, &j->sav_c_len);
+
+  return json_actions[j->state][chr_class](j);
+}
+
+
+/*
+  Character classes used when parsing a JSON path expression.
+  A class is the column index into json_path_transitions, so the order
+  of the constants must match the column order of that table.
+  Characters below 128 get their class from json_path_chr_map.
+*/
+enum json_path_chr_classes {
+  P_EOS,    /* end of string */
+  P_USD,    /* $ */
+  P_ASTER,  /* * */
+  P_LSQRB,  /* [ */
+  P_RSQRB,  /* ] */
+  P_POINT,  /* . */
+  P_ZERO,   /* 0 */
+  P_DIGIT,  /* 123456789 */
+  P_L,      /* l (for "lax") */
+  P_S,      /* s (for "strict") */
+  P_SPACE,  /* space */
+  P_BKSL,   /* \ */
+  P_ETC,    /* everything else */
+  P_ERR,    /* character disallowed in JSON */
+  P_BAD,    /* invalid character */
+  N_PATH_CLASSES,
+};
+
+
+/*
+  Class of every 7-bit character as seen by the JSON path parser.
+  The table is indexed by the character code; eight codes per line.
+
+  P_S has to sit at 0x53 ('S') and 0x73 ('s'), the letter that starts
+  the "strict" keyword; one position earlier it would classify
+  'R' / 'r' instead.
+*/
+static enum json_path_chr_classes json_path_chr_map[128] = {
+  /* 0x00 - 0x1F: only TAB (0x09), LF (0x0A) and CR (0x0D) are spaces. */
+  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
+  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
+  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
+  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
+
+  /* 0x20 - 0x3F: space, '$', '*', '.', digits. */
+  P_SPACE, P_ETC,   P_ETC,   P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
+  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_ETC,   P_POINT, P_ETC,
+  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
+  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
+
+  /* 0x40 - 0x5F: 'L' (0x4C), 'S' (0x53), '[', '\', ']'. */
+  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
+  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
+  P_ETC,   P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,
+  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL, P_RSQRB, P_ETC,   P_ETC,
+
+  /* 0x60 - 0x7F: 'l' (0x6C), 's' (0x73). */
+  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
+  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
+  P_ETC,   P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,
+  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
+};
+
+
+/*
+  States of the JSON path parser.
+  The constants before N_PATH_STATES are the row indexes of
+  json_path_transitions, so their order must match the rows of that
+  table.  The constants after it have no row of their own: they only
+  occur as values inside the table and are dealt with directly in
+  json_path_setup().
+*/
+enum json_path_states {
+  PS_GO,  /* Initial state. */
+  PS_LAX, /* Parse the 'lax' keyword. */
+  PS_PT,  /* New path's step begins. */
+  PS_AR,  /* Parse array step. */
+  PS_AWD, /* Array wildcard. */
+  PS_Z,   /* '0' (as an array item number). */
+  PS_INT, /* Parse integer (as an array item number). */
+  PS_AS,  /* Space. */
+  PS_KEY, /* Key. */
+  PS_KNM, /* Parse key name. */
+  PS_KWD, /* Key wildcard. */
+  N_PATH_STATES, /* Below are states that aren't in the transitions table. */
+  PS_SCT,  /* Parse the 'strict' keyword. */
+  PS_EKY,  /* '.' after the keyname so next step is the key. */
+  PS_EAR,  /* '[' after the keyname so next step is the array. */
+  PS_ESC,  /* Escaping in the keyname. */
+  PS_OK,   /* Path normally ended. */
+  PS_KOK   /* EOS after the keyname so end the path normally. */
+};
+
+
+/*
+  Transition table of the JSON path parser:
+    json_path_transitions[current state][class of the next character]
+  gives the next state.  Negative entries are JE_xxx error codes that
+  stop the parsing.  Entries that are states listed after
+  N_PATH_STATES are handled in json_path_setup() and replaced there by
+  a state that has a row in this table.
+
+  Every row holds N_PATH_CLASSES entries in the order of
+  enum json_path_chr_classes.  Mind that the PT and AS rows are
+  wrapped after the eighth entry, not after the seventh like the
+  other rows, so the column header below does not line up with them.
+*/
+static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
+{
+/*
+            EOS       $,      *       [       ]       .       0
+            1..9    L       S       SPACE   \       ETC     ERR
+            BAD
+*/
+/* GO  */ { JE_EOS, PS_PT,  JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
+            JE_SYN, PS_LAX, PS_SCT, PS_GO,  JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
+            JE_SYN, PS_LAX, JE_SYN, PS_GO,  JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* PT */  { PS_OK,  JE_SYN, JE_SYN, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
+            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT,  JE_SYN, PS_Z,
+            PS_INT, JE_SYN, JE_SYN, PS_AR,  JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
+            JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
+            JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, PS_INT,
+            PS_INT, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN, JE_SYN,
+            JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
+            PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KNM, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* KNM */ { PS_KOK, PS_KNM, PS_KNM, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
+            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, JE_NOT_JSON_CHR,
+            JE_BAD_CHR},
+/* KWD */ { PS_OK,  JE_SYN, JE_SYN, PS_AR,  JE_SYN, PS_EKY, JE_SYN,
+            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_NOT_JSON_CHR,
+            JE_BAD_CHR}
+};
+
+
+/*
+  Parse the JSON path expression delimited by str and end into p.
+
+  p->steps[0] is always an array step with the wildcard flag set.
+  Every '.' or '[' element of the path appends one more step, and
+  p->last_step is left pointing at the final one.  A leading "lax" or
+  "strict" keyword selects p->mode_strict (FALSE by default).
+
+  Returns 0 on success.  On failure p->s.error is set and a non-zero
+  value is returned: the negative JE_xxx code when the transition
+  table rejects a character, 1 when a keyword or an escape sequence
+  is broken.
+
+  NOTE(review): nothing in this function limits how many steps are
+  appended through p->last_step++; confirm that p->steps cannot be
+  overrun by a long path.
+*/
+int json_path_setup(json_path_t *p,
+                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
+{
+  int c_len, t_next, state= PS_GO;
+
+  json_string_setup(&p->s, i_cs, str, end);
+
+  p->steps[0].type= JSON_PATH_ARRAY;
+  p->steps[0].wild= 1;
+  p->last_step= p->steps;
+  p->mode_strict= FALSE;
+
+  do
+  {
+    /* Classify the next character; anything above ASCII is P_ETC. */
+    if ((c_len= json_next_char(&p->s)) <= 0)
+      t_next= json_eos(&p->s) ? P_EOS : P_BAD;
+    else
+      t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];
+
+    /* Negative table entries are the JE_xxx error codes. */
+    if ((state= json_path_transitions[state][t_next]) < 0)
+      return p->s.error= state;
+
+    p->s.c_str+= c_len;
+
+    switch (state)
+    {
+    case PS_LAX:
+      /* The 'l' is already consumed, "ax" is what remains. */
+      if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
+        return 1;
+      p->mode_strict= FALSE;
+      continue;
+    case PS_SCT:
+      /* The 's' is already consumed, "trict" is what remains. */
+      if ((p->s.error= skip_string_verbatim(&p->s, "trict")))
+        return 1;
+      p->mode_strict= TRUE;
+      /* PS_SCT has no row in the table, go on as after 'lax'. */
+      state= PS_LAX;
+      continue;
+    case PS_AWD:
+      p->last_step->wild= 1;
+      continue;
+    case PS_INT:
+      /* Accumulate the decimal array index digit by digit. */
+      p->last_step->n_item*= 10;
+      p->last_step->n_item+= p->s.c_next - '0';
+      continue;
+    case PS_EKY:
+      /* The '.' ends the key name of the current step. */
+      p->last_step->key_end= p->s.c_str - c_len;
+      state= PS_KEY;
+      /* fall through */
+    case PS_KEY:
+      p->last_step++;
+      p->last_step->type= JSON_PATH_KEY;
+      p->last_step->wild= 0;
+      p->last_step->key= p->s.c_str;
+      continue;
+    case PS_EAR:
+      /* The '[' ends the key name of the current step. */
+      p->last_step->key_end= p->s.c_str - c_len;
+      state= PS_AR;
+      /* fall through */
+    case PS_AR:
+      p->last_step++;
+      p->last_step->type= JSON_PATH_ARRAY;
+      p->last_step->wild= 0;
+      p->last_step->n_item= 0;
+      continue;
+    case PS_KWD:
+      p->last_step->wild= 1;
+      continue;
+    case PS_ESC:
+      if (json_handle_esc(&p->s))
+        return 1;
+      continue;
+    case PS_KOK:
+      /* End of the string ends the key name and the path. */
+      p->last_step->key_end= p->s.c_str - c_len;
+      state= PS_OK;
+      break;
+    };
+  } while (state != PS_OK);
+
+  return 0;
+}
+
+
+/*
+  Scan forward until the object or array the engine is currently in
+  gets closed.  Structures opened on the way are counted so that only
+  the closing of the starting level ends the skip.
+
+  Returns 0 when that closing state is reached, 1 when json_scan_next()
+  returns non-zero before that.
+*/
+int json_skip_level(json_engine_t *j)
+{
+  int depth= 0;
+
+  while (json_scan_next(j) == 0)
+  {
+    if (j->state == JST_OBJ_START || j->state == JST_ARRAY_START)
+    {
+      depth++;
+    }
+    else if (j->state == JST_OBJ_END || j->state == JST_ARRAY_END)
+    {
+      if (depth == 0)
+        return 0;
+      depth--;
+    }
+  }
+
+  return 1;
+}
+
+
+/*
+  Skip the value the engine is positioned at: read it and, unless it
+  is a scalar, skip the whole object or array it opens.
+
+  Returns 1 if the value cannot be read, 0 for a scalar, otherwise the
+  result of json_skip_level().
+*/
+int json_skip_key(json_engine_t *j)
+{
+  int rc= 1;
+
+  if (json_read_value(j) == 0)
+    rc= (json_value_scalar(j)) ? 0 : json_skip_level(j);
+
+  return rc;
+}
+
+
+/*
+  The current step of the path matches the JSON construction, and it
+  is not the last step.  Read the matched value and decide how to go
+  on:
+    - a scalar cannot be descended into, nothing changes, 0 is returned;
+    - an object or array whose type fits the next step makes that step
+      the current one (its array counter is reset), 0 is returned;
+    - an object or array of the other type is skipped entirely and the
+      result of json_skip_level() is returned.
+  Returns 1 if the value cannot be read.
+*/
+static int handle_match(json_engine_t *je, json_path_t *p,
+                        json_path_step_t **p_cur_step, uint *n_arrays)
+{
+  json_path_step_t *next_step;
+
+  DBUG_ASSERT(*p_cur_step < p->last_step);
+
+  if (json_read_value(je))
+    return 1;
+
+  if (json_value_scalar(je))
+    return 0;
+
+  next_step= *p_cur_step + 1;
+  n_arrays[next_step - p->steps]= 0;
+
+  if ((int) je->value_type == (int) next_step->type)
+  {
+    *p_cur_step= next_step;
+    return 0;
+  }
+
+  /* Wrong kind of structure for the next step: stay and skip it. */
+  return json_skip_level(je);
+}
+
+
+/*
+  Compare the name of the current JSON key with the key of a path
+  step, one character at a time.
+
+  Returns 0 as soon as the path key cannot supply a character or the
+  two characters differ.  Once json_read_keyname_chr() stops returning
+  0, the keys match (1 is returned) only if reading one more character
+  from the path key gives a non-zero result too - presumably meaning
+  the path key has ended as well.
+*/
+static int json_key_matches(json_engine_t *je, json_string_t *k)
+{
+  for (;;)
+  {
+    if (json_read_keyname_chr(je) != 0)
+      break;
+
+    if (json_read_string_const_chr(k))
+      return 0;
+
+    if (je->s.c_next != k->c_next)
+      return 0;
+  }
+
+  return json_read_string_const_chr(k) ? 1 : 0;
+}
+
+
+/*
+  Scan the JSON in je looking for the value addressed by the path p.
+
+  *p_cur_step is the path step that is matched against the current
+  state of the engine.  It is moved forward by handle_match() when a
+  step matches a nested object or array, and moved back when an
+  object or array ends.
+
+  Returns je->s.error when the search stops through the 'exit' label,
+  that is when the last step of the path matched or when a helper
+  returned non-zero.  Returns 1 when json_scan_next() ends the scan
+  without that.
+
+  NOTE(review): n_arrays is a local array that is not initialised
+  here; an entry is only written by handle_match() when it moves to a
+  deeper step.  Confirm that a non-wildcard array step can never be
+  reached with its counter still unset.
+  NOTE(review): the result of json_skip_array_item() is not checked.
+*/
+int json_find_value(json_engine_t *je,
+                    json_path_t *p, json_path_step_t **p_cur_step)
+{
+  json_string_t key_name;
+  uint n_arrays[JSON_DEPTH_LIMIT];
+
+  json_string_set_cs(&key_name, p->s.cs);
+
+  do
+  {
+    json_path_step_t *cur_step= *p_cur_step;
+    switch (je->state)
+    {
+    case JST_KEY:
+      DBUG_ASSERT(cur_step->type == JSON_PATH_KEY);
+      if (!cur_step->wild)
+      {
+        json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
+        if (!json_key_matches(je, &key_name))
+        {
+          /* Different key: skip its value and scan on. */
+          if (json_skip_key(je))
+            goto exit;
+          /* Jumps to the json_scan_next() test of the do-while. */
+          continue;
+        }
+      }
+      /* Key matches: done on the last step, otherwise descend. */
+      if (cur_step == p->last_step ||
+          handle_match(je, p, p_cur_step, n_arrays))
+        goto exit;
+      break;
+    case JST_VALUE:
+      DBUG_ASSERT(cur_step->type == JSON_PATH_ARRAY);
+      if (cur_step->wild ||
+          cur_step->n_item == n_arrays[cur_step - p->steps])
+      {
+        /* Array item matches. */
+        if (cur_step == p->last_step ||
+            handle_match(je, p, p_cur_step, n_arrays))
+          goto exit;
+      }
+      else
+      {
+        /* Not the wanted item: skip it and count it. */
+        json_skip_array_item(je);
+        n_arrays[cur_step - p->steps]++;
+      }
+      break;
+    case JST_OBJ_END:
+    case JST_ARRAY_END:
+      /* The structure ended, return to the previous step of the path. */
+      (*p_cur_step)--;
+      break;
+    default:
+      DBUG_ASSERT(0);
+      break;
+    };
+  } while (json_scan_next(je) == 0);
+
+  /* No luck. */
+  return 1;
+
+exit:
+  return je->s.error;
+}
+



More information about the commits mailing list