[Commits] 3335236: Implement first_value and last_value as window functions

Vicentiu Ciorbaru vicentiu at mariadb.org
Tue Sep 20 21:46:48 EEST 2016


revision-id: 333523663602658da8c7a98dc7a43527e215d509 (mariadb-10.1.8-274-g3335236)
parent(s): 5c87314473152f221d6851ce2f5af82362bed4d9
author: Vicențiu Ciorbaru
committer: Vicențiu Ciorbaru
timestamp: 2016-09-20 20:45:45 +0200
message:

Implement first_value and last_value as window functions

Currently the implementation doesn't support removal, thus the
computation is performed by running over the window frame again.

---
 sql/item_sum.h         |  2 +-
 sql/item_windowfunc.cc | 23 ++++++++++++++++
 sql/item_windowfunc.h  | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sql/lex.h              |  1 +
 sql/sql_yacc.yy        | 34 ++++++++++++++++++++++--
 5 files changed, 128 insertions(+), 3 deletions(-)

diff --git a/sql/item_sum.h b/sql/item_sum.h
index 1683544..d7d583a 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -350,7 +350,7 @@ class Item_sum :public Item_func_or_sum
     AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC,
     VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC,
     ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC,
-    CUME_DIST_FUNC, NTILE_FUNC
+    CUME_DIST_FUNC, NTILE_FUNC, FIRST_VALUE_FUNC, LAST_VALUE_FUNC
   };
 
   Item **ref_by; /* pointer to a ref to the object used to register it */
diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc
index c8ea979..5a7ee52 100644
--- a/sql/item_windowfunc.cc
+++ b/sql/item_windowfunc.cc
@@ -219,3 +219,26 @@ void Item_sum_percent_rank::setup_window_func(THD *thd, Window_spec *window_spec
   peer_tracker->init();
   clear();
 }
+
+bool Item_sum_first_value::add()
+{
+  if (value_added)
+    return false;
+
+  /* TODO(cvicentiu) This is done like this due to how Item_sum_hybrid works.
+     For this usecase we can actually get rid of arg_cache. arg_cache is just
+     for running a comparison function. */
+  value_added= true;
+  arg_cache->cache_value();
+  value->store(arg_cache);
+  null_value= arg_cache->null_value;
+  return false;
+}
+
+bool Item_sum_last_value::add()
+{
+  arg_cache->cache_value();
+  value->store(arg_cache);
+  null_value= arg_cache->null_value;
+  return false;
+}
diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h
index 3d4c310..6333207 100644
--- a/sql/item_windowfunc.h
+++ b/sql/item_windowfunc.h
@@ -280,6 +280,77 @@ class Item_sum_dense_rank: public Item_sum_int
 };
 
 /*
+   This item will remember the first value added to it. It will not update
+   the value unless it is cleared.
+
+   TODO(cvicentiu) Item_sum_hybrid is a pretty heavyweight class. It holds
+   logic that allows comparing values. It was generally thought out for MIN/MAX
+   functions, but we can use it here as well.
+   Refactor Item_sum_hybrid to only include basic field handling and
+   make a more specialized class for min/max. It might be useful if we'd like
+   to optimize how min/max is computed as a window function. We can potentially
+   implement a PQ within the specialized class to support removal.
+*/
+class Item_sum_first_value : public Item_sum_hybrid
+{
+ public:
+  Item_sum_first_value(THD* thd, Item* arg_expr) :
+    Item_sum_hybrid(thd, arg_expr, -1 /* This cmp parameter is not needed */),
+    value_added(false) {}
+
+  bool add();
+
+  void clear()
+  {
+    value_added= false;
+    Item_sum_hybrid::clear();
+  }
+
+  enum Sumfunctype sum_func () const
+  {
+    return FIRST_VALUE_FUNC;
+  }
+
+  const char*func_name() const
+  {
+    return "first_value";
+  }
+
+  Item *get_copy(THD *thd, MEM_ROOT *mem_root)
+  { return get_item_copy<Item_sum_first_value>(thd, mem_root, this); }
+
+ private:
+  bool value_added;
+};
+
+/*
+   This item will remember the last value added to it.
+
+   This item does not support removal, and can be cleared only by calling
+   clear().
+*/
+class Item_sum_last_value : public Item_sum_hybrid
+{
+ public:
+  Item_sum_last_value(THD* thd, Item* arg_expr) :
+    Item_sum_hybrid(thd, arg_expr, -1 /* This cmp parameter is not needed */) {}
+
+  bool add();
+  enum Sumfunctype sum_func () const
+  {
+    return LAST_VALUE_FUNC;
+  }
+
+  const char*func_name() const
+  {
+    return "last_value";
+  }
+
+  Item *get_copy(THD *thd, MEM_ROOT *mem_root)
+  { return get_item_copy<Item_sum_last_value>(thd, mem_root, this); }
+};
+
+/*
   A base window function (aggregate) that also holds a counter for the number
   of rows.
 */
diff --git a/sql/lex.h b/sql/lex.h
index d1762b2..90dac2e 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -695,6 +695,7 @@ static SYMBOL sql_functions[] = {
   { "DATE_SUB",		SYM(DATE_SUB_INTERVAL)},
   { "DENSE_RANK",       SYM(DENSE_RANK_SYM)},
   { "EXTRACT",		SYM(EXTRACT_SYM)},
+  { "FIRST_VALUE",      SYM(FIRST_VALUE_SYM)},
   { "GROUP_CONCAT",	SYM(GROUP_CONCAT_SYM)},
   { "MAX",		SYM(MAX_SYM)},
   { "MID",		SYM(SUBSTRING)},	/* unireg function */
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 551a86e..b7e617e 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -1245,6 +1245,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  FAULTS_SYM
 %token  FETCH_SYM                     /* SQL-2003-R */
 %token  FILE_SYM
+%token  FIRST_VALUE_SYM               /* SQL-2012 */
 %token  FIRST_SYM                     /* SQL-2003-N */
 %token  FIXED_SYM
 %token  FLOAT_NUM
@@ -9888,8 +9889,23 @@ function_call_conflict:
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
-        | LAST_VALUE '(' expr_list ')'
+          /* LAST_VALUE here conflicts with the definition for window functions.
+             We have these 2 separate rules to remove the shift/reduce conflict.
+          */
+        | LAST_VALUE '(' expr ')'
           {
+            List<Item> *list= new (thd->mem_root) List<Item>;
+            if (list == NULL)
+              MYSQL_YYABORT;
+            list->push_back($3, thd->mem_root);
+
+            $$= new (thd->mem_root) Item_func_last_value(thd, *list);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        | LAST_VALUE '(' expr_list ',' expr ')'
+          {
+            $3->push_back($5, thd->mem_root);
             $$= new (thd->mem_root) Item_func_last_value(thd, *$3);
             if ($$ == NULL)
               MYSQL_YYABORT;
@@ -10478,7 +10494,21 @@ simple_window_func:
             if ($$ == NULL)
               MYSQL_YYABORT;
           }
-        ;
+        |
+          FIRST_VALUE_SYM '(' expr ')'
+          {
+            $$= new (thd->mem_root) Item_sum_first_value(thd, $3);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          LAST_VALUE '(' expr ')'
+          {
+            $$= new (thd->mem_root) Item_sum_last_value(thd, $3);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+         ;
 
 window_name:
           ident


More information about the commits mailing list