Skip to content

Commit ac032a0

Browse files
authored
SQL: Fix bug regarding histograms usage in scripting (#36866)
Allow scripts to correctly reference grouping functions Fix bug in translation of date/time functions mixed with histograms. Enhance Verifier to prevent histograms being nested inside other functions inside GROUP BY (as it implies double grouping) Extend Histogram docs
1 parent 4cd5705 commit ac032a0

File tree

15 files changed

+234
-47
lines changed

15 files changed

+234
-47
lines changed

‎docs/reference/sql/functions/grouping.asciidoc‎

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ The histogram function takes all matching values and divides them into buckets w
3636
bucket_key = Math.floor(value / interval) * interval
3737
----
3838

39+
NOTE:: The histogram in SQL does *NOT* return empty buckets for missing intervals as the traditional <<search-aggregations-bucket-histogram-aggregation, histogram>> and <<search-aggregations-bucket-datehistogram-aggregation, date histogram>>. Such behavior does not fit conceptually in SQL which treats all missing values as `NULL`; as such the histogram places all missing values in the `NULL` group.
40+
3941
`Histogram` can be applied on either numeric fields:
4042

4143

@@ -51,4 +53,26 @@ or date/time fields:
5153
include-tagged::{sql-specs}/docs.csv-spec[histogramDate]
5254
----
5355

56+
Expressions inside the histogram are also supported as long as the
57+
return type is numeric:
58+
59+
["source","sql",subs="attributes,callouts,macros"]
60+
----
61+
include-tagged::{sql-specs}/docs.csv-spec[histogramNumericExpression]
62+
----
63+
64+
Do note that histograms (and grouping functions in general) allow custom expressions but cannot have any functions applied to them in the `GROUP BY`. In other words, the following statement is *NOT* allowed:
5465

66+
["source","sql",subs="attributes,callouts,macros"]
67+
----
68+
include-tagged::{sql-specs}/docs.csv-spec[expressionOnHistogramNotAllowed]
69+
----
70+
71+
as it requires two groupings (one for histogram followed by a second for applying the function on top of the histogram groups).
72+
73+
Instead one can rewrite the query to move the expression on the histogram _inside_ of it:
74+
75+
["source","sql",subs="attributes,callouts,macros"]
76+
----
77+
include-tagged::{sql-specs}/docs.csv-spec[histogramDateExpression]
78+
----

‎x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec‎

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,51 @@ SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp
262262
null |10
263263
;
264264

265-
histogramDateWithDateFunction-Ignore
266-
SELECT YEAR(HISTOGRAM(birth_date, INTERVAL 1 YEAR)) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
265+
histogramDateWithMonthOnTop
266+
schema::h:i|c:l
267+
SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
268+
269+
h | c
270+
---------------+---------------
271+
12 |7
272+
10 |17
273+
8 |16
274+
6 |16
275+
4 |18
276+
2 |10
277+
0 |6
278+
null |10
279+
;
280+
281+
histogramDateWithYearOnTop
282+
schema::h:i|c:l
283+
SELECT HISTOGRAM(YEAR(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
284+
h | c
285+
---------------+---------------
286+
1964 |5
287+
1962 |13
288+
1960 |16
289+
1958 |16
290+
1956 |9
291+
1954 |12
292+
1952 |19
293+
null |10
294+
;
267295

268-
269-
296+
histogramNumericWithExpression
297+
schema::h:i|c:l
298+
SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
299+
300+
h | c
301+
---------------+---------------
302+
90 |10
303+
80 |10
304+
70 |10
305+
60 |10
306+
50 |10
307+
40 |10
308+
30 |10
309+
20 |10
310+
10 |10
311+
0 |10
270312
;

‎x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec‎

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,27 @@ SELECT HISTOGRAM(salary, 5000) AS h FROM emp GROUP BY h;
725725
// end::histogramNumeric
726726
;
727727

728+
histogramNumericExpression
729+
schema::h:i|c:l
730+
// tag::histogramNumericExpression
731+
SELECT HISTOGRAM(salary % 100, 10) AS h, COUNT(*) AS c FROM emp GROUP BY h;
732+
733+
h | c
734+
---------------+---------------
735+
0 |10
736+
10 |15
737+
20 |10
738+
30 |14
739+
40 |9
740+
50 |9
741+
60 |8
742+
70 |13
743+
80 |3
744+
90 |9
745+
746+
// end::histogramNumericExpression
747+
;
748+
728749
histogramDate
729750
schema::h:ts|c:l
730751
// tag::histogramDate
@@ -752,6 +773,30 @@ null |10
752773
// end::histogramDate
753774
;
754775

776+
expressionOnHistogramNotAllowed-Ignore
777+
// tag::expressionOnHistogramNotAllowed
778+
SELECT MONTH(HISTOGRAM(birth_date), 2)) AS h, COUNT(*) as c FROM emp GROUP BY h ORDER BY h DESC;
779+
// end::expressionOnHistogramNotAllowed
780+
781+
histogramDateExpression
782+
schema::h:i|c:l
783+
// tag::histogramDateExpression
784+
SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM emp GROUP BY h ORDER BY h DESC;
785+
786+
h | c
787+
---------------+---------------
788+
12 |7
789+
10 |17
790+
8 |16
791+
6 |16
792+
4 |18
793+
2 |10
794+
0 |6
795+
null |10
796+
797+
// end::histogramDateExpression
798+
;
799+
755800
///////////////////////////////
756801
//
757802
// Date/Time

‎x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java‎

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import org.elasticsearch.xpack.sql.expression.function.FunctionAttribute;
1919
import org.elasticsearch.xpack.sql.expression.function.Functions;
2020
import org.elasticsearch.xpack.sql.expression.function.Score;
21+
import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
22+
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
2123
import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction;
2224
import org.elasticsearch.xpack.sql.expression.predicate.conditional.ConditionalFunction;
2325
import org.elasticsearch.xpack.sql.expression.predicate.operator.comparison.In;
@@ -224,6 +226,7 @@ Collection<Failure> verify(LogicalPlan plan) {
224226
validateConditional(p, localFailures);
225227

226228
checkFilterOnAggs(p, localFailures);
229+
checkFilterOnGrouping(p, localFailures);
227230

228231
if (!groupingFailures.contains(p)) {
229232
checkGroupBy(p, localFailures, resolvedFunctions, groupingFailures);
@@ -419,7 +422,7 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node<?> sourc
419422
return true;
420423
}
421424
// skip aggs (allowed to refer to non-group columns)
422-
if (Functions.isAggregate(e)) {
425+
if (Functions.isAggregate(e) || Functions.isGrouping(e)) {
423426
return true;
424427
}
425428

@@ -448,6 +451,21 @@ private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures
448451
}
449452
}));
450453

454+
a.groupings().forEach(e -> {
455+
if (Functions.isGrouping(e) == false) {
456+
e.collectFirstChildren(c -> {
457+
if (Functions.isGrouping(c)) {
458+
localFailures.add(fail(c,
459+
"Cannot combine [%s] grouping function inside GROUP BY, found [%s];"
460+
+ " consider moving the expression inside the histogram",
461+
Expressions.name(c), Expressions.name(e)));
462+
return true;
463+
}
464+
return false;
465+
});
466+
}
467+
});
468+
451469
if (!localFailures.isEmpty()) {
452470
return false;
453471
}
@@ -547,19 +565,30 @@ private static void checkFilterOnAggs(LogicalPlan p, Set<Failure> localFailures)
547565
if (p instanceof Filter) {
548566
Filter filter = (Filter) p;
549567
if ((filter.child() instanceof Aggregate) == false) {
550-
filter.condition().forEachDown(f -> {
551-
if (Functions.isAggregate(f) || Functions.isGrouping(f)) {
552-
String type = Functions.isAggregate(f) ? "aggregate" : "grouping";
553-
localFailures.add(fail(f,
554-
"Cannot use WHERE filtering on %s function [%s], use HAVING instead", type, Expressions.name(f)));
568+
filter.condition().forEachDown(e -> {
569+
if (Functions.isAggregate(e) || e instanceof AggregateFunctionAttribute) {
570+
localFailures.add(
571+
fail(e, "Cannot use WHERE filtering on aggregate function [%s], use HAVING instead", Expressions.name(e)));
555572
}
556-
557-
}, Function.class);
573+
}, Expression.class);
558574
}
559575
}
560576
}
561577

562578

579+
private static void checkFilterOnGrouping(LogicalPlan p, Set<Failure> localFailures) {
580+
if (p instanceof Filter) {
581+
Filter filter = (Filter) p;
582+
filter.condition().forEachDown(e -> {
583+
if (Functions.isGrouping(e) || e instanceof GroupingFunctionAttribute) {
584+
localFailures
585+
.add(fail(e, "Cannot filter on grouping function [%s], use its argument instead", Expressions.name(e)));
586+
}
587+
}, Expression.class);
588+
}
589+
}
590+
591+
563592
private static void checkForScoreInsideFunctions(LogicalPlan p, Set<Failure> localFailures) {
564593
// Make sure that SCORE is only used in "top level" functions
565594
p.forEachExpressions(e ->
@@ -647,4 +676,4 @@ private static boolean areTypesCompatible(DataType left, DataType right) {
647676
(left.isNumeric() && right.isNumeric());
648677
}
649678
}
650-
}
679+
}

‎x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/whitelist/InternalSqlScriptUtils.java‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,9 @@ public static Integer weekOfYear(Object dateTime, String tzId) {
346346
}
347347

348348
public static ZonedDateTime asDateTime(Object dateTime) {
349+
if (dateTime == null) {
350+
return null;
351+
}
349352
if (dateTime instanceof JodaCompatibleZonedDateTime) {
350353
return ((JodaCompatibleZonedDateTime) dateTime).getZonedDateTime();
351354
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.sql.expression.gen.script;
7+
8+
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
9+
10+
class Grouping extends Param<GroupingFunctionAttribute> {
11+
12+
Grouping(GroupingFunctionAttribute groupRef) {
13+
super(groupRef);
14+
}
15+
16+
String groupName() {
17+
return value().functionId();
18+
}
19+
20+
@Override
21+
public String prefix() {
22+
return "g";
23+
}
24+
}

‎x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/Params.java‎

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -85,25 +85,15 @@ Map<String, String> asAggPaths() {
8585
String s = a.aggProperty() != null ? a.aggProperty() : a.aggName();
8686
map.put(p.prefix() + aggs++, s);
8787
}
88-
}
89-
90-
return map;
91-
}
92-
93-
// return the agg refs
94-
List<String> asAggRefs() {
95-
List<String> refs = new ArrayList<>();
96-
97-
for (Param<?> p : params) {
98-
if (p instanceof Agg) {
99-
refs.add(((Agg) p).aggName());
88+
if (p instanceof Grouping) {
89+
Grouping g = (Grouping) p;
90+
map.put(p.prefix() + aggs++, g.groupName());
10091
}
10192
}
10293

103-
return refs;
94+
return map;
10495
}
10596

106-
10797
private static List<Param<?>> flatten(List<Param<?>> params) {
10898
List<Param<?>> flatten = emptyList();
10999

@@ -116,6 +106,9 @@ private static List<Param<?>> flatten(List<Param<?>> params) {
116106
else if (p instanceof Agg) {
117107
flatten.add(p);
118108
}
109+
else if (p instanceof Grouping) {
110+
flatten.add(p);
111+
}
119112
else if (p instanceof Var) {
120113
flatten.add(p);
121114
}
@@ -131,4 +124,4 @@ else if (p instanceof Var) {
131124
public String toString() {
132125
return params.toString();
133126
}
134-
}
127+
}

‎x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ParamsBuilder.java‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package org.elasticsearch.xpack.sql.expression.gen.script;
77

88
import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
9+
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
910

1011
import java.util.ArrayList;
1112
import java.util.List;
@@ -28,6 +29,11 @@ public ParamsBuilder agg(AggregateFunctionAttribute agg) {
2829
return this;
2930
}
3031

32+
public ParamsBuilder grouping(GroupingFunctionAttribute grouping) {
33+
params.add(new Grouping(grouping));
34+
return this;
35+
}
36+
3137
public ParamsBuilder script(Params ps) {
3238
params.add(new Script(ps));
3339
return this;

‎x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ScriptTemplate.java‎

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,6 @@ public Params params() {
4444
return params;
4545
}
4646

47-
public List<String> aggRefs() {
48-
return params.asAggRefs();
49-
}
50-
5147
public Map<String, String> aggPaths() {
5248
return params.asAggPaths();
5349
}

‎x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/gen/script/ScriptWeaver.java‎

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.xpack.sql.expression.Expressions;
1313
import org.elasticsearch.xpack.sql.expression.FieldAttribute;
1414
import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
15+
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
1516
import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunctionAttribute;
1617
import org.elasticsearch.xpack.sql.expression.literal.IntervalDayTime;
1718
import org.elasticsearch.xpack.sql.expression.literal.IntervalYearMonth;
@@ -37,6 +38,9 @@ default ScriptTemplate asScript(Expression exp) {
3738
if (attr instanceof AggregateFunctionAttribute) {
3839
return scriptWithAggregate((AggregateFunctionAttribute) attr);
3940
}
41+
if (attr instanceof GroupingFunctionAttribute) {
42+
return scriptWithGrouping((GroupingFunctionAttribute) attr);
43+
}
4044
if (attr instanceof FieldAttribute) {
4145
return scriptWithField((FieldAttribute) attr);
4246
}
@@ -83,6 +87,16 @@ default ScriptTemplate scriptWithAggregate(AggregateFunctionAttribute aggregate)
8387
dataType());
8488
}
8589

90+
default ScriptTemplate scriptWithGrouping(GroupingFunctionAttribute grouping) {
91+
String template = "{}";
92+
if (grouping.dataType() == DataType.DATE) {
93+
template = "{sql}.asDateTime({})";
94+
}
95+
return new ScriptTemplate(processScript(template),
96+
paramsBuilder().grouping(grouping).build(),
97+
dataType());
98+
}
99+
86100
default ScriptTemplate scriptWithField(FieldAttribute field) {
87101
return new ScriptTemplate(processScript("doc[{}].value"),
88102
paramsBuilder().variable(field.name()).build(),

0 commit comments

Comments
 (0)