From 50ef8bdfdc374a97bf140ab501b684d5c048813f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Taro?= Date: Mon, 21 Oct 2024 14:47:49 +0200 Subject: [PATCH 1/3] fix: Update value of expression column names key in the JSON object with new column names --- .../DataSerializerShould_ApplyTo.cs | 42 +++++++++++++++++++ ...lyTo.ExpressionAggregationWithCombine.snap | 34 +++++++++++++++ .../Sections/SectionFactoryJsonMapper.cs | 17 +++++++- 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 Apache.Druid.Querying.Tests.Unit/__snapshots__/DataSerializerShould_ApplyTo.ExpressionAggregationWithCombine.snap diff --git a/Apache.Druid.Querying.Tests.Unit/DataSerializerShould_ApplyTo.cs b/Apache.Druid.Querying.Tests.Unit/DataSerializerShould_ApplyTo.cs index 17c04a1..b29db40 100644 --- a/Apache.Druid.Querying.Tests.Unit/DataSerializerShould_ApplyTo.cs +++ b/Apache.Druid.Querying.Tests.Unit/DataSerializerShould_ApplyTo.cs @@ -30,6 +30,7 @@ private sealed record Data( private sealed record Aggregations( DateTimeOffset Time, bool Bool); + [Test] public void ExpressionAggregation() { @@ -70,6 +71,47 @@ public void DimensionMetric() .Metric(type => type.Dimension(t)); Verify(query); } + + private sealed record Activity( + [property: DataSourceTimeColumn] DateTimeOffset Timestamp, + int Duration, + int DomainID, + int UserID + ); + private sealed record ActivityDimensions(int DomainID); + private sealed record ActivityAggregations(List UserIds, int Duration); + + [Test] + public void ExpressionAggregationWithCombine() + { + var query = new Query + .GroupBy + .WithNoVirtualColumns + .WithAggregations() + .Dimensions(type => new ActivityDimensions(type.Default(activity => activity.DomainID))) + .Aggregations(type => new ActivityAggregations( + type.Expression, string>( + "ARRAY[]", + "__acc", + data => $"array_set_add(__acc, {data.UserID})", + data => $"array_set_add_all(__acc, {data.Duration})", + null, + null, + data => "ARRAY[]", + true, + true, + false, + 1024 * 10 + ), + type.Sum(activity => activity.Duration)) + ); + + var json = query + .MapToJson(dataSerializerOptions: dataSerializerOptions) + .ToString(); + + Snapshot.Match(json); + } private sealed record Inline_Data( [property: DataSourceTimeColumn] DateTimeOffset DataTimeOffset, diff --git a/Apache.Druid.Querying.Tests.Unit/__snapshots__/DataSerializerShould_ApplyTo.ExpressionAggregationWithCombine.snap b/Apache.Druid.Querying.Tests.Unit/__snapshots__/DataSerializerShould_ApplyTo.ExpressionAggregationWithCombine.snap new file mode 100644 index 0000000..690e3af --- /dev/null +++ b/Apache.Druid.Querying.Tests.Unit/__snapshots__/DataSerializerShould_ApplyTo.ExpressionAggregationWithCombine.snap @@ -0,0 +1,34 @@ +{ + "queryType": "groupBy", + "dimensions": [ + { + "type": "default", + "outputName": "DomainID", + "dimension": "DomainID" + } + ], + "aggregations": [ + { + "type": "expression", + "name": "UserIds", + "initialValue": "ARRAY\u003CLONG\u003E[]", + "accumulatorIdentifier": "__acc", + "fold": "array_set_add(__acc, \u0022UserID\u0022)", + "combine": "array_set_add_all(__acc, \u0022Duration\u0022)", + "initialValueCombine": "ARRAY\u003CLONG\u003E[]", + "fields": [ + "\u0022UserID\u0022", + "\u0022Duration\u0022" + ], + "isNullUnlessAggregated": true, + "shouldAggregateNullInputs": true, + "shouldCombineAggregateNullInputs": false, + "maxSizeBytes": 10240 + }, + { + "type": "longSum", + "name": "Duration", + "fieldName": "Duration" + } + ] +} diff --git a/Apache.Druid.Querying/Internal/Sections/SectionFactoryJsonMapper.cs b/Apache.Druid.Querying/Internal/Sections/SectionFactoryJsonMapper.cs index 1b07363..812390b 100644 --- a/Apache.Druid.Querying/Internal/Sections/SectionFactoryJsonMapper.cs +++ b/Apache.Druid.Querying/Internal/Sections/SectionFactoryJsonMapper.cs @@ -40,8 +40,23 @@ void MapCallParam(ElementFactoryCall.Parameter.Any param, string? callResultMemb var (value, columnNames) = DruidExpression.Map(expression.Value, context.ColumnNames, context.DataSerializerOptions); result.Add(expression.Name, value); + if (options.ExpressionColumnNamesKey is string existing) - result.Add(existing, JsonSerializer.SerializeToNode(columnNames, context.QuerySerializerOptions)); + { + if (!result.ContainsKey(existing)) + { + result.Add(existing, JsonSerializer.SerializeToNode(columnNames, context.QuerySerializerOptions)); + } + else + { + result.TryGetPropertyValue(existing, out JsonNode? node); + + string[] fields = node.Deserialize()!; + + result.Remove(existing); + result.Add(existing, JsonSerializer.SerializeToNode(fields.Concat(columnNames), context.QuerySerializerOptions)); + } + } }, (filterFactory, result) => { From 16181fb3cec55ffa55060d0921b1bba01de74eef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Taro?= Date: Thu, 24 Oct 2024 09:04:30 +0200 Subject: [PATCH 2/3] refactor: Simplify logic for adding additional fields --- .../Internal/Sections/SectionFactoryJsonMapper.cs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/Apache.Druid.Querying/Internal/Sections/SectionFactoryJsonMapper.cs b/Apache.Druid.Querying/Internal/Sections/SectionFactoryJsonMapper.cs index 812390b..65e983c 100644 --- a/Apache.Druid.Querying/Internal/Sections/SectionFactoryJsonMapper.cs +++ b/Apache.Druid.Querying/Internal/Sections/SectionFactoryJsonMapper.cs @@ -43,19 +43,12 @@ void MapCallParam(ElementFactoryCall.Parameter.Any param, string? callResultMemb if (options.ExpressionColumnNamesKey is string existing) { - if (!result.ContainsKey(existing)) + if (result.Remove(existing, out var node)) { - result.Add(existing, JsonSerializer.SerializeToNode(columnNames, context.QuerySerializerOptions)); - } - else - { - result.TryGetPropertyValue(existing, out JsonNode? node); - - string[] fields = node.Deserialize()!; - - result.Remove(existing); - result.Add(existing, JsonSerializer.SerializeToNode(fields.Concat(columnNames), context.QuerySerializerOptions)); + var existingColumnNames = node.Deserialize>()!; + columnNames = existingColumnNames.Concat(columnNames).Distinct().ToArray(); } + result.Add(existing, JsonSerializer.SerializeToNode(columnNames, context.QuerySerializerOptions)); } }, (filterFactory, result) => From 5269afc3227e007a61af73a4e6c5a1e34d32ffc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Taro?= Date: Thu, 24 Oct 2024 09:12:45 +0200 Subject: [PATCH 3/3] refactor: Move unit test to a different location --- .../DataSerializerShould_ApplyTo.cs | 41 ------------------- .../QueryShould_MapToRightJson.cs | 36 ++++++++++++++++ ...pressionAggregationWithCombine_query.snap} | 0 3 files changed, 36 insertions(+), 41 deletions(-) rename Apache.Druid.Querying.Tests.Unit/__snapshots__/{DataSerializerShould_ApplyTo.ExpressionAggregationWithCombine.snap => QueryShould_MapToRightJson.ExpressionAggregationWithCombine_query.snap} (100%) diff --git a/Apache.Druid.Querying.Tests.Unit/DataSerializerShould_ApplyTo.cs b/Apache.Druid.Querying.Tests.Unit/DataSerializerShould_ApplyTo.cs index b29db40..353e231 100644 --- a/Apache.Druid.Querying.Tests.Unit/DataSerializerShould_ApplyTo.cs +++ b/Apache.Druid.Querying.Tests.Unit/DataSerializerShould_ApplyTo.cs @@ -71,47 +71,6 @@ public void DimensionMetric() .Metric(type => type.Dimension(t)); Verify(query); } - - private sealed record Activity( - [property: DataSourceTimeColumn] DateTimeOffset Timestamp, - int Duration, - int DomainID, - int UserID - ); - private sealed record ActivityDimensions(int DomainID); - private sealed record ActivityAggregations(List UserIds, int Duration); - - [Test] - public void ExpressionAggregationWithCombine() - { - var query = new Query - .GroupBy - .WithNoVirtualColumns - .WithAggregations() - .Dimensions(type => new ActivityDimensions(type.Default(activity => activity.DomainID))) - .Aggregations(type => new ActivityAggregations( - type.Expression, string>( - "ARRAY[]", - "__acc", - data => $"array_set_add(__acc, {data.UserID})", - data => $"array_set_add_all(__acc, {data.Duration})", - null, - null, - data => "ARRAY[]", - true, - true, - false, - 1024 * 10 - ), - type.Sum(activity => activity.Duration)) - ); - - var json = query - .MapToJson(dataSerializerOptions: dataSerializerOptions) - .ToString(); - - Snapshot.Match(json); - } private sealed record Inline_Data( [property: DataSourceTimeColumn] DateTimeOffset DataTimeOffset, diff --git a/Apache.Druid.Querying.Tests.Unit/QueryShould_MapToRightJson.cs b/Apache.Druid.Querying.Tests.Unit/QueryShould_MapToRightJson.cs index dc7e451..93c20c4 100644 --- a/Apache.Druid.Querying.Tests.Unit/QueryShould_MapToRightJson.cs +++ b/Apache.Druid.Querying.Tests.Unit/QueryShould_MapToRightJson.cs @@ -123,6 +123,42 @@ public void Granularity_() var eight = query.Granularity("T1M", "utc", t); AssertMatch(eight); } + + private sealed record Activity( + [property: DataSourceTimeColumn] DateTimeOffset Timestamp, + int Duration, + int DomainID, + int UserID + ); + private sealed record ActivityDimensions(int DomainID); + private sealed record ActivityAggregations(List UserIds, int Duration); + + [Test] + public void ExpressionAggregationWithCombine() + { + var query = new Query + .GroupBy + .WithNoVirtualColumns + .WithAggregations() + .Dimensions(type => new ActivityDimensions(type.Default(activity => activity.DomainID))) + .Aggregations(type => new ActivityAggregations( + type.Expression, string>( + "ARRAY[]", + "__acc", + data => $"array_set_add(__acc, {data.UserID})", + data => $"array_set_add_all(__acc, {data.Duration})", + null, + null, + data => "ARRAY[]", + true, + true, + false, + 1024 * 10 + ), + type.Sum(activity => activity.Duration)) + ); + AssertMatch(query); + } private record IotMeasurementTimestamps(DateTimeOffset Normal, DateTimeOffset Processed); [Test] diff --git a/Apache.Druid.Querying.Tests.Unit/__snapshots__/DataSerializerShould_ApplyTo.ExpressionAggregationWithCombine.snap b/Apache.Druid.Querying.Tests.Unit/__snapshots__/QueryShould_MapToRightJson.ExpressionAggregationWithCombine_query.snap similarity index 100% rename from Apache.Druid.Querying.Tests.Unit/__snapshots__/DataSerializerShould_ApplyTo.ExpressionAggregationWithCombine.snap rename to Apache.Druid.Querying.Tests.Unit/__snapshots__/QueryShould_MapToRightJson.ExpressionAggregationWithCombine_query.snap