From 652d8509033075fc4255a1b6294618cb53031d37 Mon Sep 17 00:00:00 2001 From: Piotr Szul Date: Wed, 8 Jan 2025 09:28:33 +1000 Subject: [PATCH] Adding normalization of the representation of empty array values in ColumnRepresentation (the canonical representation is NULL) --- .../fhirpath/column/ColumnRepresentation.java | 28 +++++++------------ .../fhirpath/column/EmptyRepresentation.java | 2 +- .../fhirpath/execution/CollectionDataset.java | 16 ++++------- .../execution/SingleResourceFhirpathTest.java | 18 ++++++------ 4 files changed, 24 insertions(+), 40 deletions(-) diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/ColumnRepresentation.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/ColumnRepresentation.java index ef5ef20647..87e70f9fc2 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/ColumnRepresentation.java +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/ColumnRepresentation.java @@ -209,22 +209,7 @@ public ColumnRepresentation singular() { Function.identity() ); } - - - /** - * If necessary converters the underlying simple type column to an array with its value as the - * only element. - * - * @return A new {@link ColumnRepresentation} when the underlying column has the ARRAY. - */ - @Nonnull - public ColumnRepresentation asArray() { - return vectorize( - Function.identity(), - c -> functions.when(c.isNotNull(), functions.array(c)) - ); - } - + /** * */ @@ -271,7 +256,6 @@ public ColumnRepresentation removeNulls() { ); } - /** * Converts empty arrays to nulls in the current {@link ColumnRepresentation}. * @@ -286,6 +270,14 @@ public ColumnRepresentation normaliseNull() { ); } + /** + * Converts the current {@link ColumnRepresentation} to a canonical form. 
+ */ + @Nonnull + public ColumnRepresentation asCanonical() { + return removeNulls().normaliseNull(); + } + /** * Transforms the current {@link ColumnRepresentation} in a way that only affects a singular value @@ -518,7 +510,7 @@ public ColumnRepresentation cast(@Nonnull final DataType dataType) { public ColumnRepresentation asString() { return cast(DataTypes.StringType); } - + @Nonnull public ColumnRepresentation vectorize2(@Nonnull final ColumnRepresentation other, @Nonnull final BiFunction arrayExpression, diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/EmptyRepresentation.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/EmptyRepresentation.java index 0b7f4a5023..78e9874567 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/EmptyRepresentation.java +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/column/EmptyRepresentation.java @@ -59,7 +59,7 @@ public EmptyRepresentation traverse(@Nonnull final String fieldName) { @Nonnull @Override public EmptyRepresentation traverse(@Nonnull final String fieldName, - final Optional fhirType) { + @Nonnull final Optional fhirType) { return traverse(fieldName); } diff --git a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/execution/CollectionDataset.java b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/execution/CollectionDataset.java index 7d32a1a807..e65d16090b 100644 --- a/fhirpath/src/main/java/au/csiro/pathling/fhirpath/execution/CollectionDataset.java +++ b/fhirpath/src/main/java/au/csiro/pathling/fhirpath/execution/CollectionDataset.java @@ -18,8 +18,8 @@ package au.csiro.pathling.fhirpath.execution; import au.csiro.pathling.fhirpath.collection.Collection; +import au.csiro.pathling.fhirpath.column.ColumnRepresentation; import jakarta.annotation.Nonnull; -import java.util.function.Function; import lombok.Value; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; @@ -40,18 +40,12 @@ public Column getValueColumn() { } @Nonnull - public 
Dataset materialize(@Nonnull final String valueColumnName) { - return dataset.withColumn(valueColumnName, getValueColumn()); - } - - @Nonnull - public Dataset materialize(@Nonnull final String valueColumnName, - @Nonnull Function mapper) { - return dataset.withColumn(valueColumnName, mapper.apply(getValueColumn())); + public Dataset toIdValueDataset() { + return dataset.select(dataset.col("id").alias("id"), getValueColumn().alias("value")); } @Nonnull - public Dataset toIdValueDataset() { - return dataset.select(dataset.col("id").alias("id"), getValueColumn().alias("value")); + public CollectionDataset toCanonical() { + return new CollectionDataset(dataset, value.map(ColumnRepresentation::asCanonical)); } } diff --git a/fhirpath/src/test/java/au/csiro/pathling/fhirpathe/execution/SingleResourceFhirpathTest.java b/fhirpath/src/test/java/au/csiro/pathling/fhirpathe/execution/SingleResourceFhirpathTest.java index a82ad45ed5..6b97a2cb1e 100644 --- a/fhirpath/src/test/java/au/csiro/pathling/fhirpathe/execution/SingleResourceFhirpathTest.java +++ b/fhirpath/src/test/java/au/csiro/pathling/fhirpathe/execution/SingleResourceFhirpathTest.java @@ -4,7 +4,7 @@ import au.csiro.pathling.encoders.FhirEncoders; import au.csiro.pathling.fhirpath.collection.IntegerCollection; -import au.csiro.pathling.fhirpath.collection.ReferenceCollection; +import au.csiro.pathling.fhirpath.collection.StringCollection; import au.csiro.pathling.fhirpath.execution.CollectionDataset; import au.csiro.pathling.fhirpath.execution.FhirpathExecutor; import au.csiro.pathling.fhirpath.execution.MultiFhirpathEvaluator.ManyProvider; @@ -62,7 +62,8 @@ CollectionDataset evalExpression(@Nonnull final ObjectDataSource dataSource, @Nonnull final String fhirExpression) { return createEvaluator(subjectResource, dataSource) - .evaluate(subjectResource, fhirExpression); + .evaluate(subjectResource, fhirExpression) + .toCanonical(); } @@ -70,9 +71,7 @@ CollectionDataset evalExpression(@Nonnull final ObjectDataSource 
dataSource, Dataset selectExpression(@Nonnull final ObjectDataSource dataSource, @Nonnull final ResourceType subjectResource, @Nonnull final String fhirExpression) { - - return createEvaluator(subjectResource, dataSource) - .evaluate(subjectResource, fhirExpression) + return evalExpression(dataSource, subjectResource, fhirExpression) .toIdValueDataset(); } @@ -286,15 +285,14 @@ void testOfTypeForReference() { ); final CollectionDataset evalResult = evalExpression(dataSource, ResourceType.OBSERVATION, - "extension.value.ofType(Reference)"); + "extension.value.ofType(Reference).reference"); Assertions.assertThat(evalResult) - .isElementPath(ReferenceCollection.class) + .isElementPath(StringCollection.class) .selectResult() .hasRowsUnordered( - RowFactory.create("1", 17), - RowFactory.create("2", null), - RowFactory.create("3", null) + RowFactory.create("1", "MolecularSequence/1"), + RowFactory.create("2", null) ); }