Skip to content

Commit

Permalink
Adding normalization of the representation of empty array values Colu…
Browse files Browse the repository at this point in the history
…mnRepresentation (the canonical representation is NULL)
  • Loading branch information
piotrszul committed Jan 7, 2025
1 parent 281e6cd commit 652d850
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -209,22 +209,7 @@ public ColumnRepresentation singular() {
Function.identity()
);
}


/**
* Converts, if necessary, the underlying simple type column to an array with its value as the
* only element.
* <p>
* Two expressions are passed to {@code vectorize}: the identity, and one that wraps a non-null
* value in a single-element array. NOTE(review): presumably the first is applied to columns that
* are already arrays and the second to singular columns - confirm against {@code vectorize}.
*
* @return a new {@link ColumnRepresentation}, presumably one whose underlying column is of an
* {@code ARRAY<?>} type
*/
@Nonnull
public ColumnRepresentation asArray() {
return vectorize(
Function.identity(),
// No otherwise() branch is given. Assuming `functions` is Spark's
// org.apache.spark.sql.functions, when() then evaluates to NULL for unmatched rows, so a null
// value stays null instead of becoming an array that holds a single null.
c -> functions.when(c.isNotNull(), functions.array(c))
);
}


/**
*
*/
Expand Down Expand Up @@ -271,7 +256,6 @@ public ColumnRepresentation removeNulls() {
);
}


/**
* Converts empty arrays to nulls in the current {@link ColumnRepresentation}.
*
Expand All @@ -286,6 +270,14 @@ public ColumnRepresentation normaliseNull() {
);
}

/**
* Produces the canonical form of the current {@link ColumnRepresentation} by applying
* {@link #removeNulls()} followed by {@link #normaliseNull()}.
*
* @return the {@link ColumnRepresentation} obtained after both steps have been applied
*/
@Nonnull
public ColumnRepresentation asCanonical() {
final ColumnRepresentation withoutNulls = removeNulls();
return withoutNulls.normaliseNull();
}


/**
* Transforms the current {@link ColumnRepresentation} in a way that only affects a singular value
Expand Down Expand Up @@ -518,7 +510,7 @@ public ColumnRepresentation cast(@Nonnull final DataType dataType) {
public ColumnRepresentation asString() {
return cast(DataTypes.StringType);
}

@Nonnull
public ColumnRepresentation vectorize2(@Nonnull final ColumnRepresentation other,
@Nonnull final BiFunction<Column, Column, Column> arrayExpression,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public EmptyRepresentation traverse(@Nonnull final String fieldName) {
@Nonnull
@Override
public EmptyRepresentation traverse(@Nonnull final String fieldName,
final Optional<FHIRDefinedType> fhirType) {
@Nonnull final Optional<FHIRDefinedType> fhirType) {
return traverse(fieldName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
package au.csiro.pathling.fhirpath.execution;

import au.csiro.pathling.fhirpath.collection.Collection;
import au.csiro.pathling.fhirpath.column.ColumnRepresentation;
import jakarta.annotation.Nonnull;
import java.util.function.Function;
import lombok.Value;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
Expand All @@ -40,18 +40,12 @@ public Column getValueColumn() {
}

@Nonnull
public Dataset<Row> materialize(@Nonnull final String valueColumnName) {
return dataset.withColumn(valueColumnName, getValueColumn());
}

@Nonnull
public Dataset<Row> materialize(@Nonnull final String valueColumnName,
@Nonnull Function<Column, Column> mapper) {
return dataset.withColumn(valueColumnName, mapper.apply(getValueColumn()));
public Dataset<Row> toIdValueDataset() {
return dataset.select(dataset.col("id").alias("id"), getValueColumn().alias("value"));
}

@Nonnull
public Dataset<Row> toIdValueDataset() {
return dataset.select(dataset.col("id").alias("id"), getValueColumn().alias("value"));
public CollectionDataset toCanonical() {
return new CollectionDataset(dataset, value.map(ColumnRepresentation::asCanonical));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import au.csiro.pathling.encoders.FhirEncoders;
import au.csiro.pathling.fhirpath.collection.IntegerCollection;
import au.csiro.pathling.fhirpath.collection.ReferenceCollection;
import au.csiro.pathling.fhirpath.collection.StringCollection;
import au.csiro.pathling.fhirpath.execution.CollectionDataset;
import au.csiro.pathling.fhirpath.execution.FhirpathExecutor;
import au.csiro.pathling.fhirpath.execution.MultiFhirpathEvaluator.ManyProvider;
Expand Down Expand Up @@ -62,17 +62,16 @@ CollectionDataset evalExpression(@Nonnull final ObjectDataSource dataSource,
@Nonnull final String fhirExpression) {

return createEvaluator(subjectResource, dataSource)
.evaluate(subjectResource, fhirExpression);
.evaluate(subjectResource, fhirExpression)
.toCanonical();

}

@Nonnull
Dataset<Row> selectExpression(@Nonnull final ObjectDataSource dataSource,
@Nonnull final ResourceType subjectResource,
@Nonnull final String fhirExpression) {

return createEvaluator(subjectResource, dataSource)
.evaluate(subjectResource, fhirExpression)
return evalExpression(dataSource, subjectResource, fhirExpression)
.toIdValueDataset();
}

Expand Down Expand Up @@ -286,15 +285,14 @@ void testOfTypeForReference() {
);

final CollectionDataset evalResult = evalExpression(dataSource, ResourceType.OBSERVATION,
"extension.value.ofType(Reference)");
"extension.value.ofType(Reference).reference");

Assertions.assertThat(evalResult)
.isElementPath(ReferenceCollection.class)
.isElementPath(StringCollection.class)
.selectResult()
.hasRowsUnordered(
RowFactory.create("1", 17),
RowFactory.create("2", null),
RowFactory.create("3", null)
RowFactory.create("1", "MolecularSequence/1"),
RowFactory.create("2", null)
);
}

Expand Down

0 comments on commit 652d850

Please sign in to comment.