From 9cb328f2fdc0e083ccf9e456481abf6137ed2d4e Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Thu, 5 Oct 2023 17:56:31 +0200 Subject: [PATCH] Add a default fallback value to paths In some query languages and for some formats, when a path is not found, it is fine to output nothing. For instance, for XML, an empty sequence of nodes is ok. However, in some cases, a default value has to be output. This is the case in the JSON query language. When the query builds an object whose value is a sub-query, then the value cannot be empty, even if the sub-query finds no element. Moreover, the query must return exactly one result. If it returns zero or more than one element, then the generated JSON data is invalid. But in the JSON query language, generating no data is still valid, and happens when the path is iterated over, for instance. This change allows the query to specify, for each path, whether it should return a (unique) default value or not. If not, the previous behavior is kept; otherwise, when nothing matches, the default value is output. 
--- .../main/scala/fs2/data/mft/query/Query.scala | 2 +- .../fs2/data/mft/query/QueryCompiler.scala | 27 +++++--- .../src/main/scala/fs2/data/pfsa/PDFA.scala | 6 +- .../src/main/scala/fs2/data/pfsa/PNFA.scala | 1 + .../main/scala/fs2/data/pfsa/Regular.scala | 6 +- .../test/scala/fs2/data/mft/QuerySpec.scala | 23 +++---- .../data/json/jq/internal/ESPJqCompiler.scala | 22 ++++--- .../test/scala/fs2/data/json/jq/JqSpec.scala | 66 +++++++++++++++++++ 8 files changed, 117 insertions(+), 36 deletions(-) diff --git a/finite-state/shared/src/main/scala/fs2/data/mft/query/Query.scala b/finite-state/shared/src/main/scala/fs2/data/mft/query/Query.scala index 154dcb10e..7513294b7 100644 --- a/finite-state/shared/src/main/scala/fs2/data/mft/query/Query.scala +++ b/finite-state/shared/src/main/scala/fs2/data/mft/query/Query.scala @@ -29,7 +29,7 @@ object Query { case class ForClause[Tag, Path](variable: String, source: Path, result: Query[Tag, Path]) extends Query[Tag, Path] case class LetClause[Tag, Path](variable: String, query: Query[Tag, Path], result: Query[Tag, Path]) extends Query[Tag, Path] - case class Ordpath[Tag, Path](path: Path) extends Query[Tag, Path] + case class Ordpath[Tag, Path](path: Path, default: Option[Tag]) extends Query[Tag, Path] case class Variable[Tag, Path](name: String) extends Query[Tag, Path] case class Node[Tag, Path](tag: Tag, child: Query[Tag, Path]) extends Query[Tag, Path] case class Leaf[Tag, Path](tag: Tag) extends Query[Tag, Path] diff --git a/finite-state/shared/src/main/scala/fs2/data/mft/query/QueryCompiler.scala b/finite-state/shared/src/main/scala/fs2/data/mft/query/QueryCompiler.scala index 2979a0a9f..6248c3e5c 100644 --- a/finite-state/shared/src/main/scala/fs2/data/mft/query/QueryCompiler.scala +++ b/finite-state/shared/src/main/scala/fs2/data/mft/query/QueryCompiler.scala @@ -18,10 +18,11 @@ package fs2.data package mft package query -import pfsa.{Candidate, Pred, Regular} import cats.Eq -import cats.syntax.all._ import 
cats.data.NonEmptyList +import cats.syntax.all._ + +import pfsa.{Candidate, Pred, Regular} /** This compiler can be used to compile to an MFT any query language that can be represented by nested for loops. * @@ -82,7 +83,10 @@ private[fs2] abstract class QueryCompiler[InTag, OutTag, Path] { // input is copied in the first argument q0(any) -> qinit(x0, qcopy(x0)) - def translatePath(path: Path, start: builder.StateBuilder, end: builder.StateBuilder): Unit = { + def translatePath(path: Path, + default: Rhs[OutTag], + start: builder.StateBuilder, + end: builder.StateBuilder): Unit = { val regular = path2regular(path) val dfa = regular.deriveDFA // resolve transitions into patterns and guards @@ -114,6 +118,7 @@ private[fs2] abstract class QueryCompiler[InTag, OutTag, Path] { val states2 = transitions.foldLeft(states1) { case (states, (pattern, guard, tgt)) => val finalTgt = dfa.finals.contains(tgt) + val trapTgt = dfa.trap.contains(tgt) val (q2, states1) = states.get(tgt) match { case Some(q2) => (q2, states) @@ -122,14 +127,16 @@ private[fs2] abstract class QueryCompiler[InTag, OutTag, Path] { (q2, states.updated(tgt, q2)) } val pat: builder.Guardable = tagOf(pattern).fold(anyNode)(aNode(_)) - if (!finalTgt) { - q1(pat.when(guard)) -> q2(x1, copyArgs: _*) ~ q1(x2, copyArgs: _*) + if (trapTgt) { + q1(pat.when(guard)) -> (if (default == eps) q2(x1, copyArgs: _*) ~ q1(x2, copyArgs: _*) else default) + } else if (!finalTgt) { + q1(pat.when(guard)) -> q2(x1, copyArgs: _*) ~ (if (default == eps) q1(x2, copyArgs: _*) else eps) } else if (emitSelected) { q1(pat.when(guard)) -> end(x1, (copyArgs :+ copy(qcopy(x1))): _*) ~ q2(x1, copyArgs: _*) ~ - q1(x2, copyArgs: _*) + (if (default == eps) q1(x2, copyArgs: _*) else eps) } else { q1(pat.when(guard)) -> end(x1, (copyArgs :+ qcopy(x1)): _*) ~ q2(x1, copyArgs: _*) ~ - q1(x2, copyArgs: _*) + (if (default == eps) q1(x2, copyArgs: _*) else eps) } states1 } @@ -148,7 +155,7 @@ private[fs2] abstract class QueryCompiler[InTag, OutTag, 
Path] { val q1 = state(args = q.nargs + 1) // compile the variable binding path - translatePath(source, q, q1) + translatePath(source, eps, q, q1) // then the body with the bound variable translate(result, variable :: vars, q1) @@ -166,11 +173,11 @@ private[fs2] abstract class QueryCompiler[InTag, OutTag, Path] { val copyArgs = List.tabulate(q.nargs)(y(_)) q(any) -> q1(x0, (copyArgs :+ qv(x0, copyArgs: _*)): _*) - case Query.Ordpath(path) => + case Query.Ordpath(path, default) => val q1 = state(args = q.nargs + 1) // compile the path - translatePath(path, q, q1) + translatePath(path, default.map(leaf(_)).getOrElse(eps), q, q1) // emit the result q1(any) -> y(q.nargs) diff --git a/finite-state/shared/src/main/scala/fs2/data/pfsa/PDFA.scala b/finite-state/shared/src/main/scala/fs2/data/pfsa/PDFA.scala index 0ae36d1c5..83d397169 100644 --- a/finite-state/shared/src/main/scala/fs2/data/pfsa/PDFA.scala +++ b/finite-state/shared/src/main/scala/fs2/data/pfsa/PDFA.scala @@ -21,8 +21,10 @@ import cats.syntax.foldable._ import Pred.syntax._ -private[data] class PDFA[P, T](val init: Int, val finals: Set[Int], val transitions: Array[List[(P, Int)]])(implicit - P: Pred[P, T]) { +private[data] class PDFA[P, T](val init: Int, + val finals: Set[Int], + val trap: Option[Int], + val transitions: Array[List[(P, Int)]])(implicit P: Pred[P, T]) { def step(q: Int, t: T): Option[Int] = if (q >= transitions.length) diff --git a/finite-state/shared/src/main/scala/fs2/data/pfsa/PNFA.scala b/finite-state/shared/src/main/scala/fs2/data/pfsa/PNFA.scala index 5ee62d47a..ebe0957e4 100644 --- a/finite-state/shared/src/main/scala/fs2/data/pfsa/PNFA.scala +++ b/finite-state/shared/src/main/scala/fs2/data/pfsa/PNFA.scala @@ -76,6 +76,7 @@ private[data] class PNFA[P, T](val init: Int, val finals: Set[Int], val transiti case Nil => new PDFA[P, T](0, newFinals.map(newStates(_)), + None, newTransitions.result().map(_.map { case (p, q) => (p, newStates(q)) })) case q :: qs => if (newStates.contains(q)) { 
diff --git a/finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala b/finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala index a8d2d2b58..ee7b578b8 100644 --- a/finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala +++ b/finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala @@ -194,8 +194,10 @@ sealed abstract class Regular[CharSet] { } val (qs, transitions) = explore(Chain.one(this), Map.empty, this) - val finals = qs.zipWithIndex.collect { case (re, idx) if re.acceptEpsilon => idx }.toList.toSet - new PDFA[CharSet, C](0, finals, Array.tabulate(qs.size.toInt)(transitions.getOrElse(_, Nil))) + val indexedStates = qs.zipWithIndex + val finals = indexedStates.collect { case (re, idx) if re.acceptEpsilon => idx }.toList.toSet + val trap = indexedStates.collectFirst { case (Regular.Chars(cs), idx) if cs === never => idx } + new PDFA[CharSet, C](0, finals, trap, Array.tabulate(qs.size.toInt)(transitions.getOrElse(_, Nil))) } } diff --git a/finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala b/finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala index 4c4f3cc2a..bf30c714e 100644 --- a/finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala +++ b/finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala @@ -116,7 +116,7 @@ abstract class QuerySpec(credit: Int) extends SimpleIOSuite { test("child path") { MiniXQueryCompiler - .compile(Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Child(Some("a"))))), credit) + .compile(Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Child(Some("a")))), None), credit) .esp[IO] .flatMap { esp => Stream @@ -163,7 +163,7 @@ abstract class QuerySpec(credit: Int) extends SimpleIOSuite { test("any child path") { MiniXQueryCompiler - .compile(Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Child(None)))), credit) + .compile(Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Child(None))), None), credit) .esp[IO] .flatMap { esp => Stream @@ -210,7 +210,7 @@ abstract class 
QuerySpec(credit: Int) extends SimpleIOSuite { test("descendant path") { MiniXQueryCompiler - .compile(Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("a"))))), credit) + .compile(Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("a")))), None), credit) .esp[IO] .flatMap { esp => Stream @@ -267,7 +267,7 @@ abstract class QuerySpec(credit: Int) extends SimpleIOSuite { test("any descendant path") { MiniXQueryCompiler - .compile(Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(None)))), credit) + .compile(Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(None))), None), credit) .esp[IO] .flatMap { esp => Stream @@ -326,10 +326,11 @@ abstract class QuerySpec(credit: Int) extends SimpleIOSuite { test("simple let") { MiniXQueryCompiler - .compile( - Query - .LetClause("v", Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("a"))))), Query.Variable("v")), - credit) + .compile(Query + .LetClause("v", + Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("a")))), None), + Query.Variable("v")), + credit) .esp[IO] .flatMap { esp => Stream @@ -662,10 +663,10 @@ abstract class QuerySpec(credit: Int) extends SimpleIOSuite { MiniXPath(NonEmptyList.one(Step.Descendant(Some("b")))), Query.LetClause( "v3", - Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("c"))))), + Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("c")))), None), Query.LetClause( "v4", - Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("d"))))), + Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("d")))), None), Query.Sequence(NonEmptyList .of(Query.Variable("v1"), Query.Variable("v2"), Query.Variable("v3"), Query.Variable("v4"))) ) @@ -781,7 +782,7 @@ abstract class QuerySpec(credit: Int) extends SimpleIOSuite { .compile( Query.LetClause( "a", - Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("a"))))), + Query.Ordpath(MiniXPath(NonEmptyList.one(Step.Descendant(Some("a")))), None), 
Query.ForClause( "b", MiniXPath(NonEmptyList.one(Step.Descendant(Some("b")))), diff --git a/json/src/main/scala/fs2/data/json/jq/internal/ESPJqCompiler.scala b/json/src/main/scala/fs2/data/json/jq/internal/ESPJqCompiler.scala index 44731873c..5faea0b4f 100644 --- a/json/src/main/scala/fs2/data/json/jq/internal/ESPJqCompiler.scala +++ b/json/src/main/scala/fs2/data/json/jq/internal/ESPJqCompiler.scala @@ -34,6 +34,8 @@ private[jq] class ESPJqCompiler[F[_]](implicit F: MonadThrow[F], defer: Defer[F] override protected val emitSelected: Boolean = false + private val default: Option[TaggedJson] = Some(TaggedJson.Raw(Token.NullValue)) + private type State[T] = StateT[F, Int, T] private val nextIdent: State[String] = @@ -223,7 +225,7 @@ private[jq] class ESPJqCompiler[F[_]](implicit F: MonadThrow[F], defer: Defer[F] } } - private def preprocess(prefix: Filter, jq: Jq): State[Query[TaggedJson, Filter]] = + private def preprocess(prefix: Filter, jq: Jq, withDefault: Boolean): State[Query[TaggedJson, Filter]] = jq match { case Jq.Null => pure(Query.Leaf(TaggedJson.Raw(Token.NullValue))) @@ -232,7 +234,7 @@ private[jq] class ESPJqCompiler[F[_]](implicit F: MonadThrow[F], defer: Defer[F] case Jq.Arr(prefix1, values) => values.zipWithIndex .traverse { case (elt, idx) => - preprocess(prefix ~ prefix1, elt).map(q => Query.Node(TaggedJson.StartArrayElement(idx), q)) + preprocess(prefix ~ prefix1, elt, false).map(q => Query.Node(TaggedJson.StartArrayElement(idx), q)) } .map { elts => Query.Node(TaggedJson.Raw(Token.StartArray), @@ -256,10 +258,10 @@ private[jq] class ESPJqCompiler[F[_]](implicit F: MonadThrow[F], defer: Defer[F] case Some(v) => Query.node[TaggedJson, Filter](TaggedJson.StartObjectValue(name), Query.variable(v)).pure[State] case None => - preprocess(prefix ~ prefix1, elt).map(q => Query.node(TaggedJson.StartObjectValue(name), q)) + preprocess(prefix ~ prefix1, elt, true).map(q => Query.node(TaggedJson.StartObjectValue(name), q)) } else - preprocess(prefix ~ 
prefix1, elt).map(q => Query.node(TaggedJson.StartObjectValue(name), q)) + preprocess(prefix ~ prefix1, elt, true).map(q => Query.node(TaggedJson.StartObjectValue(name), q)) }.map { elts => Query.Node(TaggedJson.Raw(Token.StartObject), NonEmptyList.fromList(elts).fold(Query.empty[TaggedJson, Filter])(Query.Sequence(_))) @@ -273,7 +275,7 @@ private[jq] class ESPJqCompiler[F[_]](implicit F: MonadThrow[F], defer: Defer[F] if (elt == Jq.Identity) Query.Variable[TaggedJson, Filter](v).pure[State] else - preprocess(prefix ~ prefix1, elt) + preprocess(prefix ~ prefix1, elt, true) } yield (v, Query.Node(TaggedJson.StartObjectValue(name), q)) } v <- nextIdent @@ -281,7 +283,7 @@ private[jq] class ESPJqCompiler[F[_]](implicit F: MonadThrow[F], defer: Defer[F] if (inner == Jq.Identity) Query.Variable[TaggedJson, Filter](v).pure[State] else - preprocess(Jq.Identity, inner) + preprocess(Jq.Identity, inner, true) } yield { val (before, after) = values.splitAt(idx) val forClause: Query[TaggedJson, Filter] = @@ -306,20 +308,20 @@ private[jq] class ESPJqCompiler[F[_]](implicit F: MonadThrow[F], defer: Defer[F] case Jq.Iterator(filter, inner: Constructor) => for { v <- nextIdent - inner <- preprocess(Jq.Identity, inner) + inner <- preprocess(Jq.Identity, inner, withDefault) } yield Query.ForClause(v, prefix ~ filter ~ Jq.Child, inner) case Jq.Iterator(filter, inner) => for { v <- nextIdent - inner <- preprocess(Jq.Child, inner) + inner <- preprocess(Jq.Child, inner, withDefault) } yield Query.ForClause(v, prefix ~ filter, inner) case filter: Filter => - pure(Query.Ordpath(prefix ~ filter)) + pure(Query.Ordpath(prefix ~ filter, if (withDefault) default else None)) } def compile(jq: Jq): F[Pipe[F, Token, Token]] = for { - query <- preprocess(Jq.Root, jq).runA(0) + query <- preprocess(Jq.Root, jq, false).runA(0) mft = compile(query) esp <- mft.esp } yield new ESPCompiledJq[F](esp) diff --git a/json/src/test/scala/fs2/data/json/jq/JqSpec.scala 
b/json/src/test/scala/fs2/data/json/jq/JqSpec.scala index 013bb77ca..4c61dd547 100644 --- a/json/src/test/scala/fs2/data/json/jq/JqSpec.scala +++ b/json/src/test/scala/fs2/data/json/jq/JqSpec.scala @@ -44,6 +44,34 @@ object JqSpec extends SimpleIOSuite { } yield expect.same(List(Token.NumberValue("0")), result) } + test("select not found") { + for { + compiled <- compiler.compile(jq".a[0].d.e") + result <- input.through(compiled).compile.toList + } yield expect.same(Nil, result) + } + + test("iterate not found") { + for { + compiled <- compiler.compile(jq""".d[]""") + result <- input.through(compiled).compile.toList + } yield expect.same(Nil, result) + } + + test("iterate object not found") { + for { + compiled <- compiler.compile(jq""".d[] | { "value": .a }""") + result <- input.through(compiled).compile.toList + } yield expect.same(Nil, result) + } + + test("iterate array not found") { + for { + compiled <- compiler.compile(jq"""[ .d[] ]""") + result <- input.through(compiled).compile.toList + } yield expect.same(List(Token.StartArray, Token.EndArray), result) + } + test("simple recursive descent") { for { compiled <- compiler.compile(jq"..") @@ -355,4 +383,42 @@ object JqSpec extends SimpleIOSuite { ) } + test("not found value constructor") { + for { + compiled <- compiler.compile(jq"""{ "value": .a[0].d }""") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartObject, + Token.Key("value"), + Token.NullValue, + Token.EndObject + ), + result + ) + } + + test("not found value object iterator") { + for { + compiled <- compiler.compile(jq"""{ "value": .a[].unknown }""") + result <- input.through(compiled).compile.toList + } yield expect.same( + List( + Token.StartObject, + Token.Key("value"), + Token.NullValue, + Token.EndObject, + Token.StartObject, + Token.Key("value"), + Token.NullValue, + Token.EndObject, + Token.StartObject, + Token.Key("value"), + Token.NullValue, + Token.EndObject + ), + result + ) + } + }