Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fs2-data-xml streaming parser #25

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 64 additions & 25 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
os: [ubuntu-latest]
scala: [2.12.16, 2.13.8, 3.1.2]
java: [temurin@8, temurin@11, temurin@17]
project: [rootJS, rootJVM]
exclude:
- scala: 2.12.16
java: temurin@11
Expand All @@ -40,6 +41,10 @@ jobs:
java: temurin@11
- scala: 3.1.2
java: temurin@17
- project: rootJS
java: temurin@11
- project: rootJS
java: temurin@17
runs-on: ${{ matrix.os }}
steps:
- name: Checkout current branch (full)
Expand Down Expand Up @@ -108,44 +113,48 @@ jobs:
key: ${{ runner.os }}-sbt-cache-v2-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/build.properties') }}

- name: Check that workflows are up to date
run: sbt '++${{ matrix.scala }}' 'project /' githubWorkflowCheck
run: sbt 'project ${{ matrix.project }}' '++${{ matrix.scala }}' 'project /' githubWorkflowCheck

- name: Check headers and formatting
if: matrix.java == 'temurin@8'
run: sbt '++${{ matrix.scala }}' headerCheckAll scalafmtCheckAll 'project /' scalafmtSbtCheck
run: sbt 'project ${{ matrix.project }}' '++${{ matrix.scala }}' headerCheckAll scalafmtCheckAll 'project /' scalafmtSbtCheck

- name: scalaJSLink
if: matrix.project == 'rootJS'
run: sbt 'project ${{ matrix.project }}' '++${{ matrix.scala }}' Test/scalaJSLinkerResult

- name: Test
run: sbt '++${{ matrix.scala }}' test
run: sbt 'project ${{ matrix.project }}' '++${{ matrix.scala }}' test

- name: Check binary compatibility
if: matrix.java == 'temurin@8'
run: sbt '++${{ matrix.scala }}' mimaReportBinaryIssues
run: sbt 'project ${{ matrix.project }}' '++${{ matrix.scala }}' mimaReportBinaryIssues

- name: Generate API documentation
if: matrix.java == 'temurin@8'
run: sbt '++${{ matrix.scala }}' doc
run: sbt 'project ${{ matrix.project }}' '++${{ matrix.scala }}' doc

- name: Check scalafix lints
if: matrix.java == 'temurin@8' && !startsWith(matrix.scala, '3.')
run: sbt '++${{ matrix.scala }}' 'scalafixAll --check'
run: sbt 'project ${{ matrix.project }}' '++${{ matrix.scala }}' 'scalafixAll --check'

- name: Check unused compile dependencies
if: matrix.java == 'temurin@8'
run: sbt '++${{ matrix.scala }}' unusedCompileDependenciesTest
run: sbt 'project ${{ matrix.project }}' '++${{ matrix.scala }}' unusedCompileDependenciesTest

- name: Make target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
run: mkdir -p target scala-xml/target site/target project/target
run: mkdir -p target .js/target site/target .jvm/target .native/target scala-xml/.js/target scala-xml/.jvm/target project/target

- name: Compress target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
run: tar cf targets.tar target scala-xml/target site/target project/target
run: tar cf targets.tar target .js/target site/target .jvm/target .native/target scala-xml/.js/target scala-xml/.jvm/target project/target

- name: Upload target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
uses: actions/upload-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-${{ matrix.scala }}
name: target-${{ matrix.os }}-${{ matrix.java }}-${{ matrix.scala }}-${{ matrix.project }}
path: targets.tar

publish:
Expand Down Expand Up @@ -224,52 +233,82 @@ jobs:
~/Library/Caches/Coursier/v1
key: ${{ runner.os }}-sbt-cache-v2-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/build.properties') }}

- name: Download target directories (2.12.16)
- name: Download target directories (2.12.16, rootJS)
uses: actions/download-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.12.16-rootJS

- name: Inflate target directories (2.12.16, rootJS)
run: |
tar xf targets.tar
rm targets.tar

- name: Download target directories (2.12.16, rootJVM)
uses: actions/download-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.12.16-rootJVM

- name: Inflate target directories (2.12.16, rootJVM)
run: |
tar xf targets.tar
rm targets.tar

- name: Download target directories (2.13.8, rootJS)
uses: actions/download-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.8-rootJS

- name: Inflate target directories (2.13.8, rootJS)
run: |
tar xf targets.tar
rm targets.tar

- name: Download target directories (2.13.8, rootJVM)
uses: actions/download-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.12.16
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.8-rootJVM

- name: Inflate target directories (2.12.16)
- name: Inflate target directories (2.13.8, rootJVM)
run: |
tar xf targets.tar
rm targets.tar

- name: Download target directories (2.13.8)
- name: Download target directories (2.13.8, rootJVM)
uses: actions/download-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.8
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.8-rootJVM

- name: Inflate target directories (2.13.8)
- name: Inflate target directories (2.13.8, rootJVM)
run: |
tar xf targets.tar
rm targets.tar

- name: Download target directories (2.13.8)
- name: Download target directories (2.13.8, rootJVM)
uses: actions/download-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.8
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.8-rootJVM

- name: Inflate target directories (2.13.8)
- name: Inflate target directories (2.13.8, rootJVM)
run: |
tar xf targets.tar
rm targets.tar

- name: Download target directories (2.13.8)
- name: Download target directories (3.1.2, rootJS)
uses: actions/download-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.8
name: target-${{ matrix.os }}-${{ matrix.java }}-3.1.2-rootJS

- name: Inflate target directories (2.13.8)
- name: Inflate target directories (3.1.2, rootJS)
run: |
tar xf targets.tar
rm targets.tar

- name: Download target directories (3.1.2)
- name: Download target directories (3.1.2, rootJVM)
uses: actions/download-artifact@v2
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-3.1.2
name: target-${{ matrix.os }}-${{ matrix.java }}-3.1.2-rootJVM

- name: Inflate target directories (3.1.2)
- name: Inflate target directories (3.1.2, rootJVM)
run: |
tar xf targets.tar
rm targets.tar
Expand Down
12 changes: 7 additions & 5 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
ThisBuild / tlBaseVersion := "0.23"
ThisBuild / tlMimaPreviousVersions ++= (0 to 11).map(y => s"0.23.$y").toSet
ThisBuild / tlBaseVersion := "0.24"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't decide whether or not to bump the base version. We can make this change binary-compatibly, it's more about semantics.

ThisBuild / developers := List(
tlGitHubDev("rossabaker", "Ross A. Baker")
)
Expand All @@ -8,14 +7,16 @@ val Scala213 = "2.13.8"
ThisBuild / crossScalaVersions := Seq("2.12.16", Scala213, "3.1.2")
ThisBuild / scalaVersion := Scala213

lazy val root = project.in(file(".")).aggregate(scalaXml).enablePlugins(NoPublishPlugin)
lazy val root = tlCrossRootProject.aggregate(scalaXml)

val http4sVersion = "0.23.12"
val scalaXmlVersion = "2.1.0"
val fs2DataVersion = "1.4.0"
val munitVersion = "0.7.29"
val munitCatsEffectVersion = "1.0.7"

lazy val scalaXml = project
lazy val scalaXml = crossProject(JVMPlatform, JSPlatform)
.crossType(CrossType.Pure)
.in(file("scala-xml"))
.settings(
name := "http4s-scala-xml",
Expand All @@ -24,6 +25,7 @@ lazy val scalaXml = project
libraryDependencies ++= Seq(
"org.http4s" %%% "http4s-core" % http4sVersion,
"org.scala-lang.modules" %%% "scala-xml" % scalaXmlVersion,
"org.gnieh" %%% "fs2-data-xml-scala" % fs2DataVersion,
"org.scalameta" %%% "munit-scalacheck" % munitVersion % Test,
"org.typelevel" %%% "munit-cats-effect-3" % munitCatsEffectVersion % Test,
"org.http4s" %%% "http4s-laws" % http4sVersion % Test,
Expand All @@ -32,7 +34,7 @@ lazy val scalaXml = project

lazy val docs = project
.in(file("site"))
.dependsOn(scalaXml)
.dependsOn(scalaXml.jvm)
.settings(
libraryDependencies ++= Seq(
"org.http4s" %%% "http4s-dsl" % http4sVersion,
Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class JsonXmlHttpEndpoint[F[_]](implicit F: Async[F]) extends Http4sDsl[F] {
* </person>
*/
private object Person {
def fromXml(elem: Elem): Person = {
def fromXml(elem: Document): Person = {
val name = (elem \\ "name").text
val age = (elem \\ "age").text
Person(name, age.toInt)
Expand Down
1 change: 1 addition & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
addSbtPlugin("org.http4s" % "sbt-http4s-org" % "0.14.3")
addSbtPlugin("org.scala-js" % "sbt-scalajs" % "1.10.0")
86 changes: 38 additions & 48 deletions scala-xml/src/main/scala/org/http4s/scalaxml/ElemInstances.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,21 @@
package org.http4s
package scalaxml

import cats.data.EitherT
import cats.effect.Async
import cats.effect.Concurrent
import cats.syntax.all._
import fs2.Stream
import fs2.data.xml.XmlEvent
import fs2.data.xml.XmlException
import fs2.data.xml.scalaXml._
import org.http4s.Charset.`UTF-8`
import org.http4s.headers.`Content-Type`

import java.io.ByteArrayInputStream
import java.io.StringWriter
import javax.xml.parsers.SAXParserFactory
import scala.util.control.NonFatal
import scala.xml.Document
import scala.xml.Elem
import scala.xml.InputSource
import scala.xml.SAXParseException
import scala.xml.XML

trait ElemInstances {
protected def saxFactory: SAXParserFactory

implicit def xmlEncoder[F[_]](implicit charset: Charset = `UTF-8`): EntityEncoder[F, Elem] =
EntityEncoder
Expand All @@ -46,50 +43,43 @@ trait ElemInstances {
}
.withContentType(`Content-Type`(MediaType.application.xml).withCharset(charset))

/** Handles a message body as XML.
*
* TODO Not an ideal implementation. Would be much better with an asynchronous XML parser, such as Aalto.
*
* @return an XML element
*/
@deprecated("Blocks. Use xmlDecoder with an Async constraint.", "0.23.12")
def xml[F[_]](implicit F: Concurrent[F]): EntityDecoder[F, Elem] = {
import EntityDecoder._
decodeBy(MediaType.text.xml, MediaType.text.html, MediaType.application.xml) { msg =>
val source = new InputSource()
msg.charset.foreach(cs => source.setEncoding(cs.nioCharset.name))
implicit def xmlEvents[F[_]: Concurrent]: EntityDecoder[F, Stream[F, XmlEvent]] =
xmlEvents(true)

collectBinary(msg).flatMap[DecodeFailure, Elem] { chunk =>
source.setByteStream(new ByteArrayInputStream(chunk.toArray))
val saxParser = saxFactory.newSAXParser()
try DecodeResult.successT[F, Elem](XML.loadXML(source, saxParser))
catch {
case e: SAXParseException =>
DecodeResult.failureT(MalformedMessageBodyFailure("Invalid XML", Some(e)))
case NonFatal(e) => DecodeResult(F.raiseError[Either[DecodeFailure, Elem]](e))
}
}
def xmlEvents[F[_]](
includeComments: Boolean
)(implicit F: Concurrent[F]): EntityDecoder[F, Stream[F, XmlEvent]] =
EntityDecoder.decodeBy(MediaType.text.xml, MediaType.text.html, MediaType.application.xml) {
msg =>
DecodeResult.successT(msg.bodyText.through(fs2.data.xml.events(includeComments)))
}
}

implicit def xmlDecoder[F[_]](implicit F: Async[F]): EntityDecoder[F, Elem] = {
import EntityDecoder._
decodeBy(MediaType.text.xml, MediaType.text.html, MediaType.application.xml) { msg =>
val source = new InputSource()
msg.charset.foreach(cs => source.setEncoding(cs.nioCharset.name))
/** Handles a message body as XML.
*
* @return an XML [[Document]]
*/
implicit def xmlDocument[F[_]: Concurrent]: EntityDecoder[F, Document] =
xmlDocument(true)

collectBinary(msg).flatMap[DecodeFailure, Elem] { chunk =>
source.setByteStream(new ByteArrayInputStream(chunk.toArray))
val saxParser = saxFactory.newSAXParser()
EitherT(
F.blocking(XML.loadXML(source, saxParser))
.map(Either.right[DecodeFailure, Elem](_))
.recover { case e: SAXParseException =>
Left(MalformedMessageBodyFailure("Invalid XML", Some(e)))
}
)
/** Handles a message body as XML.
*
* @return an XML [[Document]]
*/
def xmlDocument[F[_]](
includeComments: Boolean
)(implicit F: Concurrent[F]): EntityDecoder[F, Document] =
xmlEvents(includeComments).flatMapR { events =>
DecodeResult {
events
.through(fs2.data.xml.dom.documents)
.head
.compile
.lastOrError
.map(Either.right[MalformedMessageBodyFailure, Document](_))
.recover { case ex: XmlException =>
Left(MalformedMessageBodyFailure("Invalid XML", Some(ex)))
}
.widen
}
}
}

}
6 changes: 1 addition & 5 deletions scala-xml/src/main/scala/org/http4s/scalaxml/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,4 @@

package org.http4s

import javax.xml.parsers.SAXParserFactory

package object scalaxml extends ElemInstances {
override val saxFactory = SAXParserFactory.newInstance
}
package object scalaxml extends ElemInstances
10 changes: 6 additions & 4 deletions scala-xml/src/test/scala/org/http4s/scalaxml/ScalaXmlSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import org.scalacheck.effect.PropF._
import org.typelevel.ci._

import java.nio.charset.StandardCharsets
import scala.xml.Document
import scala.xml.Elem

class ScalaXmlSuite extends CatsEffectSuite with ScalaCheckEffectSuite {
Expand All @@ -53,16 +54,17 @@ class ScalaXmlSuite extends CatsEffectSuite with ScalaCheckEffectSuite {
.map(_.getOrElse(""))

val server: Request[IO] => IO[Response[IO]] = { req =>
req.decode { (elem: Elem) =>
IO.pure(Response[IO](Ok).withEntity(elem.label))
req.decode { (doc: Document) =>
IO.pure(Response[IO](Ok).withEntity(doc.docElem.label))
}
}

test("round trips utf-8") {
forAllF(genXml) { (elem: Elem) =>
Request[IO]()
.withEntity(elem)
.as[Elem]
.as[Document]
.map(_.docElem)
.assertEquals(elem)
}
}
Expand Down Expand Up @@ -149,7 +151,7 @@ class ScalaXmlSuite extends CatsEffectSuite with ScalaCheckEffectSuite {
val body = Stream.chunk(bytes)
val msg = Request[IO](Method.POST, headers = Headers(Header.Raw(ci"Content-Type", contentType)))
.withBodyStream(body)
msg.as[Elem].map(_ \\ "hello" \@ "name").assertEquals(name)
msg.as[Document].map(_ \\ "hello" \@ "name").assertEquals(name)
}

test("parse UTF-8 charset with explicit encoding") {
Expand Down