diff --git a/zio-http-benchmarks/src/main/scala/zhttp.benchmarks/ProbeContentTypeBenchmark.scala b/zio-http-benchmarks/src/main/scala/zhttp.benchmarks/ProbeContentTypeBenchmark.scala index 9426ff2d5d..abb2fd1b2b 100644 --- a/zio-http-benchmarks/src/main/scala/zhttp.benchmarks/ProbeContentTypeBenchmark.scala +++ b/zio-http-benchmarks/src/main/scala/zhttp.benchmarks/ProbeContentTypeBenchmark.scala @@ -4,6 +4,7 @@ import java.util.concurrent.TimeUnit import scala.util.Random +import zio.http.Header.ContentType import zio.http.MediaType import org.openjdk.jmh.annotations._ @@ -14,6 +15,7 @@ import org.openjdk.jmh.annotations._ class ProbeContentTypeBenchmark { private val extensions = List("mp4", "def", "mp3", "js", "html", "css", "gif", "jpeg") + private val header = ContentType(MediaType.application.`json`) @Benchmark def benchmarkApp(): Unit = { @@ -21,4 +23,16 @@ class ProbeContentTypeBenchmark { MediaType.forFileExtension(extensions(rand)) () } + + @Benchmark + def benchmarkParseContentType(): Either[String, ContentType] = { + // Returned (not discarded) so JMH consumes the result and the JIT cannot eliminate the call as dead code. + ContentType.parse("application/json; charset=utf-8") + } + + @Benchmark + def benchmarkRenderContentType(): String = { + // Returned for the same reason: an unused result may be optimized away. + ContentType.render(header) + } } diff --git a/zio-http/src/main/scala/zio/http/Header.scala b/zio-http/src/main/scala/zio/http/Header.scala index 5be397f714..cfe2a0dad7 100644 --- a/zio-http/src/main/scala/zio/http/Header.scala +++ b/zio-http/src/main/scala/zio/http/Header.scala @@ -30,7 +30,6 @@ import scala.util.{Either, Failure, Success, Try} import zio._ import zio.http.codec.RichTextCodec -import zio.http.endpoint.openapi.OpenAPI.SecurityScheme.Http import zio.http.internal.DateEncoding sealed trait Header { @@ -2466,6 +2465,8 @@ object Header { override def self: Self = this override def headerType: HeaderType.Typed[ContentType] = ContentType + + override lazy val renderedValue: String = ContentType.render(this) /* lazy val: rendered on first access, then reused */ } object ContentType extends HeaderType { @@ -2481,47 +2482,55 @@ object Header { private val codec: 
RichTextCodec[ContentType] = { // char `.` according to BNF not allowed as `token`, but here tolerated - val token = RichTextCodec.charsNot(' ', '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=') + val token = RichTextCodec.charsNot(' ', '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=') + val tokenString = token.repeat.string - val tokenQuoted = RichTextCodec.charsNot(' ', '"') + val tokenStringQuoted = RichTextCodec.charsNot(' ', '"').repeat.string - val type1 = RichTextCodec.string.collectOrFail("unsupported main type") { + val type1 = RichTextCodec.string.collectOrFail("unsupported main type") { case value if MediaType.mainTypeMap.contains(value) => value } - val type1x = (RichTextCodec.literalCI("x-") ~ token.repeat.string).transform[String](in => s"${in._1}${in._2}")(in => ("x-", s"${in.substring(2)}")) - val codecType1 = (type1 | type1x).transform[String](_.merge) { + val type1x = (RichTextCodec.literalCI("x-") ~ tokenString).transform[String](in => s"${in._1}${in._2}")(in => ("x-", s"${in.substring(2)}")) + val codecType1 = (type1 | type1x).transform[String](_.merge) { case x if x.startsWith("x-") => Right(x) case x => Left(x) } - val codecType2 = token.repeat.string - val codecType = (codecType1 <~ RichTextCodec.char('/').const('/')) ~ codecType2 - val attribute = token.repeat.string - val valueUnquoted = token.repeat.string - val valueQuoted = RichTextCodec.char('"') ~ tokenQuoted.repeat.string ~ RichTextCodec.char('"') - val value = valueQuoted | valueUnquoted - - val param = (( + val forwardSlash = RichTextCodec.char('/').const('/') + val codecType = (codecType1 <~ forwardSlash) ~ tokenString + val quote = RichTextCodec.char('"') + val valueQuoted = quote ~ tokenStringQuoted ~ quote + val value = valueQuoted | tokenString + + val unitChunk = Chunk.single(()) /* reused by `whitespaces` when encoding: exactly one repetition of whitespaceChar */ + val whitespaces = RichTextCodec.whitespaceChar.repeat.transform[Char](_ => ' ')(_ => unitChunk).const(' ') + val param = (( 
RichTextCodec.char(';').const(';') ~> - (RichTextCodec.whitespaceChar.repeat | RichTextCodec.empty).transform[Char](_ => ' ')(_ => Left(Chunk(()))).const(' ') ~> - attribute <~ + whitespaces ~> + tokenString <~ RichTextCodec.char('=').const('=') ) ~ value) .transformOrFailLeft[ContentType.Parameter](in => ContentType.Parameter.fromCodec(in))(in => in.toCodec) - val params = param.repeat + val params = param.repeat + (codecType ~ params).transform[ContentType] { case (mainType, subType, params) => ContentType( MediaType.forContentType(s"$mainType/$subType").get, - params.collect { case p if p.key == ContentType.Parameter.Boundary.name => zio.http.Boundary(p.value) }.headOption, - params.collect { case p if p.key == ContentType.Parameter.Charset.name => java.nio.charset.Charset.forName(p.value) }.headOption, + params.collectFirst { case p if p.key == ContentType.Parameter.Boundary.name => zio.http.Boundary(p.value) }, + params.collectFirst { case p if p.key == ContentType.Parameter.Charset.name => java.nio.charset.Charset.forName(p.value) }, ) }(in => ( in.mediaType.mainType, - in.mediaType.subType, - Chunk( - in.charset.map(in => Parameter.Charset(Parameter.Payload(Parameter.Charset.name, in, false))), - in.boundary.map(in => Parameter.Boundary(Parameter.Payload(Parameter.Boundary.name, in, false))), - ).flatten, + in.mediaType.subType, { + val charset = in.charset.map(in => Parameter.Charset(Parameter.Payload(Parameter.Charset.name, in, false))) + val boundary = in.boundary.map(in => Parameter.Boundary(Parameter.Payload(Parameter.Boundary.name, in, false))) + (charset, boundary) match { + case (Some(c), Some(b)) => Chunk(c, b) /* charset parameter is emitted before boundary */ + case (Some(c), _) => Chunk.single(c) + case (_, Some(b)) => Chunk.single(b) + case _ => Chunk.empty + } + }, ), ) } diff --git a/zio-http/src/main/scala/zio/http/codec/RichTextCodec.scala b/zio-http/src/main/scala/zio/http/codec/RichTextCodec.scala index df2d0b53b1..fb67265b1c 100644 --- 
a/zio-http/src/main/scala/zio/http/codec/RichTextCodec.scala +++ b/zio-http/src/main/scala/zio/http/codec/RichTextCodec.scala @@ -20,6 +20,7 @@ import java.lang.Integer.parseInt import scala.annotation.tailrec import scala.collection.immutable.BitSet +import scala.util.control.ControlThrowable import zio.{Chunk, NonEmptyChunk} @@ -30,24 +31,35 @@ import zio.{Chunk, NonEmptyChunk} * information in HTTP headers, which in turn allows generating much better * error messages and documentation than otherwise possible. */ -sealed trait RichTextCodec[A] { self => +sealed abstract class RichTextCodec[A] { self => final def string(implicit ev: A =:= Chunk[Char]): RichTextCodec[String] = - self.asType[Chunk[Char]].transform(_.mkString)(a => Chunk(a.toList: _*)) + self + .asType[Chunk[Char]] + .transform { c => + val builder = new StringBuilder(c.length) /* pre-sized to the chunk length */ + val iter = c.chunkIterator + var i = 0 + while (iter.hasNextAt(i)) { + builder += iter.nextAt(i) + i += 1 + } + builder.result() + }(a => Chunk.fromArray(a.toCharArray)) /* encode direction: String back to Chunk[Char] */ /** * Returns a new codec that is the sequential composition of this codec and * the specified codec, but which only produces the value of this codec. */ final def <~(that: => RichTextCodec[Unit]): RichTextCodec[A] = - self ~ RichTextCodec.defer(that) + self ~ that /* `that` is no longer wrapped in RichTextCodec.defer at this call site */ /** * Returns a new codec that is the sequential composition of this codec and * the specified codec, but which only produces the value of that codec. 
*/ final def ~>[B](that: => RichTextCodec[B])(implicit ev: A =:= Unit): RichTextCodec[B] = - self.asType[Unit] ~ RichTextCodec.defer(that) + self.asType[Unit] ~ that /* `that` is no longer wrapped in RichTextCodec.defer at this call site */ /** * Returns a new codec that is the sequential composition of this codec and @@ -73,14 +85,13 @@ sealed trait RichTextCodec[A] { self => final def asType[B](implicit ev: A =:= B): RichTextCodec[B] = self.asInstanceOf[RichTextCodec[B]] - final def collectOrFail(failure: String)(pf: PartialFunction[A, A]): RichTextCodec[A] = - transformOrFailLeft[A] { - case x if pf.isDefinedAt(x) => Right(pf(x)) - case _ => Left(failure) - }(identity) + final def collectOrFail(failure: String)(pf: PartialFunction[A, A]): RichTextCodec[A] = { + val lifted = pf.lift /* lifted once here rather than per decoded value */ + transformOrFailLeft[A](lifted(_).toRight(failure))(identity) /* inputs where pf is undefined become Left(failure) */ + } final def decode(value: CharSequence): Either[String, A] = - RichTextCodec.parse(value, self).map(_._2) + RichTextCodec.parse(value, self, new RichTextCodec.ParserIndex) /* every decode call gets its own ParserIndex */ /** * Constructs documentation for this rich text codec. 
@@ -118,17 +129,11 @@ sealed trait RichTextCodec[A] { self => final def optional(default: A): RichTextCodec[Option[A]] = self.transform[Option[A]](a => Some(a))(_.fold(default)(identity)) - lazy val repeat: RichTextCodec[Chunk[A]] = - ((self ~ repeat).transform[NonEmptyChunk[A]](t => NonEmptyChunk(t._1, t._2: _*))(c => - (c.head, c.tail), - ) | RichTextCodec.empty.as(Chunk.empty[A])) - .transform[Chunk[A]] { - case Left(nonEmpty) => nonEmpty - case Right(maybeEmpty) => maybeEmpty - }(c => c.nonEmptyOrElse[Either[NonEmptyChunk[A], Chunk[A]]](Right(c))(Left(_))) + final lazy val repeat: RichTextCodec[Chunk[A]] = + RichTextCodec.Repeated(self) /* dedicated node replacing the former recursive `(self ~ repeat) | empty` encoding */ final def singleton: RichTextCodec[NonEmptyChunk[A]] = - self.transform(a => NonEmptyChunk(a))(_.head) + self.transform(a => NonEmptyChunk.single(a))(_.head) final def transform[B](f: A => B)(g: B => A): RichTextCodec[B] = self.transformOrFail[B](a => Right(f(a)))(b => Right(g(b))) @@ -163,11 +168,12 @@ sealed trait RichTextCodec[A] { self => } object RichTextCodec { - private[codec] case object Empty extends RichTextCodec[Unit] - private[codec] final case class CharIn(set: BitSet) extends RichTextCodec[Char] { + private[codec] case object Empty extends RichTextCodec[Unit] + private[codec] final case class CharIn(set: BitSet) extends RichTextCodec[Char] { val errorMessage: Left[String, Nothing] = Left(s"Expected, but did not find: ${this.describe}") } + private[codec] final case class Repeated[A](codec: RichTextCodec[A]) extends RichTextCodec[Chunk[A]] /* repetition of `codec`; values are collected into a Chunk */ private[codec] final case class TransformOrFail[A, B]( codec: RichTextCodec[A], to: A => Either[String, B], @@ -175,9 +181,15 @@ object RichTextCodec { ) extends RichTextCodec[B] private[codec] final case class Alt[A, B](left: RichTextCodec[A], right: RichTextCodec[B]) extends RichTextCodec[Either[A, B]] - private[codec] final case class Lazy[A](codec0: () => RichTextCodec[A]) extends RichTextCodec[A] { + private[codec] final case class Lazy[A](private val codec0: () => 
RichTextCodec[A]) extends RichTextCodec[A] { lazy val codec: RichTextCodec[A] = codec0() } + object Lazy { + // Extracts the evaluated `codec` instead of the `codec0` thunk, so a pattern match cannot accidentally pull out + // the function. Matching through this extractor does not compile on Scala 2.12, so it is kept only as a safeguard. + def unapply[A](l: Lazy[A]): Option[RichTextCodec[A]] = Some(l.codec) + } + private[codec] final case class Zip[A, B, C]( left: RichTextCodec[A], right: RichTextCodec[B], @@ -298,6 +310,9 @@ object RichTextCodec { final case class Literal(ranges: List[CharRanges]) extends DocPart { override def toString: String = s"“${ranges.mkString}”" } + final case class Repeated(it: DocPart) extends DocPart { + override def toString: String = s"“$it”*" /* the wrapped part in quotes followed by a trailing `*` */ + } final case class CharRange(from: Char, to: Char) extends DocPart { override def toString: String = if (from == to) escapedChars.getOrElse(from, from.toString) @@ -362,19 +377,22 @@ object RichTextCodec { case Alt(left, right) => val lc = findCycles(seen + codec, lastAnonymous, tags, left) findCycles(seen + codec, lc._2, tags ++ lc._1, right) - case l @ Lazy(_) => + case l: Lazy[A @unchecked] => val res = findCycles(seen + codec, lastAnonymous, tags, l.codec) (res._1 ++ res._1.get(l.codec).map(t => codec -> t), res._2) case Zip(left, right, _) => val lc = findCycles(seen + codec, lastAnonymous, tags, left) findCycles(seen + codec, lc._2, tags ++ lc._1, right) + case Repeated(codec0) => + val res = findCycles(seen + codec, lastAnonymous, tags, codec0) + (res._1 ++ res._1.get(codec0).map(t => codec -> t), res._2) /* same shape as the Lazy case: a tag found for the inner codec is also recorded for this node */ case t @ Tagged(_, c, _) => @tailrec def addTag( tags: Map[RichTextCodec[_], Tagged[_]], c: RichTextCodec[_], ): Map[RichTextCodec[_], Tagged[_]] = c match { - case l @ Lazy(_) => addTag(tags + (c -> t), l.codec) + case l: Lazy[A @unchecked] => addTag(tags + (c -> t), l.codec) case TransformOrFail(codec, _, _) => addTag(tags + (c -> t), codec) case _ => tags + (c -> t) } @@ -388,15 +406,6 @@ object RichTextCodec { 
taggedToDescribe: List[Tagged[_]] = Nil, ) - @tailrec - def isAltInParens(codec: RichTextCodec[_]): Boolean = - codec match { - case Alt(_, _) => true - case Empty | CharIn(_) | Zip(_, _, _) | Tagged(_, _, _) => false - case TransformOrFail(codec, _, _) => isAltInParens(codec) - case Lazy(codec0) => isAltInParens(codec0()) - } - def explain(tagged: Tagged[_], namesSeen: Set[String]): PartialDescription = { val pd = loop(tagged.codec, namesSeen + tagged.name, tagged) pd.copy(description = DocPart.Defintion(tagged.name, pd.description)) @@ -422,7 +431,7 @@ object RichTextCodec { else (acc :+ ((min, max)), (c, c)) } - val finalElement = if (tuple._2 == ((-1, -1))) Chunk.empty else Chunk(tuple._2) + val finalElement = if (tuple._2 == ((-1, -1))) Chunk.empty else Chunk.single(tuple._2) val chunk: Chunk[DocPart.CharRange] = (tuple._1 ++ finalElement).map { case (min, max) => DocPart.CharRange(min.toChar, max.toChar) @@ -461,7 +470,12 @@ object RichTextCodec { }, leftDescription.taggedToDescribe ++ rightDescription.taggedToDescribe, ) - case l @ Lazy(_) => loop(l.codec, namesSeen, explaining, seen) + case l: Lazy[A @unchecked] => + loop(l.codec, namesSeen, explaining, seen) + case Repeated(codec0) => + val c = cycles.getOrElse(codec0, codec0) + val partial = loop(codec0, namesSeen, explaining, cycles.contains(c)) + PartialDescription(DocPart.Repeated(partial.description), partial.taggedToDescribe) case Zip(left, right, _) => val l = cycles.getOrElse(left, left) val r = cycles.getOrElse(right, right) @@ -522,67 +536,124 @@ object RichTextCodec { getLines(Nil, List(cycles.getOrElse(codec, codec)), Set.empty).reverse.mkString("\n") } + private case class EncodingError(error: String) extends ControlThrowable /* thrown inside encode's loop, caught at its end and turned into Left(error) */ + private def encode[A](value: A, self: RichTextCodec[A]): Either[String, String] = { - self match { - case RichTextCodec.Empty => Right("") - case RichTextCodec.CharIn(_) => Right(value.asInstanceOf[Char].toString) - case RichTextCodec.TransformOrFail(codec, _, from) => 
- from(value) match { - case Left(err) => Left(err) - case Right(value2) => - codec.encode(value2) - } - case RichTextCodec.Alt(left, right) => - value match { - case Left(a) => left.encode(a) - case Right(b) => right.encode(b) - } - case RichTextCodec.Lazy(codec0) => codec0().encode(value) - case RichTextCodec.Zip(left, right, combiner) => - val (a, b) = combiner.separate(value) - for { - l <- left.encode(a) - r <- right.encode(b) - } yield l + r - case RichTextCodec.Tagged(_, codec, _) => codec.encode(value) + val builder = new StringBuilder() + + def loop[AA](value: AA, self: RichTextCodec[AA]): Unit = { + self match { + case RichTextCodec.CharIn(_) => + builder append value.asInstanceOf[Char] + () + case RichTextCodec.TransformOrFail(codec, _, from) => + from(value) match { + case Right(value2) => loop(value2, codec) + case Left(err) => throw EncodingError(err) + } + case RichTextCodec.Zip(left, right, combiner) => + val (a, b) = combiner.separate(value) + loop(a, left) + loop(b, right) + case RichTextCodec.Alt(left, right) => + value match { + case Left(a) => loop(a, left) + case Right(b) => loop(b, right) + } + case l: RichTextCodec.Lazy[AA] => loop(value, l.codec) + case RichTextCodec.Tagged(_, codec, _) => loop(value, codec) + case RichTextCodec.Empty => () + case RichTextCodec.Repeated(codec) => + val iter = value.chunkIterator + var i = 0 + while (iter.hasNextAt(i)) { + loop(iter.nextAt(i), codec) + i += 1 + } + () + } + } + + try { + loop(value, self) + Right(builder.result()) + } catch { + case EncodingError(error) => Left(error) } } - private def parse[A](value: CharSequence, self: RichTextCodec[A]): Either[String, (CharSequence, A)] = + private def parse[A](value: CharSequence, self: RichTextCodec[A], parserIdx: ParserIndex): Either[String, A] = { self match { - case Empty => - Right((value, ())) - case self @ CharIn(bitset) => - if (value.length == 0 || !bitset.contains(value.charAt(0).toInt)) + val idx = parserIdx.getAndIncr() + if (value.length <= 
idx || !bitset.contains(value.charAt(idx).toInt)) { + parserIdx.decr() /* undo the getAndIncr: a failed character match consumes nothing */ self.errorMessage - else - Right((value.subSequence(1, value.length), value.charAt(0))) + } else { + Right(value.charAt(idx)) + } case TransformOrFail(codec, to, _) => - parse(value, codec).flatMap { case (rest, a0) => to(a0).map(a => (rest, a)) } + parse(value, codec, parserIdx).flatMap(to(_)) + + case RichTextCodec.Zip(left, right, combiner) => + for { + l <- parse(value, left, parserIdx) + r <- parse(value, right, parserIdx) + } yield combiner.combine(l, r) case Alt(left, right) => - parse(value, left) match { - case Right((rest, a)) => Right((rest, Left(a))) - case Left(errorLeft) => - parse(value, right) match { - case Right((rest, b)) => Right((rest, Right(b))) - case Left(errorRight) => Left(s"($errorLeft, $errorRight)") + val startIdx = parserIdx.get() + parse(value, left, parserIdx) match { + case Right(a) => Right(Left(a)) + case Left(errorLeft) => + parserIdx.set(startIdx) + parse(value, right, parserIdx) match { + case Right(b) => Right(Right(b)) + case Left(errorRight) => + parserIdx.set(startIdx) + Left(s"($errorLeft, $errorRight)") } } - case RichTextCodec.Lazy(codec0) => - parse(value, codec0()) + case l: RichTextCodec.Lazy[A] => parse(value, l.codec, parserIdx) - case RichTextCodec.Zip(left, right, combiner) => - for { - l <- parse(value, left) - r <- parse(l._1, right) - } yield (r._1, combiner.combine(l._2, r._2)) + case RichTextCodec.Tagged(_, codec, _) => parse(value, codec, parserIdx) - case RichTextCodec.Tagged(_, codec, _) => parse(value, codec) + case Empty => Right(()) + case RichTextCodec.Repeated(codec) => + val builder = Chunk.newBuilder[Any] + @tailrec + def loop(): Chunk[Any] = { + // Remember where this repetition starts so a failed attempt that already consumed input + // (e.g. a Zip whose right side fails) is rolled back instead of leaking into what follows. + val startIdx = parserIdx.get() + parse(value, codec, parserIdx) match { + // Only accept matches that advanced the index; a zero-width match would repeat forever. + case Right(v) if parserIdx.get() > startIdx => builder += v; loop() + case _ => + parserIdx.set(startIdx) + builder.result() + } + } + Right(loop().asInstanceOf[A]) + } + } + + private class ParserIndex { /* mutable cursor into the input, threaded through the recursive parse calls */ + private var i = 0 + + def get(): Int = i + def set(idx: Int): Unit = i = idx + + def getAndIncr(): Int = 
{ + val res = i + i += 1 + res } + def decr(): Unit = i -= 1 + } + } diff --git a/zio-http/src/test/scala/zio/http/codec/RichTextCodecSpec.scala b/zio-http/src/test/scala/zio/http/codec/RichTextCodecSpec.scala index 5ac325ae9f..cd40811993 100644 --- a/zio-http/src/test/scala/zio/http/codec/RichTextCodecSpec.scala +++ b/zio-http/src/test/scala/zio/http/codec/RichTextCodecSpec.scala @@ -118,13 +118,11 @@ object RichTextCodecSpec extends ZIOHttpSpec { }, test("describe simple recursion") { val codec = RichTextCodec.char('x').repeat - // This would be perhaps nicer as «1» ⩴ “x”* or even without the label. - assertTrue(textOf(codec.describe).get == "«1» ⩴ (“x” «1»)?") + assertTrue(textOf(codec.describe).get == "“x”*") }, test("describe tagged simple recursion") { val codec = RichTextCodec.char('x').repeat ?? "xs" - // This would be perhaps nicer as «xs» ⩴ “x”* - assertTrue(textOf(codec.describe).get == "«xs» ⩴ (“x” «xs»)?") + assertTrue(textOf(codec.describe).get == "«xs» ⩴ “x”*") }, test("describe tagged with recursion") { lazy val integer: RichTextCodec[_] = (RichTextCodec.digit ~ (RichTextCodec.empty | integer)) ?? "integer"