diff --git a/0492aa7e.039daa46.js b/0492aa7e.039daa46.js deleted file mode 100644 index 784542e29..000000000 --- a/0492aa7e.039daa46.js +++ /dev/null @@ -1 +0,0 @@ -(window.webpackJsonp=window.webpackJsonp||[]).push([[6],{119:function(e,a,n){"use strict";n.d(a,"a",(function(){return b})),n.d(a,"b",(function(){return u}));var t=n(0),r=n.n(t);function s(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function o(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);a&&(t=t.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,t)}return n}function l(e){for(var a=1;a=0||(r[n]=e[n]);return r}(e,a);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(t=0;t=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var i=r.a.createContext({}),c=function(e){var a=r.a.useContext(i),n=a;return e&&(n="function"==typeof e?e(a):l(l({},a),e)),n},b=function(e){var a=c(e.components);return r.a.createElement(i.Provider,{value:a},e.children)},d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.a.createElement(r.a.Fragment,{},a)}},m=r.a.forwardRef((function(e,a){var n=e.components,t=e.mdxType,s=e.originalType,o=e.parentName,i=p(e,["components","mdxType","originalType","parentName"]),b=c(n),m=t,u=b["".concat(o,".").concat(m)]||b[m]||d[m]||s;return n?r.a.createElement(u,l(l({ref:a},i),{},{components:n})):r.a.createElement(u,l({ref:a},i))}));function u(e,a){var n=arguments,t=a&&a.mdxType;if("string"==typeof e||t){var s=n.length,o=new Array(s);o[0]=m;var l={};for(var p in a)hasOwnProperty.call(a,p)&&(l[p]=a[p]);l.originalType=e,l.mdxType="string"==typeof e?e:t,o[1]=l;for(var i=2;i<none> is not a term",id:"datasets-and-none-is-not-a-term",children:[]},{value:"Example",id:"example",children:[]}],c={toc:i};function b(e){var a=e.components,n=Object(r.a)(e,o);return Object(s.b)("wrapper",Object(t.a)({},c,n,{components:a,mdxType:"MDXLayout"}),Object(s.b)("h2",{id:"introduction"},"Introduction"),Object(s.b)("p",null,"By default, Spark uses reflection to derive schemas and encoders from case\nclasses. This doesn't work well when there are messages that contain types that\nSpark does not understand such as enums, ",Object(s.b)("inlineCode",{parentName:"p"},"ByteString"),"s and ",Object(s.b)("inlineCode",{parentName:"p"},"oneof"),"s. To get around this, sparksql-scalapb provides its own ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s for protocol buffers."),Object(s.b)("p",null,"However, it turns out there is another obstacle. Spark does not provide any mechanism to compose user-provided encoders with its own reflection-derived Encoders. Therefore, merely providing an ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder")," for protocol buffers is insufficient to derive an encoder for regular case-classes that contain a protobuf as a field. To solve this problem, ScalaPB uses ",Object(s.b)("a",{parentName:"p",href:"https://github.com/typelevel/frameless"},"frameless")," which relies on implicit search to derive encoders. This approach enables combining ScalaPB's encoders with frameless encoders that takes care for all non-protobuf types."),Object(s.b)("h2",{id:"setting-up-your-project"},"Setting up your project"),Object(s.b)("p",null,"We are going to use sbt-assembly to deploy a fat JAR containing ScalaPB, and\nyour compiled protos. 
Make sure in project/plugins.sbt you have a line\nthat adds sbt-assembly:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")\n')),Object(s.b)("p",null,"To add sparksql-scalapb to your project, add ",Object(s.b)("em",{parentName:"p"},"one")," of the following lines that\nmatches ",Object(s.b)("em",{parentName:"p"},"both the version of ScalaPB and Spark")," you use:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'// Spark 3.3 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_11" % "1.0.2"\n\n// Spark 3.2 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_11" % "1.0.2"\n\n// Spark 3.1 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_11" % "1.0.2"\n\n// Spark 3.0 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_11" % "1.0.1"\n\n// Spark 3.3 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_10" % "1.0.2"\n\n// Spark 3.2 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_10" % "1.0.2"\n\n// Spark 3.1 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_10" % "1.0.2"\n\n// Spark 3.0 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_10" % "1.0.1"\n\n// Spark 2.x and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.10.4"\n\n// Spark 2.x and ScalaPB 0.9\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.9.3"\n')),Object(s.b)("p",null,"Known issue: Spark 3.2.1 is binary incompatible with Spark 3.2.0 in some of its internal\nAPIs being used. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1."),Object(s.b)("p",null,"Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with\nthe current version. In addition, it comes with incompatible versions of scala-collection-compat\nand shapeless. Therefore, we need to shade these libraries. Add the following to your build.sbt:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'assemblyShadeRules in assembly := Seq(\n ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,\n ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,\n ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll\n)\n')),Object(s.b)("p",null,"See ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test/blob/master/build.sbt"},"complete example of build.sbt"),"."),Object(s.b)("h2",{id:"using-sparksql-scalapb"},"Using sparksql-scalapb"),Object(s.b)("p",null,"We assume you have a ",Object(s.b)("inlineCode",{parentName:"p"},"SparkSession")," assigned to the variable ",Object(s.b)("inlineCode",{parentName:"p"},"spark"),". 
In a standalone Scala program, this can be created with:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import org.apache.spark.sql.SparkSession\n\nval spark: SparkSession = SparkSession\n .builder()\n .appName("ScalaPB Demo")\n .master("local[2]")\n .getOrCreate()\n// spark: SparkSession = org.apache.spark.sql.SparkSession@1a6bb5f7\n')),Object(s.b)("p",null,Object(s.b)("em",{parentName:"p"},"IMPORTANT"),": Ensure you do not import ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._")," to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. You may want to import ",Object(s.b)("inlineCode",{parentName:"p"},"StringToColumn")," to convert ",Object(s.b)("inlineCode",{parentName:"p"},'$"col name"')," into a ",Object(s.b)("inlineCode",{parentName:"p"},"Column"),". Add an import ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits")," to add ScalaPB's encoders for protocol buffers into the implicit search scope:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.sql.{Dataset, DataFrame, functions => F}\nimport spark.implicits.StringToColumn\nimport scalapb.spark.ProtoSQL\n\nimport scalapb.spark.Implicits._\n")),Object(s.b)("p",null,"The code snippets below use the ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/ScalaPB/blob/master/docs/src/main/protobuf/person.proto"},Object(s.b)("inlineCode",{parentName:"a"},"Person")," message"),"."),Object(s.b)("p",null,"We start by creating some test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import scalapb.docs.person.Person\nimport scalapb.docs.person.Person.{Address, AddressType}\n\nval testData = Seq(\n Person(name="John", age=32, addresses=Vector(\n Address(addressType=AddressType.HOME, street="Market", city="SF"))\n ),\n Person(name="Mike", age=29, addresses=Vector(\n Address(addressType=AddressType.WORK, street="Castro", city="MV"),\n Address(addressType=AddressType.HOME, street="Church", city="MV"))\n ),\n Person(name="Bart", age=27)\n)\n')),Object(s.b)("p",null,"We can create a ",Object(s.b)("inlineCode",{parentName:"p"},"DataFrame")," from the test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val df = ProtoSQL.createDataFrame(spark, testData)\n// df: DataFrame = [name: string, age: int ... 
1 more field]\ndf.printSchema()\n// root\n// |-- name: string (nullable = true)\n// |-- age: integer (nullable = true)\n// |-- addresses: array (nullable = false)\n// | |-- element: struct (containsNull = false)\n// | | |-- address_type: string (nullable = true)\n// | | |-- street: string (nullable = true)\n// | | |-- city: string (nullable = true)\n// \ndf.show()\n// +----+---+--------------------+\n// |name|age| addresses|\n// +----+---+--------------------+\n// |John| 32|[{HOME, Market, SF}]|\n// |Mike| 29|[{WORK, Castro, M...|\n// |Bart| 27| []|\n// +----+---+--------------------+\n//\n")),Object(s.b)("p",null,"and then process it as any other Dataframe in Spark:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'df.select($"name", F.size($"addresses").alias("address_count")).show()\n// +----+-------------+\n// |name|address_count|\n// +----+-------------+\n// |John| 1|\n// |Mike| 2|\n// |Bart| 0|\n// +----+-------------+\n// \n\nval nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))\n// nameAndAddress: DataFrame = [name: string, firstAddress: struct]\n\nnameAndAddress.show()\n// +----+------------------+\n// |name| firstAddress|\n// +----+------------------+\n// |John|{HOME, Market, SF}|\n// |Mike|{WORK, Castro, MV}|\n// |Bart| null|\n// +----+------------------+\n//\n')),Object(s.b)("p",null,"Using the datasets API it is possible to bring the data back to ScalaPB case classes:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"nameAndAddress.as[(String, Option[Address])].collect().foreach(println)\n// (John,Some(Address(HOME,Market,SF,UnknownFieldSet(Map()))))\n// (Mike,Some(Address(WORK,Castro,MV,UnknownFieldSet(Map()))))\n// (Bart,None)\n")),Object(s.b)("p",null,"You can create a Dataset directly using Spark APIs:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"spark.createDataset(testData)\n// res5: Dataset[Person] = [name: string, age: int ... 1 more field]\n")),Object(s.b)("h2",{id:"from-binary-to-protos-and-back"},"From Binary to protos and back"),Object(s.b)("p",null,"In some situations, you may need to deal with datasets that contain serialized protocol buffers. This can be handled by mapping the datasets through ScalaPB's ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom")," and ",Object(s.b)("inlineCode",{parentName:"p"},"toByteArray")," functions."),Object(s.b)("p",null,"Let's start by preparing a dataset with test binary data by mapping our ",Object(s.b)("inlineCode",{parentName:"p"},"testData"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val binaryDS: Dataset[Array[Byte]] = spark.createDataset(testData.map(_.toByteArray))\n// binaryDS: Dataset[Array[Byte]] = [value: binary]\n\nbinaryDS.show()\n// +--------------------+\n// | value|\n// +--------------------+\n// |[0A 04 4A 6F 68 6...|\n// |[0A 04 4D 69 6B 6...|\n// |[0A 04 42 61 72 7...|\n// +--------------------+\n//\n")),Object(s.b)("p",null,"To turn this dataset into a ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Person]"),", we map it through ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosDS: Dataset[Person] = binaryDS.map(Person.parseFrom(_))\n// protosDS: Dataset[Person] = [name: string, age: int ... 
1 more field]\n")),Object(s.b)("p",null,"to turn a dataset of protos into ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Array[Byte]]"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosBinary: Dataset[Array[Byte]] = protosDS.map(_.toByteArray)\n// protosBinary: Dataset[Array[Byte]] = [value: binary]\n")),Object(s.b)("h2",{id:"on-enums"},"On enums"),Object(s.b)("p",null,"In SparkSQL-ScalaPB, enums are represented as strings. Unrecognized enum values are represented as strings containing the numeric value."),Object(s.b)("h2",{id:"dataframes-and-datasets-from-rdds"},"Dataframes and Datasets from RDDs"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.rdd.RDD\n\nval protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)\n\nval protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)\n\nval protoDS: Dataset[Person] = spark.createDataset(protoRDD)\n")),Object(s.b)("h2",{id:"udfs"},"UDFs"),Object(s.b)("p",null,"If you need to write a UDF that returns a message, it would not pick up our encoder and you may get a runtime failure. To work around this, sparksql-scalapb provides ",Object(s.b)("inlineCode",{parentName:"p"},"ProtoSQL.udf")," to create UDFs. For example, if you need to parse a binary column into a proto:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'val binaryDF = protosBinary.toDF("value")\n// binaryDF: DataFrame = [value: binary]\n\nval parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }\n// parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$11463/423403262@3348f179\n\nbinaryDF.withColumn("person", parsePersons($"value"))\n// res7: DataFrame = [value: binary, person: struct]\n')),Object(s.b)("h2",{id:"primitive-wrappers"},"Primitive wrappers"),Object(s.b)("p",null,"In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs\nwitha single field named ",Object(s.b)("inlineCode",{parentName:"p"},"value"),". A better representation in Spark would be a\nnullable field of the primitive type. The better representation will be the\ndefault in 0.11.x. To enable this representation today, replace the usages of\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL")," with ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.withPrimitiveWrappers"),".\nInstead of importing ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits._"),", import\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.implicits._")),Object(s.b)("p",null,"See example in ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/sparksql-scalapb/blob/80f3162b69313d57f95d3dcbfee865809873567a/sparksql-scalapb/src/test/scala/WrappersSpec.scala#L42-L59"},"WrappersSpec"),"."),Object(s.b)("h2",{id:"datasets-and-none-is-not-a-term"},"Datasets and ",Object(s.b)("inlineCode",{parentName:"h2"}," is not a term")),Object(s.b)("p",null,"You will see this error if for some reason Spark's ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s are being picked up\ninstead of the ones provided by sparksql-scalapb. Please ensure you are not importing ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._"),". 
See instructions above for imports."),Object(s.b)("h2",{id:"example"},"Example"),Object(s.b)("p",null,"Check out a ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test"},"complete example")," here."))}b.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/0492aa7e.c518a047.js b/0492aa7e.c518a047.js new file mode 100644 index 000000000..a29442931 --- /dev/null +++ b/0492aa7e.c518a047.js @@ -0,0 +1 @@ +(window.webpackJsonp=window.webpackJsonp||[]).push([[6],{119:function(e,a,n){"use strict";n.d(a,"a",(function(){return b})),n.d(a,"b",(function(){return u}));var t=n(0),r=n.n(t);function s(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function o(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);a&&(t=t.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,t)}return n}function l(e){for(var a=1;a=0||(r[n]=e[n]);return r}(e,a);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(t=0;t=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var i=r.a.createContext({}),c=function(e){var a=r.a.useContext(i),n=a;return e&&(n="function"==typeof e?e(a):l(l({},a),e)),n},b=function(e){var a=c(e.components);return r.a.createElement(i.Provider,{value:a},e.children)},d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.a.createElement(r.a.Fragment,{},a)}},m=r.a.forwardRef((function(e,a){var n=e.components,t=e.mdxType,s=e.originalType,o=e.parentName,i=p(e,["components","mdxType","originalType","parentName"]),b=c(n),m=t,u=b["".concat(o,".").concat(m)]||b[m]||d[m]||s;return n?r.a.createElement(u,l(l({ref:a},i),{},{components:n})):r.a.createElement(u,l({ref:a},i))}));function u(e,a){var n=arguments,t=a&&a.mdxType;if("string"==typeof e||t){var s=n.length,o=new Array(s);o[0]=m;var l={};for(var p in a)hasOwnProperty.call(a,p)&&(l[p]=a[p]);l.originalType=e,l.mdxType="string"==typeof e?e:t,o[1]=l;for(var i=2;i<none> is not a term",id:"datasets-and-none-is-not-a-term",children:[]},{value:"Example",id:"example",children:[]}],c={toc:i};function b(e){var a=e.components,n=Object(r.a)(e,o);return Object(s.b)("wrapper",Object(t.a)({},c,n,{components:a,mdxType:"MDXLayout"}),Object(s.b)("h2",{id:"introduction"},"Introduction"),Object(s.b)("p",null,"By default, Spark uses reflection to derive schemas and encoders from case\nclasses. This doesn't work well when there are messages that contain types that\nSpark does not understand such as enums, ",Object(s.b)("inlineCode",{parentName:"p"},"ByteString"),"s and ",Object(s.b)("inlineCode",{parentName:"p"},"oneof"),"s. To get around this, sparksql-scalapb provides its own ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s for protocol buffers."),Object(s.b)("p",null,"However, it turns out there is another obstacle. Spark does not provide any mechanism to compose user-provided encoders with its own reflection-derived Encoders. Therefore, merely providing an ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder")," for protocol buffers is insufficient to derive an encoder for regular case-classes that contain a protobuf as a field. To solve this problem, ScalaPB uses ",Object(s.b)("a",{parentName:"p",href:"https://github.com/typelevel/frameless"},"frameless")," which relies on implicit search to derive encoders. 
This approach enables combining ScalaPB's encoders with frameless encoders that takes care for all non-protobuf types."),Object(s.b)("h2",{id:"setting-up-your-project"},"Setting up your project"),Object(s.b)("p",null,"We are going to use sbt-assembly to deploy a fat JAR containing ScalaPB, and\nyour compiled protos. Make sure in project/plugins.sbt you have a line\nthat adds sbt-assembly:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")\n')),Object(s.b)("p",null,"To add sparksql-scalapb to your project, add ",Object(s.b)("em",{parentName:"p"},"one")," of the following lines that\nmatches ",Object(s.b)("em",{parentName:"p"},"both the version of ScalaPB and Spark")," you use:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'// Spark 3.3 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_11" % "1.0.2"\n\n// Spark 3.2 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_11" % "1.0.2"\n\n// Spark 3.1 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_11" % "1.0.2"\n\n// Spark 3.0 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_11" % "1.0.1"\n\n// Spark 3.3 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_10" % "1.0.2"\n\n// Spark 3.2 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_10" % "1.0.2"\n\n// Spark 3.1 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_10" % "1.0.2"\n\n// Spark 3.0 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_10" % "1.0.1"\n\n// Spark 2.x and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.10.4"\n\n// Spark 2.x and ScalaPB 0.9\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.9.3"\n')),Object(s.b)("p",null,"Known issue: Spark 3.2.1 is binary incompatible with Spark 3.2.0 in some of its internal\nAPIs being used. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1."),Object(s.b)("p",null,"Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with\nthe current version. In addition, it comes with incompatible versions of scala-collection-compat\nand shapeless. Therefore, we need to shade these libraries. Add the following to your build.sbt:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'assemblyShadeRules in assembly := Seq(\n ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,\n ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,\n ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll\n)\n')),Object(s.b)("p",null,"See ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test/blob/master/build.sbt"},"complete example of build.sbt"),"."),Object(s.b)("h2",{id:"using-sparksql-scalapb"},"Using sparksql-scalapb"),Object(s.b)("p",null,"We assume you have a ",Object(s.b)("inlineCode",{parentName:"p"},"SparkSession")," assigned to the variable ",Object(s.b)("inlineCode",{parentName:"p"},"spark"),". 
In a standalone Scala program, this can be created with:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import org.apache.spark.sql.SparkSession\n\nval spark: SparkSession = SparkSession\n .builder()\n .appName("ScalaPB Demo")\n .master("local[2]")\n .getOrCreate()\n// spark: SparkSession = org.apache.spark.sql.SparkSession@ca9c8f9\n')),Object(s.b)("p",null,Object(s.b)("em",{parentName:"p"},"IMPORTANT"),": Ensure you do not import ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._")," to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. You may want to import ",Object(s.b)("inlineCode",{parentName:"p"},"StringToColumn")," to convert ",Object(s.b)("inlineCode",{parentName:"p"},'$"col name"')," into a ",Object(s.b)("inlineCode",{parentName:"p"},"Column"),". Add an import ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits")," to add ScalaPB's encoders for protocol buffers into the implicit search scope:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.sql.{Dataset, DataFrame, functions => F}\nimport spark.implicits.StringToColumn\nimport scalapb.spark.ProtoSQL\n\nimport scalapb.spark.Implicits._\n")),Object(s.b)("p",null,"The code snippets below use the ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/ScalaPB/blob/master/docs/src/main/protobuf/person.proto"},Object(s.b)("inlineCode",{parentName:"a"},"Person")," message"),"."),Object(s.b)("p",null,"We start by creating some test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import scalapb.docs.person.Person\nimport scalapb.docs.person.Person.{Address, AddressType}\n\nval testData = Seq(\n Person(name="John", age=32, addresses=Vector(\n Address(addressType=AddressType.HOME, street="Market", city="SF"))\n ),\n Person(name="Mike", age=29, addresses=Vector(\n Address(addressType=AddressType.WORK, street="Castro", city="MV"),\n Address(addressType=AddressType.HOME, street="Church", city="MV"))\n ),\n Person(name="Bart", age=27)\n)\n')),Object(s.b)("p",null,"We can create a ",Object(s.b)("inlineCode",{parentName:"p"},"DataFrame")," from the test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val df = ProtoSQL.createDataFrame(spark, testData)\n// df: DataFrame = [name: string, age: int ... 
1 more field]\ndf.printSchema()\n// root\n// |-- name: string (nullable = true)\n// |-- age: integer (nullable = true)\n// |-- addresses: array (nullable = false)\n// | |-- element: struct (containsNull = false)\n// | | |-- address_type: string (nullable = true)\n// | | |-- street: string (nullable = true)\n// | | |-- city: string (nullable = true)\n// \ndf.show()\n// +----+---+--------------------+\n// |name|age| addresses|\n// +----+---+--------------------+\n// |John| 32|[{HOME, Market, SF}]|\n// |Mike| 29|[{WORK, Castro, M...|\n// |Bart| 27| []|\n// +----+---+--------------------+\n//\n")),Object(s.b)("p",null,"and then process it as any other Dataframe in Spark:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'df.select($"name", F.size($"addresses").alias("address_count")).show()\n// +----+-------------+\n// |name|address_count|\n// +----+-------------+\n// |John| 1|\n// |Mike| 2|\n// |Bart| 0|\n// +----+-------------+\n// \n\nval nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))\n// nameAndAddress: DataFrame = [name: string, firstAddress: struct]\n\nnameAndAddress.show()\n// +----+------------------+\n// |name| firstAddress|\n// +----+------------------+\n// |John|{HOME, Market, SF}|\n// |Mike|{WORK, Castro, MV}|\n// |Bart| null|\n// +----+------------------+\n//\n')),Object(s.b)("p",null,"Using the datasets API it is possible to bring the data back to ScalaPB case classes:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"nameAndAddress.as[(String, Option[Address])].collect().foreach(println)\n// (John,Some(Address(HOME,Market,SF,UnknownFieldSet(Map()))))\n// (Mike,Some(Address(WORK,Castro,MV,UnknownFieldSet(Map()))))\n// (Bart,None)\n")),Object(s.b)("p",null,"You can create a Dataset directly using Spark APIs:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"spark.createDataset(testData)\n// res5: Dataset[Person] = [name: string, age: int ... 1 more field]\n")),Object(s.b)("h2",{id:"from-binary-to-protos-and-back"},"From Binary to protos and back"),Object(s.b)("p",null,"In some situations, you may need to deal with datasets that contain serialized protocol buffers. This can be handled by mapping the datasets through ScalaPB's ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom")," and ",Object(s.b)("inlineCode",{parentName:"p"},"toByteArray")," functions."),Object(s.b)("p",null,"Let's start by preparing a dataset with test binary data by mapping our ",Object(s.b)("inlineCode",{parentName:"p"},"testData"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val binaryDS: Dataset[Array[Byte]] = spark.createDataset(testData.map(_.toByteArray))\n// binaryDS: Dataset[Array[Byte]] = [value: binary]\n\nbinaryDS.show()\n// +--------------------+\n// | value|\n// +--------------------+\n// |[0A 04 4A 6F 68 6...|\n// |[0A 04 4D 69 6B 6...|\n// |[0A 04 42 61 72 7...|\n// +--------------------+\n//\n")),Object(s.b)("p",null,"To turn this dataset into a ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Person]"),", we map it through ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosDS: Dataset[Person] = binaryDS.map(Person.parseFrom(_))\n// protosDS: Dataset[Person] = [name: string, age: int ... 
1 more field]\n")),Object(s.b)("p",null,"to turn a dataset of protos into ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Array[Byte]]"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosBinary: Dataset[Array[Byte]] = protosDS.map(_.toByteArray)\n// protosBinary: Dataset[Array[Byte]] = [value: binary]\n")),Object(s.b)("h2",{id:"on-enums"},"On enums"),Object(s.b)("p",null,"In SparkSQL-ScalaPB, enums are represented as strings. Unrecognized enum values are represented as strings containing the numeric value."),Object(s.b)("h2",{id:"dataframes-and-datasets-from-rdds"},"Dataframes and Datasets from RDDs"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.rdd.RDD\n\nval protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)\n\nval protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)\n\nval protoDS: Dataset[Person] = spark.createDataset(protoRDD)\n")),Object(s.b)("h2",{id:"udfs"},"UDFs"),Object(s.b)("p",null,"If you need to write a UDF that returns a message, it would not pick up our encoder and you may get a runtime failure. To work around this, sparksql-scalapb provides ",Object(s.b)("inlineCode",{parentName:"p"},"ProtoSQL.udf")," to create UDFs. For example, if you need to parse a binary column into a proto:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'val binaryDF = protosBinary.toDF("value")\n// binaryDF: DataFrame = [value: binary]\n\nval parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }\n// parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$11430/20914410@18e9db60\n\nbinaryDF.withColumn("person", parsePersons($"value"))\n// res7: DataFrame = [value: binary, person: struct]\n')),Object(s.b)("h2",{id:"primitive-wrappers"},"Primitive wrappers"),Object(s.b)("p",null,"In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs\nwitha single field named ",Object(s.b)("inlineCode",{parentName:"p"},"value"),". A better representation in Spark would be a\nnullable field of the primitive type. The better representation will be the\ndefault in 0.11.x. To enable this representation today, replace the usages of\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL")," with ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.withPrimitiveWrappers"),".\nInstead of importing ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits._"),", import\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.implicits._")),Object(s.b)("p",null,"See example in ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/sparksql-scalapb/blob/80f3162b69313d57f95d3dcbfee865809873567a/sparksql-scalapb/src/test/scala/WrappersSpec.scala#L42-L59"},"WrappersSpec"),"."),Object(s.b)("h2",{id:"datasets-and-none-is-not-a-term"},"Datasets and ",Object(s.b)("inlineCode",{parentName:"h2"}," is not a term")),Object(s.b)("p",null,"You will see this error if for some reason Spark's ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s are being picked up\ninstead of the ones provided by sparksql-scalapb. Please ensure you are not importing ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._"),". 
See instructions above for imports."),Object(s.b)("h2",{id:"example"},"Example"),Object(s.b)("p",null,"Check out a ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test"},"complete example")," here."))}b.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/28fe488f.273191e7.js b/28fe488f.3cbafbef.js similarity index 94% rename from 28fe488f.273191e7.js rename to 28fe488f.3cbafbef.js index 9b8b9bf28..69ed89de5 100644 --- a/28fe488f.273191e7.js +++ b/28fe488f.3cbafbef.js @@ -1 +1 @@ -(window.webpackJsonp=window.webpackJsonp||[]).push([[14],{119:function(e,n,t){"use strict";t.d(n,"a",(function(){return b})),t.d(n,"b",(function(){return u}));var a=t(0),r=t.n(a);function o(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function s(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var c=r.a.createContext({}),p=function(e){var n=r.a.useContext(c),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},b=function(e){var n=p(e.components);return r.a.createElement(c.Provider,{value:n},e.children)},d={inlineCode:"code",wrapper:function(e){var n=e.children;return r.a.createElement(r.a.Fragment,{},n)}},m=r.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),b=p(t),m=a,u=b["".concat(s,".").concat(m)]||b[m]||d[m]||o;return t?r.a.createElement(u,i(i({ref:n},c),{},{components:t})):r.a.createElement(u,i({ref:n},c))}));function u(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,s=new Array(o);s[0]=m;var i={};for(var l in n)hasOwnProperty.call(n,l)&&(i[l]=n[l]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var c=2;c,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n\nval typeRegistry = TypeRegistry().addMessage[MyMessage]\n// typeRegistry: TypeRegistry = TypeRegistry(\n// Map(\n// "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@6ab5713b\n// ),\n// Set()\n// )\n\nval printer = new Printer().withTypeRegistry(typeRegistry)\n// printer: Printer = scalapb.json4s.Printer@1fe6851e\n\nprinter.print(c)\n// res0: String = "{\\"myAny\\":{\\"@type\\":\\"type.googleapis.com/com.thesamet.docs.MyMessage\\",\\"x\\":17}}"\n')),Object(o.b)("p",null,"Conversely, you can start from a JSON and parse it back to a ",Object(o.b)("inlineCode",{parentName:"p"},"MyContainer")," that contains an ",Object(o.b)("inlineCode",{parentName:"p"},"Any")," field:"),Object(o.b)("pre",null,Object(o.b)("code",{parentName:"pre",className:"language-scala"},'val parser = new Parser().withTypeRegistry(typeRegistry)\n// parser: Parser = scalapb.json4s.Parser@30e31cd\n\nparser.fromJsonString[MyContainer]("""\n {\n "myAny": {\n "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",\n "x": 17\n }\n }""")\n// res1: MyContainer = MyContainer(\n// Some(\n// Any(\n// "type.googleapis.com/com.thesamet.docs.MyMessage",\n// ,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n')))}b.isMDXComponent=!0}}]); \ No newline at end of file 
+(window.webpackJsonp=window.webpackJsonp||[]).push([[14],{119:function(e,n,t){"use strict";t.d(n,"a",(function(){return b})),t.d(n,"b",(function(){return u}));var a=t(0),r=t.n(a);function o(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function s(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var c=r.a.createContext({}),p=function(e){var n=r.a.useContext(c),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},b=function(e){var n=p(e.components);return r.a.createElement(c.Provider,{value:n},e.children)},d={inlineCode:"code",wrapper:function(e){var n=e.children;return r.a.createElement(r.a.Fragment,{},n)}},m=r.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),b=p(t),m=a,u=b["".concat(s,".").concat(m)]||b[m]||d[m]||o;return t?r.a.createElement(u,i(i({ref:n},c),{},{components:t})):r.a.createElement(u,i({ref:n},c))}));function u(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,s=new Array(o);s[0]=m;var i={};for(var l in n)hasOwnProperty.call(n,l)&&(i[l]=n[l]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var c=2;c,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n\nval typeRegistry = TypeRegistry().addMessage[MyMessage]\n// typeRegistry: TypeRegistry = TypeRegistry(\n// Map(\n// "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@51bb9cc6\n// ),\n// Set()\n// )\n\nval printer = new Printer().withTypeRegistry(typeRegistry)\n// printer: Printer = scalapb.json4s.Printer@32007f31\n\nprinter.print(c)\n// res0: String = "{\\"myAny\\":{\\"@type\\":\\"type.googleapis.com/com.thesamet.docs.MyMessage\\",\\"x\\":17}}"\n')),Object(o.b)("p",null,"Conversely, you can start from a JSON and parse it back to a ",Object(o.b)("inlineCode",{parentName:"p"},"MyContainer")," that contains an ",Object(o.b)("inlineCode",{parentName:"p"},"Any")," field:"),Object(o.b)("pre",null,Object(o.b)("code",{parentName:"pre",className:"language-scala"},'val parser = new Parser().withTypeRegistry(typeRegistry)\n// parser: Parser = scalapb.json4s.Parser@374ba798\n\nparser.fromJsonString[MyContainer]("""\n {\n "myAny": {\n "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",\n "x": 17\n }\n }""")\n// res1: MyContainer = MyContainer(\n// Some(\n// Any(\n// "type.googleapis.com/com.thesamet.docs.MyMessage",\n// ,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n')))}b.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/404.html b/404.html index 52c7b01d9..7ed4e9723 100644 --- a/404.html +++ b/404.html @@ -10,14 +10,14 @@ Page Not Found | ScalaPB - +

Page Not Found

We could not find what you were looking for.

Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

- + \ No newline at end of file diff --git a/63f1a026.3e949aad.js b/63f1a026.3e949aad.js deleted file mode 100644 index 0fe9cc1eb..000000000 --- a/63f1a026.3e949aad.js +++ /dev/null @@ -1 +0,0 @@ -(window.webpackJsonp=window.webpackJsonp||[]).push([[23],{119:function(e,n,t){"use strict";t.d(n,"a",(function(){return d})),t.d(n,"b",(function(){return m}));var a=t(0),o=t.n(a);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function s(e){for(var n=1;n=0||(o[t]=e[t]);return o}(e,n);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(o[t]=e[t])}return o}var p=o.a.createContext({}),c=function(e){var n=o.a.useContext(p),t=n;return e&&(t="function"==typeof e?e(n):s(s({},n),e)),t},d=function(e){var n=c(e.components);return o.a.createElement(p.Provider,{value:n},e.children)},b={inlineCode:"code",wrapper:function(e){var n=e.children;return o.a.createElement(o.a.Fragment,{},n)}},u=o.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,i=e.originalType,r=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=c(t),u=a,m=d["".concat(r,".").concat(u)]||d[u]||b[u]||i;return t?o.a.createElement(m,s(s({ref:n},p),{},{components:t})):o.a.createElement(m,s({ref:n},p))}));function m(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var i=t.length,r=new Array(i);r[0]=u;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s.mdxType="string"==typeof e?e:a,r[1]=s;for(var p=2;p println("Mobile!")\n case _ => println("Not mobile!")\n}\n')),Object(i.b)("h2",{id:"nested-messages"},"Nested messages"),Object(i.b)("p",null,"Nested messages appear as case classes inside the companion object of\nthe containing message."),Object(i.b)("h2",{id:"serialization"},"Serialization"),Object(i.b)("p",null,"Each message case class extends a base trait called ",Object(i.b)("inlineCode",{parentName:"p"},"GeneratedMessage")," which provides\nmethods that help serialize a message:"),Object(i.b)("ul",null,Object(i.b)("li",{parentName:"ul"},Object(i.b)("inlineCode",{parentName:"li"},"def toByteArray: Array[Byte]"),": serializes the message and return a byte array containing its raw bytes."),Object(i.b)("li",{parentName:"ul"},Object(i.b)("inlineCode",{parentName:"li"},"def writeTo(output: OutputStream): Unit"),": serializes the message and writes it to an ",Object(i.b)("inlineCode",{parentName:"li"},"OutputStream"),".")),Object(i.b)("h2",{id:"parsing"},"Parsing"),Object(i.b)("p",null,"The companion object of each message extends a base trait called ",Object(i.b)("inlineCode",{parentName:"p"},"GeneratedMessageCompanion[A]")," where ",Object(i.b)("inlineCode",{parentName:"p"},"A")," is the type of the message. 
It provides many\nuseful methods that helps dealing with a message in a generic way, however the primary use is parsing:"),Object(i.b)("ul",null,Object(i.b)("li",{parentName:"ul"},Object(i.b)("inlineCode",{parentName:"li"},"def parseFrom(input: InputStream): A"),": reads and parses a message from an ",Object(i.b)("inlineCode",{parentName:"li"},"InputStream"),"."),Object(i.b)("li",{parentName:"ul"},Object(i.b)("inlineCode",{parentName:"li"},"def parseFrom(s: Array[Byte]): A"),": parses a message from the given byte array.")),Object(i.b)("h2",{id:"parsing-from-an-input-stream"},"Parsing from an input stream"),Object(i.b)("p",null,"The following example code loads binary data from a file and parses it as an ",Object(i.b)("inlineCode",{parentName:"p"},"AddressBook"),". If the file doesn't exist it returns an empty ",Object(i.b)("inlineCode",{parentName:"p"},"AddressBook"),":"),Object(i.b)("pre",null,Object(i.b)("code",{parentName:"pre",className:"language-scala"},'def readFromFile(): AddressBook =\n Using(new FileInputStream("addressbook.pb")) { fileInputStream =>\n AddressBook.parseFrom(fileInputStream)\n }.recover { case _: FileNotFoundException =>\n println("No address book found. Will create a new file.")\n AddressBook()\n }.get\n')),Object(i.b)("h2",{id:"adding-new-person"},"Adding new person"),Object(i.b)("p",null,"The following code prompts the user to enter a person's data. It then loads\nthe address book from a file, adds the new person to the list, and saves it again:"),Object(i.b)("pre",null,Object(i.b)("code",{parentName:"pre",className:"language-scala"},'def personFromStdin(): Person = {\n print("Enter person ID (int): ")\n val id = StdIn.readInt()\n print("Enter name: ")\n val name = StdIn.readLine()\n print("Enter email address (blank for none): ")\n val email = StdIn.readLine()\n\n def getPhone(): Option[Person.PhoneNumber] = {\n print("Enter a phone number (or leave blank to finish): ")\n val number = StdIn.readLine()\n if (number.nonEmpty) {\n print("Is this a mobile, home, or work phone [mobile, home, work] ? ")\n val typ = StdIn.readLine() match {\n case "mobile" => Some(Person.PhoneType.MOBILE)\n case "home" => Some(Person.PhoneType.HOME)\n case "work" => Some(Person.PhoneType.WORK)\n case _ =>\n println("Unknown phone type. 
Leaving as None.")\n None\n }\n Some(Person.PhoneNumber(number = number, `type` = typ))\n } else None\n }\n\n // Keep prompting for phone numbers until None is returned.\n val phones =\n Iterator\n .continually(getPhone())\n .takeWhile(_.nonEmpty)\n .flatten\n .toSeq\n\n Person(\n id = id,\n name = name,\n email = if (email.nonEmpty) Some(email) else None,\n phones = phones\n )\n}\n\ndef addPerson(): Unit = {\n val newPerson = personFromStdin()\n val addressBook = readFromFile()\n // Append the new person to the people list field\n val updated = addressBook.update(\n _.people :+= newPerson\n )\n Using(new FileOutputStream("addressbook.pb")) { output =>\n updated.writeTo(output)\n }\n}\n')),Object(i.b)("h2",{id:"running-the-example"},"Running the example"),Object(i.b)("p",null,"In sbt, type ",Object(i.b)("inlineCode",{parentName:"p"},"run")),Object(i.b)("blockquote",null,Object(i.b)("p",{parentName:"blockquote"},'This document, "Protocol Buffer Tutorial: Scala" is a modification of ',Object(i.b)("a",{parentName:"p",href:"https://developers.google.com/protocol-buffers/docs/javatutorial"},'"Protocol Buffer Basics: Java"'),", which is a work created and ",Object(i.b)("a",{parentName:"p",href:"https://developers.google.com/terms/site-policies"},"shared by Google")," and used according to terms described in the ",Object(i.b)("a",{parentName:"p",href:"https://creativecommons.org/licenses/by/4.0/"},"Creative Commons 4.0 Attribution License"),".")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/63f1a026.9f0de8be.js b/63f1a026.9f0de8be.js new file mode 100644 index 000000000..05ba16244 --- /dev/null +++ b/63f1a026.9f0de8be.js @@ -0,0 +1 @@ +(window.webpackJsonp=window.webpackJsonp||[]).push([[23],{119:function(e,n,t){"use strict";t.d(n,"a",(function(){return d})),t.d(n,"b",(function(){return m}));var a=t(0),o=t.n(a);function r(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function i(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function s(e){for(var n=1;n=0||(o[t]=e[t]);return o}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(o[t]=e[t])}return o}var p=o.a.createContext({}),c=function(e){var n=o.a.useContext(p),t=n;return e&&(t="function"==typeof e?e(n):s(s({},n),e)),t},d=function(e){var n=c(e.components);return o.a.createElement(p.Provider,{value:n},e.children)},b={inlineCode:"code",wrapper:function(e){var n=e.children;return o.a.createElement(o.a.Fragment,{},n)}},u=o.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,r=e.originalType,i=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=c(t),u=a,m=d["".concat(i,".").concat(u)]||d[u]||b[u]||r;return t?o.a.createElement(m,s(s({ref:n},p),{},{components:t})):o.a.createElement(m,s({ref:n},p))}));function m(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var r=t.length,i=new Array(r);i[0]=u;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s.mdxType="string"==typeof e?e:a,i[1]=s;for(var p=2;p println("Mobile!")\n case _ => println("Not mobile!")\n}\n')),Object(r.b)("h2",{id:"nested-messages"},"Nested messages"),Object(r.b)("p",null,"Nested messages appear as case classes inside the companion object of\nthe containing 
message."),Object(r.b)("h2",{id:"serialization"},"Serialization"),Object(r.b)("p",null,"Each message case class extends a base trait called ",Object(r.b)("inlineCode",{parentName:"p"},"GeneratedMessage")," which provides\nmethods that help serialize a message:"),Object(r.b)("ul",null,Object(r.b)("li",{parentName:"ul"},Object(r.b)("inlineCode",{parentName:"li"},"def toByteArray: Array[Byte]"),": serializes the message and return a byte array containing its raw bytes."),Object(r.b)("li",{parentName:"ul"},Object(r.b)("inlineCode",{parentName:"li"},"def writeTo(output: OutputStream): Unit"),": serializes the message and writes it to an ",Object(r.b)("inlineCode",{parentName:"li"},"OutputStream"),".")),Object(r.b)("h2",{id:"parsing"},"Parsing"),Object(r.b)("p",null,"The companion object of each message extends a base trait called ",Object(r.b)("inlineCode",{parentName:"p"},"GeneratedMessageCompanion[A]")," where ",Object(r.b)("inlineCode",{parentName:"p"},"A")," is the type of the message. It provides many\nuseful methods that helps dealing with a message in a generic way, however the primary use is parsing:"),Object(r.b)("ul",null,Object(r.b)("li",{parentName:"ul"},Object(r.b)("inlineCode",{parentName:"li"},"def parseFrom(input: InputStream): A"),": reads and parses a message from an ",Object(r.b)("inlineCode",{parentName:"li"},"InputStream"),"."),Object(r.b)("li",{parentName:"ul"},Object(r.b)("inlineCode",{parentName:"li"},"def parseFrom(s: Array[Byte]): A"),": parses a message from the given byte array.")),Object(r.b)("h2",{id:"parsing-from-an-input-stream"},"Parsing from an input stream"),Object(r.b)("p",null,"The following example code loads binary data from a file and parses it as an ",Object(r.b)("inlineCode",{parentName:"p"},"AddressBook"),". If the file doesn't exist it returns an empty ",Object(r.b)("inlineCode",{parentName:"p"},"AddressBook"),":"),Object(r.b)("pre",null,Object(r.b)("code",{parentName:"pre",className:"language-scala"},'def readFromFile(): AddressBook =\n Using(new FileInputStream("addressbook.pb")) { fileInputStream =>\n AddressBook.parseFrom(fileInputStream)\n }.recover { case _: FileNotFoundException =>\n println("No address book found. Will create a new file.")\n AddressBook()\n }.get\n')),Object(r.b)("h2",{id:"adding-new-person"},"Adding new person"),Object(r.b)("p",null,"The following code prompts the user to enter a person's data. It then loads\nthe address book from a file, adds the new person to the list, and saves it again:"),Object(r.b)("pre",null,Object(r.b)("code",{parentName:"pre",className:"language-scala"},'def personFromStdin(): Person = {\n print("Enter person ID (int): ")\n val id = StdIn.readInt()\n print("Enter name: ")\n val name = StdIn.readLine()\n print("Enter email address (blank for none): ")\n val email = StdIn.readLine()\n\n def getPhone(): Option[Person.PhoneNumber] = {\n print("Enter a phone number (or leave blank to finish): ")\n val number = StdIn.readLine()\n if (number.nonEmpty) {\n print("Is this a mobile, home, or work phone [mobile, home, work] ? ")\n val typ = StdIn.readLine() match {\n case "mobile" => Some(Person.PhoneType.MOBILE)\n case "home" => Some(Person.PhoneType.HOME)\n case "work" => Some(Person.PhoneType.WORK)\n case _ =>\n println("Unknown phone type. 
Leaving as None.")\n None\n }\n Some(Person.PhoneNumber(number = number, `type` = typ))\n } else None\n }\n\n // Keep prompting for phone numbers until None is returned.\n val phones =\n Iterator\n .continually(getPhone())\n .takeWhile(_.nonEmpty)\n .flatten\n .toSeq\n\n Person(\n id = id,\n name = name,\n email = if (email.nonEmpty) Some(email) else None,\n phones = phones\n )\n}\n\ndef addPerson(): Unit = {\n val newPerson = personFromStdin()\n val addressBook = readFromFile()\n // Append the new person to the people list field\n val updated = addressBook.update(\n _.people :+= newPerson\n )\n Using(new FileOutputStream("addressbook.pb")) { output =>\n updated.writeTo(output)\n }\n}\n')),Object(r.b)("h2",{id:"running-the-example"},"Running the example"),Object(r.b)("p",null,"In sbt, type ",Object(r.b)("inlineCode",{parentName:"p"},"run")),Object(r.b)("blockquote",null,Object(r.b)("p",{parentName:"blockquote"},'This document, "Protocol Buffer Tutorial: Scala" is a modification of ',Object(r.b)("a",{parentName:"p",href:"https://developers.google.com/protocol-buffers/docs/javatutorial"},'"Protocol Buffer Basics: Java"'),", which is a work created and ",Object(r.b)("a",{parentName:"p",href:"https://developers.google.com/terms/site-policies"},"shared by Google")," and used according to terms described in the ",Object(r.b)("a",{parentName:"p",href:"https://creativecommons.org/licenses/by/4.0/"},"Creative Commons 4.0 Attribution License"),".")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/blog/2019/05/28/hola/index.html b/blog/2019/05/28/hola/index.html index 9cc26e3ba..b09240c02 100644 --- a/blog/2019/05/28/hola/index.html +++ b/blog/2019/05/28/hola/index.html @@ -10,7 +10,7 @@ Hola | ScalaPB - + @@ -23,7 +23,7 @@

Hola

Gao Wei


Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/2019/05/29/hello-world/index.html b/blog/2019/05/29/hello-world/index.html index 6bdfaf473..ceb006721 100644 --- a/blog/2019/05/29/hello-world/index.html +++ b/blog/2019/05/29/hello-world/index.html @@ -10,7 +10,7 @@ Hello | ScalaPB - + @@ -23,7 +23,7 @@
- + diff --git a/blog/2019/05/30/welcome/index.html b/blog/2019/05/30/welcome/index.html index 49dde7769..cb5327ec8 100644 --- a/blog/2019/05/30/welcome/index.html +++ b/blog/2019/05/30/welcome/index.html @@ -10,7 +10,7 @@ Welcome | ScalaPB - + @@ -23,7 +23,7 @@

Welcome

Yangshun Tay


Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/index.html b/blog/index.html index 714ac2664..b454bf349 100644 --- a/blog/index.html +++ b/blog/index.html @@ -10,7 +10,7 @@ Blog | ScalaPB - + @@ -26,7 +26,7 @@

Welcome

Yangshun Tay


Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

Hola

Gao Wei


Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/docusaurus/index.html b/blog/tags/docusaurus/index.html index 1a7449766..7c040cf78 100644 --- a/blog/tags/docusaurus/index.html +++ b/blog/tags/docusaurus/index.html @@ -10,7 +10,7 @@ Posts tagged "docusaurus" | ScalaPB - + @@ -26,7 +26,7 @@

3 posts tagged with "docusaurus"


Welcome

Yangshun Tay


Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

Hola

Gao Wei


Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/facebook/index.html b/blog/tags/facebook/index.html index c73374e62..e843f373e 100644 --- a/blog/tags/facebook/index.html +++ b/blog/tags/facebook/index.html @@ -10,7 +10,7 @@ Posts tagged "facebook" | ScalaPB - + @@ -24,7 +24,7 @@

1 post tagged with "facebook"


Welcome

Yangshun Tay


Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/tags/hello/index.html b/blog/tags/hello/index.html index 0ae830674..abc289e87 100644 --- a/blog/tags/hello/index.html +++ b/blog/tags/hello/index.html @@ -10,7 +10,7 @@ Posts tagged "hello" | ScalaPB - + @@ -25,7 +25,7 @@

2 posts tagged with "hello"


Welcome

Yangshun Tay


Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/tags/hola/index.html b/blog/tags/hola/index.html index 31f32ebdd..a07424b23 100644 --- a/blog/tags/hola/index.html +++ b/blog/tags/hola/index.html @@ -10,7 +10,7 @@ Posts tagged "hola" | ScalaPB - + @@ -24,7 +24,7 @@

1 post tagged with "hola"


Hola

Gao Wei


Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/index.html b/blog/tags/index.html index 79742ee55..befd5cdd3 100644 --- a/blog/tags/index.html +++ b/blog/tags/index.html @@ -10,7 +10,7 @@ Tags | ScalaPB - + @@ -22,7 +22,7 @@ - + diff --git a/docs/common-protos/index.html b/docs/common-protos/index.html index 9aeef7b00..76ec0fca4 100644 --- a/docs/common-protos/index.html +++ b/docs/common-protos/index.html @@ -10,7 +10,7 @@ Common protos | ScalaPB - + @@ -29,7 +29,7 @@ the classpath. This is accomplished by adding the library as a normal dependency.

If you don't have any proto files that import the common protos, then you can omit the "protobuf" dependency.

Adding new packages#

If you don't see your favorite third-party proto package here, and there is already a maven package for it that provides the proto files (possibly with Java generated classes), you can send a pull request to common-protos to have it added. See the instructions on the ScalaPB Common Protos project page on GitHub.

Available packages#

proto-google-common-protos#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.11" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.11" % "2.9.6-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.10" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.10" % "2.9.6-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.9" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.9" % "2.9.6-0"
)

proto-google-cloud-pubsub-v1#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.11" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.11" % "1.102.20-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.10" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.10" % "1.102.20-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.9" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.9" % "1.102.20-0"
)

pgv-proto#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.11" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.11" % "0.6.13-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.10" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.10" % "0.6.13-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.9" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.9" % "0.6.13-0"
)


- + diff --git a/docs/contact/index.html b/docs/contact/index.html index 9823cd06c..f37ce2b0b 100644 --- a/docs/contact/index.html +++ b/docs/contact/index.html @@ -10,7 +10,7 @@ Contacting us | ScalaPB - + @@ -46,7 +46,7 @@ would be a great way to support the time and effort put into the development of ScalaPB!

- + diff --git a/docs/customizations/index.html b/docs/customizations/index.html index 0511beae9..29b7b09ab 100644 --- a/docs/customizations/index.html +++ b/docs/customizations/index.html @@ -10,7 +10,7 @@ Customizations | ScalaPB - + @@ -225,7 +225,7 @@ sealed oneofs:

message Foo {
option (scalapb.message).derives = "yourpkg.Show";
...
}
message Expr {
option (scalapb.message).sealed_oneof_derives = "yourpkg.Show";
oneof sealed_value {
...
}
}
- + diff --git a/docs/dotty/index.html b/docs/dotty/index.html index eb85d1129..687b5da90 100644 --- a/docs/dotty/index.html +++ b/docs/dotty/index.html @@ -10,7 +10,7 @@ Using with Dotty | ScalaPB - + @@ -29,7 +29,7 @@ the Scala compiler with the default compiler settings. It is known that currently the generator will provide an error if -language:strictEquality is set.

- + diff --git a/docs/faq/index.html b/docs/faq/index.html index c310c50c1..44d77f7cf 100644 --- a/docs/faq/index.html +++ b/docs/faq/index.html @@ -10,7 +10,7 @@ Frequently Asked Questions | ScalaPB - + @@ -76,7 +76,7 @@
Use a recent version of sbt-protoc (at least 1.0.6), which defaults to a
compatible version of protoc (3.19.2).
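If you manage sbt-protoc through project/plugins.sbt, the upgrade is a one-line change (a minimal sketch; adjust the version as needed):

// project/plugins.sbt
addSbtPlugin("com.thesamet" % "sbt-protoc" % "1.0.6")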
- + diff --git a/docs/generated-code/index.html b/docs/generated-code/index.html index 38f7f4673..859c472c0 100644 --- a/docs/generated-code/index.html +++ b/docs/generated-code/index.html @@ -10,7 +10,7 @@ Generated Code | ScalaPB - + @@ -119,7 +119,7 @@ toJavaProto methods.
  • The companion object for enums will have fromJavaValue and toJavaValue methods.
  • - + diff --git a/docs/generic/index.html b/docs/generic/index.html index 911e32f73..7146099d5 100644 --- a/docs/generic/index.html +++ b/docs/generic/index.html @@ -10,7 +10,7 @@ Writing generic code | ScalaPB - + @@ -46,7 +46,7 @@ to return, and the filename. The Scala compiler will automatically find the appropriate message companion to pass as cmp via implicit search:

    readFromFile[Person]("/tmp/person.pb")
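For reference, a helper with this shape could be defined as follows. This is a sketch rather than the page's exact code; it assumes ScalaPB 0.11.x, where parseFrom on a message companion accepts an InputStream:

import java.io.FileInputStream
import scalapb.{GeneratedMessage, GeneratedMessageCompanion}

// Parses a binary-encoded message of type T from a file; the companion `cmp`
// is supplied by implicit search at the call site.
def readFromFile[T <: GeneratedMessage](file: String)(implicit cmp: GeneratedMessageCompanion[T]): T = {
  val in = new FileInputStream(file)
  try cmp.parseFrom(in) finally in.close()
}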
    - + diff --git a/docs/getting-started/index.html b/docs/getting-started/index.html index f9d4714a7..c2fa1f264 100644 --- a/docs/getting-started/index.html +++ b/docs/getting-started/index.html @@ -10,7 +10,7 @@ Protocol Buffer Tutorial: Scala | ScalaPB - + @@ -18,12 +18,12 @@ - +

    Protocol Buffer Tutorial: Scala

    This tutorial provides a basic Scala programmer's introduction to working with protocol buffers. By walking through creating a simple example application, it shows you how to

    • Define message formats in a .proto file.

    • Use SBT to generate Scala case classes from proto files.

    • Use ScalaPB's API to write and read messages.

    This isn't a comprehensive guide to using protocol buffers in Scala. For more detailed reference information, see the Generated Code page.

    Why Use Protocol Buffers?#

    The example we're going to use is a very simple "address book" application that can read and write people's contact details to and from a file. Each person in the address book has a name, an ID, an email address, and a contact phone number.

    How do you serialize and retrieve structured data like this? There are a few ways to solve this problem:

    • Use Java Serialization. This is the default approach since it's built into the language, but it has a host of well-known problems (see Effective Java, by Josh Bloch pp. 213), and also doesn't work very well if you need to share data with applications written in C++ or Python.
    • You can invent an ad-hoc way to encode the data items into a single string – such as encoding 4 ints as "12:3:-23:67". This is a simple and flexible approach, although it does require writing one-off encoding and parsing code, and the parsing imposes a small run-time cost. This works best for encoding very simple data.
    • Serialize the data to JSON (or XML). This approach can be very attractive since JSON and XML are (sort of) human readable and there are plenty of libraries for lots of languages. This can be a good choice if you want to share data with other applications/projects. However, JSON and XML are notoriously space intensive, and encoding/decoding it can impose a huge performance penalty on applications. Also, navigating JSON AST trees or XML DOM trees is considerably more complicated than navigating simple fields in a case class normally would be.

    Protocol buffers are the flexible, efficient, automated solution to solve exactly this problem. With protocol buffers, you write a .proto description of the data structure you wish to store. From that, the protocol buffer compiler creates a case class that implements automatic encoding and parsing of the protocol buffer data with an efficient binary format. Importantly, the protocol buffer format supports the idea of extending the format over time in such a way that the code can still read data encoded with the old format.

    Where to Find the Example Code#

The example code for this tutorial is under the examples/basic directory in ScalaPB's repo. To get your copy:

    git clone https://github.com/scalapb/ScalaPB.git
    cd examples/basic

    Defining Your Protocol Format#

    By default, all proto files under src/main/protobuf will be compiled to Scala case classes. The following example .proto file resides in src/main/protobuf/addressbook.proto.

    syntax = "proto2";
    package tutorial;
    message Person {
    required string name = 1;
    required int32 id = 2;
    optional string email = 3;
    enum PhoneType {
    MOBILE = 0;
    HOME = 1;
    WORK = 2;
    }
    @@ -32,7 +32,7 @@
    message AddressBook {
    repeated Person people = 1;
    }

    Let's go through each part of the file and see what it does.

    The .proto file starts with a package declaration, which helps to prevent naming conflicts between different projects. In Scala, the package name followed by the file name is used as the Scala package unless you have either explicitly specified a java_package, or specified a scala package option.

    Next, you have your message definitions. A message is just an aggregate containing a set of typed fields. Many standard simple data types are available as field types, including bool, int32, float, double, and string. You can also add further structure to your messages by using other message types as field types – in the above example the Person message contains PhoneNumber messages, while the AddressBook message contains Person messages. You can even define message types nested inside other messages – as you can see, the PhoneNumber type is defined inside Person. You can also define enum types if you want one of your fields to have one of a predefined list of values – here you want to specify that a phone number can be one of MOBILE, HOME, or WORK.

    The " = 1", " = 2" markers on each element identify the unique "tag" that field uses in the binary encoding. Tag numbers 1-15 require one less byte to encode than higher numbers, so as an optimization you can decide to use those tags for the commonly used or repeated elements, leaving tags 16 and higher for less-commonly used optional elements. Each element in a repeated field requires re-encoding the tag number, so repeated fields are particularly good candidates for this optimization.

    Each field must be annotated with one of the following modifiers:

    • required: a value for the field must be provided when constructing a message case class. Parsing a message that misses a required field will throw an InvalidProtocolBufferException. Other than this, a required field behaves exactly like an optional field.
    • optional: the field may or may not be set. If an optional field value isn't set, a default value is used. For simple types, you can specify your own default value, as we've done for the phone number type in the example. Otherwise, a system default is used: zero for numeric types, the empty string for strings, false for bools. For embedded messages, the default value is always the "default instance" or "prototype" of the message, which has none of its fields set. Calling the accessor to get the value of an optional (or required) field which has not been explicitly set always returns that field's default value. In proto2, optional fields are represented as Option[]. In proto3, optional primitives are not wrapped in Option[], but messages are.
    • repeated: the field may be repeated any number of times (including zero). The order of the repeated values will be preserved in the protocol buffer. Think of repeated fields as dynamically sized arrays. They are represented in Scala as Seqs (see the sketch after this list).
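A sketch of how the three modifiers surface in the generated Scala, using the tutorial's Person message (the field values here are made up):

val p = Person(
  name = "Alice",                    // required string      -> plain field
  id = 1234,                         // required int32       -> plain field
  email = Some("alice@example.com"), // optional string      -> Option[String]
  phones = Seq.empty                 // repeated PhoneNumber -> Seq[...]
)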

    Required Is Forever. You should be very careful about marking fields as required. If at some point you wish to stop writing or sending a required field, it will be problematic to change the field to an optional field – old readers will consider messages without this field to be incomplete and may reject or drop them unintentionally. You should consider writing application-specific custom validation routines for your buffers instead. Some engineers at Google have come to the conclusion that using required does more harm than good; they prefer to use only optional and repeated. However, this view is not universal.

    You'll find a complete guide to writing .proto files – including all the possible field types – in the Protocol Buffer Language Guide. Don't go looking for facilities similar to class inheritance, though – protocol buffers don't do that.

    Compiling Your Protocol Buffers#

    Start sbt and type compile to compile the tutorial project.

    Now, take a look at the files that were generated under target/scala-2.13/src_managed/main/scalapb/com/example/tutorial/addressbook:

    You will find Person.scala, with a case class that conceptually looks like this:

    final case class Person(
    name: String,
    id: Int,
    email: Option[String] = None,
    phones: Seq[PhoneNumber] = Seq.empty,
    ...

    The actual generated code will contain fully qualified class names (such as _root_.scala.Predef.String) to prevent name collisions between entities defined in your protocol buffer and other Scala code.

    As you can see, each protobuf field becomes a member in the generated case class.

    Repeated fields, by default, have a Seq[T] type. When they are parsed, the runtime type would be a Vector.

    Enums#

    The enum PhoneType is represented as a sealed abstract class, extended by a case object for each possible enum value:

    sealed abstract class PhoneType(val value: Int)
    extends _root_.scalapb.GeneratedEnum {
    type EnumType = PhoneType
    def isMobile: Boolean = false
    def isHome: Boolean = false
    def isWork: Boolean = false
    }
    object PhoneType {
    case object MOBILE extends PhoneType(0) {
    val index = 0
    val name = "MOBILE"
    override def isMobile: Boolean = true
    }
    case object HOME extends PhoneType(1) {
    val index = 1
    val name = "HOME"
    override def isHome: Boolean = true
    }
    // ...
    }

    Since Scala type equality is not type-safe (a == b will compile even when a and b are of types that can never be equal), it is recommended to use the various isX methods or pattern matching for comparison:

    // Using isX:
    val t = if (phoneType.isMobile) "Mobile" else "Not Mobile"
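The pattern-matching alternative looks like this (a sketch; PhoneType is nested in Person, so import it or qualify it):

    // Using pattern matching:
    import Person.PhoneType

    val label = phoneType match {
      case PhoneType.MOBILE => "Mobile"
      case _                => "Not Mobile"
    }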
    @@ -46,7 +46,7 @@
    Person(
    id = id,
    name = name,
    email = if (email.nonEmpty) Some(email) else None,
    phones = phones
    )
    }
    def addPerson(): Unit = {
    val newPerson = personFromStdin()
    val addressBook = readFromFile()
    // Append the new person to the people list field
    val updated = addressBook.update(
    _.people :+= newPerson
    )
    Using(new FileOutputStream("addressbook.pb")) { output =>
    updated.writeTo(output)
    }
    }

    Running the example#

    In sbt, type run

    This document, "Protocol Buffer Tutorial: Scala" is a modification of "Protocol Buffer Basics: Java", which is a work created and shared by Google and used according to terms described in the Creative Commons 4.0 Attribution License.

diff --git a/docs/grpc/index.html b/docs/grpc/index.html

gRPC | ScalaPB

…closely the official grpc-java API. Example project coming soon.

    grpc-netty issues#

    In certain situations (for example when you have a fat jar), you may see the following exception:

    Exception in thread "main" io.grpc.ManagedChannelProvider$ProviderNotFoundException: No functional server found. Try adding a dependency on the grpc-netty artifact

    To work around this issue, try the following solutions:

    1. Create a NettyServer explicitly using io.grpc.netty.NettyServerBuilder (a fuller sketch appears after this list).

    Example:

    NettyServerBuilder
    .forPort(9000)
    .keepAliveTime(500, TimeUnit.SECONDS)
    2. If using SBT, try the following merge conflict strategy:
    assemblyMergeStrategy in assembly := {
    case x if x.contains("io.netty.versions.properties") => MergeStrategy.discard
    case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
    }
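For the first workaround above, a fuller sketch might look like the following; the service value is a placeholder for your own bound gRPC service, and the port and keep-alive settings are simply the ones from the fragment above:

    import io.grpc.ServerServiceDefinition
    import io.grpc.netty.NettyServerBuilder
    import java.util.concurrent.TimeUnit

    // Placeholder: bind your generated service here, e.g. MyService.bindService(...)
    val service: ServerServiceDefinition = ???

    val server = NettyServerBuilder
      .forPort(9000)
      .keepAliveTime(500, TimeUnit.SECONDS)
      .addService(service)
      .build()
      .start()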
    - + diff --git a/docs/index.html b/docs/index.html index 3ea6d2d91..4498a8f82 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ ScalaPB: Scala Protocol Buffer Compiler | ScalaPB - + @@ -33,7 +33,7 @@ Oneof's that were introduced in Protocol Buffers 2.6.0.

  • Newer: Supports Scala.js (in 0.5.x).

  • Newer: Supports gRPC (in 0.5.x).

  • Newest: Supports SparkSQL (in 0.5.23).

  • Newest: Supports converting to and from JSON (in 0.5.x).

  • Newest: Supports User-defined options (in 0.5.29).

  • - + diff --git a/docs/installation/index.html b/docs/installation/index.html index 13d18599c..8700e89ea 100644 --- a/docs/installation/index.html +++ b/docs/installation/index.html @@ -10,7 +10,7 @@ Installing ScalaPB | ScalaPB - + @@ -31,7 +31,7 @@ use scalapbc (ScalaPB compiler).

    See ScalaPBC.

    Running from Maven#

    Using ScalaPBC, you can get maven to generate the code for you. Check out the ScalaPB Maven example.

    Next:#

    Read about the Generated Code.

    - + diff --git a/docs/json/index.html b/docs/json/index.html index fc2717842..1c6034794 100644 --- a/docs/json/index.html +++ b/docs/json/index.html @@ -10,7 +10,7 @@ ScalaPB and JSON | ScalaPB - + @@ -18,7 +18,7 @@ - +
…strings. To use the numeric representation, set this option to true. Note that due to the way JavaScript represents numbers, there is a possibility of losing precision (more details here).

    The parser can be instantiated with new scalapb.json4s.Parser(), and various methods can return instances of the parser with customized configuration:

    • ignoringUnknownFields: by default the parser will throw a JsonFormatException when encountering unknown fields. By enabling this option, unknown fields will be silently ignored.
    • ignoringOverlappingOneofFields: by default the parser will throw a JsonFormatException if values are provided for more than one field within the same oneof. By enabling this option, when more than one field of a oneof is present, one of the provided values will be picked for it.
    • mapEntriesAsKeyValuePairs: by default, protobuf maps are modeled as JSON objects. When this setting is enabled, protobuf maps are expected to be read as arrays of objects with key and value keys (a combined sketch of these options follows this list).
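A combined sketch of the options above (each of these methods returns a new, reconfigured Parser):

    import scalapb.json4s.Parser

    val lenientParser = new Parser()
      .ignoringUnknownFields
      .ignoringOverlappingOneofFields
      .mapEntriesAsKeyValuePairs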

    See the list of constructor parameters here

    Printing and parsing Anys#

    In Protocol Buffers, google.protobuf.Any is a type that embeds an arbitrary protobuf message. An Any is represented as a message that contains a typeUrl field that identifies the type, and a bytes field value which contains the serialized contents of a message. In JSON, the message embedded in the Any is serialized as usual, and there is a @type key added to it to identify which message it is. The parser expects this @type key to know which message it is. To accomplish this, all the expected embedded types need to be registered with a TypeRegistry so the printer and parser know how to process the embedded message.

    The following example is based on this proto.

    import com.thesamet.docs.json._
    import scalapb.json4s.{Printer, Parser, TypeRegistry}
    val c = MyContainer(
    myAny=Some(
    com.google.protobuf.any.Any.pack(
    MyMessage(x=17)
    )
    )
    )
    // c: MyContainer = MyContainer(
    // Some(
    // Any(
    // "type.googleapis.com/com.thesamet.docs.MyMessage",
    // <ByteString@7b6863ad size=2 contents="\b\021">,
    // UnknownFieldSet(Map())
    // )
    // ),
    // UnknownFieldSet(Map())
    // )
    val typeRegistry = TypeRegistry().addMessage[MyMessage]
    // typeRegistry: TypeRegistry = TypeRegistry(
    // Map(
    // "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@51bb9cc6
    // ),
    // Set()
    // )
    val printer = new Printer().withTypeRegistry(typeRegistry)
    // printer: Printer = scalapb.json4s.Printer@32007f31
    printer.print(c)
    // res0: String = "{\"myAny\":{\"@type\":\"type.googleapis.com/com.thesamet.docs.MyMessage\",\"x\":17}}"

    Conversely, you can start from a JSON and parse it back to a MyContainer that contains an Any field:

    val parser = new Parser().withTypeRegistry(typeRegistry)
    // parser: Parser = scalapb.json4s.Parser@374ba798
    parser.fromJsonString[MyContainer]("""
    {
    "myAny": {
    "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",
    "x": 17
    }
    }""")
    // res1: MyContainer = MyContainer(
    // Some(
    // Any(
    // "type.googleapis.com/com.thesamet.docs.MyMessage",
    // <ByteString@3c5792b8 size=2 contents="\b\021">,
    // UnknownFieldSet(Map())
    // )
    // ),
    // UnknownFieldSet(Map())
    // )
    - + @@ -59,6 +59,6 @@ - + \ No newline at end of file diff --git a/docs/sbt-settings/index.html b/docs/sbt-settings/index.html index 41502dbab..4789b41b7 100644 --- a/docs/sbt-settings/index.html +++ b/docs/sbt-settings/index.html @@ -10,7 +10,7 @@ SBT Settings | ScalaPB - + @@ -30,7 +30,7 @@ however this is configurable using the Compile / PB.protoSources setting.

    By default, sbt-protoc invokes protoc 3.x that is shipped with protoc-jar. If you would like to run a different version of protoc:

    PB.protocVersion := "-v3.11.4"

    See all available options in sbt-protoc documentation

    Java Conversions#

    To enable Java conversions add the following to your build.sbt:

    Compile / PB.targets := Seq(
    PB.gens.java -> (Compile / sourceManaged).value,
    scalapb.gen(javaConversions=true) -> (Compile / sourceManaged).value
    )

    gRPC#

    Generating gRPC stubs for services is enabled by default. To disable:

    Compile / PB.targets := Seq(
    scalapb.gen(grpc=false) -> (Compile / sourceManaged).value
    )

    Additional options to the generator#

    scalapb.gen(
    flatPackage: Boolean = false,
    javaConversions: Boolean = false,
    grpc: Boolean = true,
    singleLineToProtoString: Boolean = false,
    asciiFormatToString: Boolean = false,
    lenses: Boolean = true,
    retainSourceCodeInfo: Boolean = false
    )
    Option | scalapbc | Description
    flatPackage | flat_package | When set, ScalaPB will not append the protofile base name to the package name.
    javaConversions | java_conversions | Generates in the companion object two functions, toJavaProto and fromJavaProto, that convert between the Scala case class and the Java protobufs. For the generated code to compile, the Java protobuf code needs to also be generated or available as a library dependency.
    grpc | grpc | Generates gRPC code for services. Default is true in scalapb.gen, and needs to be explicitly specified in scalapbc.
    singleLineToProtoString | single_line_to_proto_string | By default, ScalaPB generates a toProtoString() method that renders the message in a multi-line format (using TextFormat.printToUnicodeString). If set, ScalaPB generates toString() methods that use the single-line format.
    asciiFormatToString | ascii_format_to_string | Setting this to true overrides toString to return a standard ASCII representation of the message by calling toProtoString.
    lenses | no_lenses | By default, ScalaPB generates lenses for each message for easy updating. If you are not using this feature and would like to reduce code size or compilation time, you can set this to false and lenses will not be generated.
    retainSourceCodeInfo | retain_source_code_info | Retains source code information (locations, comments) provided by protoc in the descriptors. Use the location accessor to get that information from a descriptor.
    scala3Sources | scala3_sources | If set, generates sources that are error-free under -source future with Scala 3, or -Xsource:3 with Scala 2.13.
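For example, to turn on one of these options from sbt (a minimal sketch):

    Compile / PB.targets := Seq(
      scalapb.gen(flatPackage = true) -> (Compile / sourceManaged).value
    )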
    - + diff --git a/docs/scala.js/index.html b/docs/scala.js/index.html index cf62c8fe4..f3941ec4c 100644 --- a/docs/scala.js/index.html +++ b/docs/scala.js/index.html @@ -10,7 +10,7 @@ Using ScalaPB with Scala.js | ScalaPB - + @@ -29,7 +29,7 @@ 0.6.0 would cover most use cases)

    Getting Started#

    Add to your library dependencies:

    libraryDependencies ++= Seq(
    "com.thesamet.scalapb" %%% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion,
    // The following needed only if you include scalapb/scalapb.proto:
    "com.thesamet.scalapb" %%% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf"
    )

    Demo#

    Example project: https://github.com/thesamet/scalapbjs-test

    Example with multi-project build: https://github.com/thesamet/sbt-protoc/tree/master/examples/scalajs-multiproject

    Live demo: http://thesamet.github.io/scalapbjs-test/

    - + diff --git a/docs/scalapbc/index.html b/docs/scalapbc/index.html index e48e5961a..25fdcdd3a 100644 --- a/docs/scalapbc/index.html +++ b/docs/scalapbc/index.html @@ -10,7 +10,7 @@ ScalaPBC: ScalaPB's standalone compiler | ScalaPB - + @@ -37,7 +37,7 @@ GRPC descriptors that the generated ZIO code depends on.

    bin/scalapbc --plugin-artifact=com.thesamet.scalapb.zio-grpc:protoc-gen-zio:0.1.0:default,classifier=unix,ext=sh,type=jar -- e2e/src/main/protobuf/service.proto --zio_out=/tmp/out --scala_out=grpc:/tmp/out -Ie2e/src/main/protobuf -Ithird_party -Iprotobuf

    bin/scalapbc --plugin-artifact=io.grpc:grpc-java:

    Using ScalaPB as a proper protoc plugin#

    You may want to use ScalaPB code generator as a standard protoc plugin (rather than using scalapbc as a wrapper or through SBT).

    For Linux and Mac OS X, you can download a native executable version of the plugin for Scala from our release page:

    Those zip files contain native executables of the plugin for the respective operating system built using GraalVM. If you are using another operating system, or prefer to use a JVM based plugin implementation, you will find executable scripts for Windows and Unix-like operating systems in maven. These scripts require a JVM to run. The JVM needs to be available on the path, or through the JAVA_HOME environment variable.

    To generate code:

    protoc my.protos --plugin=/path/to/bin/protoc-gen-scala-0.11.11-unix.sh --scala_out=scala

    On Windows:

    protoc my.protos --plugin=protoc-gen-scala=/path/to/bin/protoc-gen-scala.bat --scala_out=scala

    For passing parameters to the plugin, see the section above.

    Note that the standalone plugin provided in scalapbc needs to be able to find a JVM in the path or through JAVA_HOME environment variable. If you encounter unexpected errors, try to execute the plugin directly from the command line, and the output printed may be useful for further debugging.

    The generated code depends on scalapb-runtime to compile. To get the code to work, add a dependency on scalapb-runtime to your project. The version of scalapb-runtime needs to match or be newer than the version of the plugin.
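In sbt this is a single line (a sketch; pick a version that matches or is newer than the plugin version, 0.11.11 in the examples above):

    libraryDependencies += "com.thesamet.scalapb" %% "scalapb-runtime" % "0.11.11"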

    - + diff --git a/docs/sealed-oneofs/index.html b/docs/sealed-oneofs/index.html index b2afa7da2..85a3faadb 100644 --- a/docs/sealed-oneofs/index.html +++ b/docs/sealed-oneofs/index.html @@ -10,7 +10,7 @@ Sealed oneofs | ScalaPB - + @@ -41,7 +41,7 @@
    case class Mul(left: Option[Expr], right: Option[Expr]) extends Expr with GeneratedMessage
    case class Programs(exprs: Seq[Option[Expr]]) extends GeneratedMessage
    - + diff --git a/docs/sparksql/index.html b/docs/sparksql/index.html index 1e2913626..b902425ec 100644 --- a/docs/sparksql/index.html +++ b/docs/sparksql/index.html @@ -10,7 +10,7 @@ Using ScalaPB with Spark | ScalaPB - + @@ -18,7 +18,7 @@ - +
    @@ -40,7 +40,7 @@ APIs being used. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1.

    Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with the current version. In addition, it comes with incompatible versions of scala-collection-compat and shapeless. Therefore, we need to shade these libraries. Add the following to your build.sbt:

    assemblyShadeRules in assembly := Seq(
    ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,
    ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,
    ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll
    )

    See complete example of build.sbt.

    Using sparksql-scalapb#

    We assume you have a SparkSession assigned to the variable spark. In a standalone Scala program, this can be created with:

    import org.apache.spark.sql.SparkSession
    val spark: SparkSession = SparkSession
    .builder()
    .appName("ScalaPB Demo")
    .master("local[2]")
    .getOrCreate()
    // spark: SparkSession = org.apache.spark.sql.SparkSession@ca9c8f9

    IMPORTANT: Ensure you do not import spark.implicits._ to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. You may want to import StringToColumn to convert $"col name" into a Column. Add an import scalapb.spark.Implicits to add ScalaPB's encoders for protocol buffers into the implicit search scope:

    import org.apache.spark.sql.{Dataset, DataFrame, functions => F}
    import spark.implicits.StringToColumn
    import scalapb.spark.ProtoSQL
    import scalapb.spark.Implicits._

    The code snippets below use the Person message.

    We start by creating some test data:

    import scalapb.docs.person.Person
    import scalapb.docs.person.Person.{Address, AddressType}
    val testData = Seq(
    Person(name="John", age=32, addresses=Vector(
    Address(addressType=AddressType.HOME, street="Market", city="SF"))
    ),
    Person(name="Mike", age=29, addresses=Vector(
    Address(addressType=AddressType.WORK, street="Castro", city="MV"),
    Address(addressType=AddressType.HOME, street="Church", city="MV"))
    ),
    Person(name="Bart", age=27)
    )

    We can create a DataFrame from the test data:

    val df = ProtoSQL.createDataFrame(spark, testData)
    // df: DataFrame = [name: string, age: int ... 1 more field]
    df.printSchema()
    // root
    // |-- name: string (nullable = true)
    // |-- age: integer (nullable = true)
    // |-- addresses: array (nullable = false)
    // | |-- element: struct (containsNull = false)
    // | | |-- address_type: string (nullable = true)
    // | | |-- street: string (nullable = true)
    // | | |-- city: string (nullable = true)
    //
    df.show()
    // +----+---+--------------------+
    // |name|age| addresses|
    // +----+---+--------------------+
    // |John| 32|[{HOME, Market, SF}]|
    // |Mike| 29|[{WORK, Castro, M...|
    // |Bart| 27| []|
    // +----+---+--------------------+
    //

    and then process it as any other DataFrame in Spark:

    df.select($"name", F.size($"addresses").alias("address_count")).show()
    // +----+-------------+
    // |name|address_count|
    // +----+-------------+
    // |John| 1|
    // |Mike| 2|
    // |Bart| 0|
    // +----+-------------+
    //
    val nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))
    // nameAndAddress: DataFrame = [name: string, firstAddress: struct<address_type: string, street: string ... 1 more field>]
    @@ -49,7 +49,7 @@
    val protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)
    val protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)
    val protoDS: Dataset[Person] = spark.createDataset(protoRDD)

    UDFs#

    If you need to write a UDF that returns a message, it would not pick up our encoder and you may get a runtime failure. To work around this, sparksql-scalapb provides ProtoSQL.udf to create UDFs. For example, if you need to parse a binary column into a proto:

    val binaryDF = protosBinary.toDF("value")
    // binaryDF: DataFrame = [value: binary]
    val parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }
    // parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$11430/20914410@18e9db60
    binaryDF.withColumn("person", parsePersons($"value"))
    // res7: DataFrame = [value: binary, person: struct<name: string, age: int ... 1 more field>]

    Primitive wrappers#

    In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs with a single field named value. A better representation in Spark would be a nullable field of the primitive type. The better representation will be the default in later versions; to opt into it here, import scalapb.spark.ProtoSQL.implicits._

    See example in WrappersSpec.

    Datasets and <none> is not a term#

    You will see this error if for some reason Spark's Encoders are being picked up instead of the ones provided by sparksql-scalapb. Please ensure you are not importing spark.implicits._. See instructions above for imports.

    Example#

    Check out a complete example here.

    - + @@ -67,6 +67,6 @@ - + \ No newline at end of file diff --git a/docs/third-party-protos/index.html b/docs/third-party-protos/index.html index 9a88b2126..d577b8d69 100644 --- a/docs/third-party-protos/index.html +++ b/docs/third-party-protos/index.html @@ -10,7 +10,7 @@ Using third-party protos | ScalaPB - + @@ -26,7 +26,7 @@
    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value
    )
    )
    // myProject contains its own protos which rely on protos from externalProtos
    lazy val myProject = (project in file("my-project"))
    .dependsOn(externalProtos)
    .settings(
    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value
    )
    )

    See full example here.

    - + diff --git a/docs/transformations/index.html b/docs/transformations/index.html index 54ddd6b91..7900ec092 100644 --- a/docs/transformations/index.html +++ b/docs/transformations/index.html @@ -10,7 +10,7 @@ Transformations | ScalaPB - + @@ -44,7 +44,7 @@
    import "scalapb/scalapb.proto";
    option (scalapb.options) = {
    scope: PACKAGE
    field_transformations : [
    {
    when : {
    type: TYPE_MESSAGE
    type_name: ".google.protobuf.Timestamp"
    }
    set : {[scalapb.field] {type : 'com.myexample.MyType' }}
    }
    ]
    };
    note

    Note the . (dot) prefix in the type_name field above. It is needed as explained here. In this example we assume the user's package is not named google or google.protobuf since then type_name could be relative and would not match.

    Now, we need to make sure there is an implicit typemapper converting between google.protobuf.timestamp.Timestamp and com.myexample.MyType. The typemapper can be defined in the companion object of MyType, as shown in custom types.
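A minimal sketch of such a typemapper, assuming a hypothetical com.myexample.MyType that wraps epoch milliseconds:

    import com.google.protobuf.timestamp.Timestamp
    import scalapb.TypeMapper

    case class MyType(epochMillis: Long)

    object MyType {
      // Converts the protobuf Timestamp to MyType and back.
      implicit val typeMapper: TypeMapper[Timestamp, MyType] =
        TypeMapper[Timestamp, MyType] { ts =>
          MyType(ts.seconds * 1000 + ts.nanos / 1000000)
        } { mt =>
          Timestamp(seconds = mt.epochMillis / 1000, nanos = ((mt.epochMillis % 1000) * 1000000).toInt)
        }
    }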

    - + diff --git a/docs/upgrading/index.html b/docs/upgrading/index.html index 6866741e8..cc75df22a 100644 --- a/docs/upgrading/index.html +++ b/docs/upgrading/index.html @@ -10,7 +10,7 @@ Upgrade guide | ScalaPB - + @@ -34,7 +34,7 @@ ScalaPB SBT Settings.

    If you are using files like scalapb.proto and Google's well-known proto change the library dependency from:

    "com.trueaccord.scalapb" %% "scalapb-runtime" % "0.11.11" % PB.protobufConfig

    to:

    "com.thesamet.scalapb" %% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf"
    - + diff --git a/docs/user_defined_options/index.html b/docs/user_defined_options/index.html index 5d6cbd912..f87b85900 100644 --- a/docs/user_defined_options/index.html +++ b/docs/user_defined_options/index.html @@ -10,7 +10,7 @@ Defining custom options | ScalaPB - + @@ -45,7 +45,7 @@
    assert(use_opts.OneMessage.scalaDescriptor.getOptions.extension(
    my_opts.CustomOptionsMyOptsProto.myMessageOption).get ==
    my_opts.MyMessageOption().update(_.priority := 17))
    assert(numberField.getOptions.extension(
    my_opts.Wrapper.tags) == Seq(
    my_opts.Tag(name = Some("tag1")),
    my_opts.Tag(name = Some("tag2"))))

    Example code#

    The full source code of this example is available below:

    - + diff --git a/docs/validation/index.html b/docs/validation/index.html index 9abf99323..1c09a11be 100644 --- a/docs/validation/index.html +++ b/docs/validation/index.html @@ -10,7 +10,7 @@ Validating Protobufs | ScalaPB - + @@ -67,7 +67,7 @@
    field_transformations : [ {
    when : {options: {[validate.rules] {int32 : {gt : 1}}}} // <-- 1 can be replaced with any number
    set : {type : "Int Refined Greater[$(options.[validate.rules].int32.gt)]"}
    match_type : PRESENCE
    } ]
    };
    message Test {
    int32 gt_test = 1 [ (validate.rules).int32 = {gt : 5} ]; // transformed to: Int Refined Greater[5]
    }

    For this to work, a typemapper for refined types needs to be either put in a package object in the same package where the code is generated, or be manually imported through import options.

    The typemapper used in scalapb-validate tests is here.

    Additional resources:

    - + diff --git a/docs/writing-plugins/index.html b/docs/writing-plugins/index.html index 1b2a78f9b..b3060226c 100644 --- a/docs/writing-plugins/index.html +++ b/docs/writing-plugins/index.html @@ -10,7 +10,7 @@ Writing protoc plugins in Scala | ScalaPB - + @@ -53,7 +53,7 @@ library with a % "protobuf" scope. To use:

    import "myplugin.proto";
    message MyMessage {
    option (myplugin.myopts).my_option = false;
    }

    Publishing the plugin#

    The project can be published to Maven using the “publish” command. We recommend using the excellent sbt-ci-release plugin to automatically build a snapshot on each commit, and a full release when pushing a git tag.

    SBT users of your code generators will add your plugin to the build by adding it to their project/plugins.sbt like this:

    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value / "scalapb",
    com.myplugin.gen() -> (Compile / sourceManaged).value / "scalapb"
    )

    The template also publishes artifacts with names ending with unix.sh and windows.bat. These are executable jars for Unix and Windows systems that contain all the classes needed to run your code generator (except for a JVM, which is expected to be in JAVA_HOME or on the PATH). This is useful if your users need to use your plugin directly with protoc, or with a build tool such as Maven.

    Secondary outputs#

    note

    Secondary outputs were introduced in protoc-bridge 0.9.0 and are supported by sbt-protoc 1.0.0 and onwards.

    Secondary outputs provide a simple way for protoc plugins to pass information for other protoc plugins running after them in the same protoc invocation. The information is passed through files that are created in a temporary directory. The absolute path of that temporary directory is provided to all protoc plugins. Plugins may create new files in that directory for subsequent plugins to consume.

    Conventions:

    • Names of secondary output files should be in kebab-case, and should clearly identify the plugin producing them. For example scalapb-validate-preprocessor.
    • The content of the file should be a serialized google.protobuf.Any message that packs the arbitrary payload the plugin wants to publish (see the sketch after this list).
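A sketch of what writing such a file could look like from a plugin; the helper and file name below are hypothetical, and the payload can be any generated message you choose to publish:

    import java.nio.file.{Files, Paths}
    import scalapb.{GeneratedMessage, GeneratedMessageCompanion}
    import com.google.protobuf.any.{Any => ProtoAny}

    // Packs the payload into a google.protobuf.Any and writes it under the
    // secondary output directory using a kebab-case file name.
    def writeSecondaryOutput[A <: GeneratedMessage: GeneratedMessageCompanion](
        secondaryOutputDir: String,
        name: String, // e.g. "my-plugin-preprocessor"
        payload: A
    ): Unit =
      Files.write(Paths.get(secondaryOutputDir, name), ProtoAny.pack(payload).toByteArray)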

    Determining the secondary output directory location#

    JVM-based plugins that are executed in the same JVM that spawns protoc (like the ones described on this page), receive the location of the secondary output directory via the CodeGeneratorRequest. protoc-bridge appends to the request an unknown field carrying a message called ExtraEnv which contains the path to the secondary output directory.

    Other plugins that are invoked directly by protoc can find the secondary output directory by inspecting the SCALAPB_SECONDARY_OUTPUT_DIR environment variable.
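From Scala, reading that variable is straightforward (a sketch):

    val secondaryOutputDir: Option[String] = sys.env.get("SCALAPB_SECONDARY_OUTPUT_DIR")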

    protoc-bridge takes care of creating the temporary directory and setting up the environment variable before invoking protoc. If protoc is run manually (for example, through the CLI), it is the user's responsibility to create a directory for secondary outputs and pass it as an environment variable to protoc. It's worth noting that ScalaPB only looks for a secondary output directory if a preprocessor is requested, so for the most part users do not need to worry about secondary output directories.

    In ScalaPB's code base, SecondaryOutputProvider provides a method to find the secondary output directory as described above.

    Preprocessors#

    Preprocessors are protoc plugins that provide secondary outputs that are consumed by ScalaPB. ScalaPB expects the secondary output to be a google.protobuf.Any that encodes a PreprocessorOutput. The message contains a map between proto file names (as given by FileDescriptor#getFullName()) to additional ScalaPbOptions that are merged with the files options. By appending to aux_field_options, a preprocessor can, for example, impact the generated types of ScalaPB fields.

    • ScalaPB applies the provided options to a proto file only if the original file lists the preprocessor secondary output filename in a preprocessors file-level option. That option can be inherited from a package-scoped option.
    • To exclude a specific file from being preprocessed (if it would be otherwise impacted by a package-scoped option), add a -NAME entry to the list of preprocessors where NAME is the name of the preprocessor's secondary output.
    • When there are multiple preprocessors, options of later preprocessors override those of earlier preprocessors. Options in the file are merged over the preprocessor's options. When merging, repeated fields get concatenated.
    • Preprocessor plugins need to be invoked (in PB.targets or protoc's command line) before ScalaPB, so when ScalaPB runs their output is available.
    • Plugins that depend on ScalaPB (such as scalapb-validate) rely on DescriptorImplicits which consume the preprocessor output and therefore also see the updated options.

    Summary#

    If you followed this guide all the way to here, then congratulations for creating your first protoc plugin in Scala!

    If you have any questions, feel free to reach out to us on Gitter or Github.

    Did you write an interesting protoc plugin? Let us know on our gitter channel or our Google group and we'd love to mention it here!

    - + diff --git a/index.html b/index.html index 5ff63e256..f801fb86f 100644 --- a/index.html +++ b/index.html @@ -10,7 +10,7 @@ ScalaPB: Protocol Buffer Compiler for Scala | ScalaPB - + @@ -20,7 +20,7 @@


    Protocol Buffer Compiler for Scala

    Easy to Use

    ScalaPB translates Protocol Buffers to Scala case classes. The generated API is easy to use!

    Supports proto2 and proto3

    ScalaPB is built as a protoc plugin and has perfect compatibility with the protobuf language specification.

    Nested updates

    Updating immutable nested structures is made easy by optional lenses support. Learn more.

    Interoperate with Java

    Scala Protocol Buffers can be converted to Java and vice versa. Scala and Java protobufs can co-exist in the same project to make it easier to gradually migrate, or interact with legacy Java APIs.

    Scala.js support

    ScalaPB fully supports Scala.js so you can write Scala programs that use your domain-specific Protocol Buffers in the browser! Learn more.

    gRPC

    Build gRPC servers and clients with ScalaPB. ScalaPB ships with its own wrapper around the official gRPC Java implementation. There are gRPC libraries for ZIO, Cats Effect and Akka.
