Skip to content
This repository has been archived by the owner on Dec 20, 2018. It is now read-only.

using the SchemaConverters.convertStructToAvro #277

Open
divo77 opened this issue Mar 23, 2018 · 1 comment
Open

using the SchemaConverters.convertStructToAvro #277

divo77 opened this issue Mar 23, 2018 · 1 comment

Comments

@divo77
Copy link

divo77 commented Mar 23, 2018

In my project I am trying to convert a DataFrame's StructType schema to an Avro schema with SchemaConverters.convertStructToAvro. Does anybody have an example of how to use this converter?

I am using Scala and the following dependency (groupId:artifactId:version):

com.databricks:spark-avro_2.11:4.0.0

@florin1288
Copy link

florin1288 commented Jul 12, 2018

import java.io.ByteArrayOutputStream

import scala.util.Try

import com.databricks.spark.avro.SchemaConverters
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.SchemaBuilder.RecordBuilder
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory
import org.apache.spark.sql._
import org.apache.spark.sql.types.StructType

// Name and namespace given to the Avro record built below.
val recordName: String = "Model"
val recordNamespace: String = "Test"

// Spark session with master "local[*]"; getOrCreate() reuses an existing session if there is one.
val sparkSession: SparkSession =
  SparkSession
    .builder()
    .master("local[*]")
    .getOrCreate()

// Avro record builder seeded with the name and namespace above; passed to
// SchemaConverters.convertStructToAvro in serialize().
val builder: RecordBuilder[Schema] =
  SchemaBuilder
    .record(recordName)
    .namespace(recordNamespace)

/**
 * End-to-end example: builds a one-row DataFrame of [[Model]], converts the
 * Spark schema of `Model` to an Avro schema with
 * `SchemaConverters.convertStructToAvro`, and passes the first row (as a
 * `GenericRecord`) to `avroSerialize`.
 *
 * NOTE(review): the `Try[Array[Byte]]` produced by `avroSerialize` is discarded
 * because this method returns `Unit`, so a failed serialization goes unnoticed
 * by the caller.
 * NOTE(review): `getGenericRecord` is not defined in the visible snippet; it is
 * presumably a helper that turns a Spark `Row` into an Avro `GenericRecord` -
 * confirm where it comes from.
 */
def serialize(): Unit = {
  import sparkSession.implicits._

  // Step 1: a single sample row, lifted into a DataFrame.
  val sampleRows = Seq(Model(Some(List("param1"))))
  val frame: DataFrame = sparkSession.sparkContext.parallelize(sampleRows).toDF()

  // Step 2: Spark schema derived from the case class, then converted to Avro.
  val sparkSchema: StructType = Encoders.product[Model].schema
  val avroSchema: Schema =
    SchemaConverters.convertStructToAvro(sparkSchema, builder, recordNamespace)

  // Step 3: first row -> GenericRecord -> Avro binary.
  val firstRow = frame.head()
  val record: GenericRecord = getGenericRecord(firstRow)

  avroSerialize(avroSchema, record)
}

/**
 * Sample row type used by the example.
 *
 * @param params optional list of string parameters
 */
case class Model(
  params: Option[List[String]]
)

/**
 * Encodes a single Avro record into Avro's binary format.
 *
 * @param schema        Avro schema handed to the datum writer
 * @param genericRecord record to encode
 * @return `Success` with the encoded bytes, or `Failure` wrapping any non-fatal
 *         exception thrown while writing or flushing
 */
def avroSerialize(schema: Schema, genericRecord: GenericRecord): Try[Array[Byte]] = {
  Try {
    // GenericDatumWriter is the writer for GenericRecord values;
    // SpecificDatumWriter is intended for generated SpecificRecord classes.
    val writer = new GenericDatumWriter[GenericRecord](schema)
    val out = new ByteArrayOutputStream()
    // Second argument is an optional encoder to reuse; null asks for a fresh one.
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(genericRecord, encoder)
    // Flush so everything the encoder has written reaches `out` before it is read.
    encoder.flush()
    // No close() needed: closing a ByteArrayOutputStream has no effect.
    out.toByteArray
  }
}

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants