Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for product types #251

Merged
merged 38 commits into from
Jul 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
0a71ff7
Add spark type for products; literal types for products to be simplified
jserranohidalgo May 4, 2022
b010382
Avoid ambiguity errors with Product types
jserranohidalgo May 5, 2022
e4f8bc6
Some more tests
jserranohidalgo May 5, 2022
b29ea00
scalafmt
jserranohidalgo May 5, 2022
e3b47c6
Dynamic field access for product types
jserranohidalgo May 5, 2022
1e553af
Field safe selector
jserranohidalgo May 5, 2022
82468e5
scalafmt
jserranohidalgo May 5, 2022
2b7a2e2
compatibility with 3.0
jserranohidalgo May 5, 2022
b9f547f
wip
jserranohidalgo May 24, 2022
7ee98fc
Checked Spark data types and doric types
jserranohidalgo Jun 9, 2022
f0e97f3
Deserialize spark types
jserranohidalgo Jun 22, 2022
1e3f2b1
spark types withouth custom products
jserranohidalgo Jul 13, 2022
377d943
scalafmt
jserranohidalgo Jul 13, 2022
7a745a9
fix spark versions
jserranohidalgo Jul 14, 2022
13a1e31
fix spark versions
jserranohidalgo Jul 14, 2022
5dc33e7
wip
jserranohidalgo Jul 14, 2022
e677a67
Test / scalafmt
jserranohidalgo Jul 14, 2022
54d2b9d
scalafmt
jserranohidalgo Jul 14, 2022
7900b11
fix test name
jserranohidalgo Jul 14, 2022
7233674
fix scala 2.13
jserranohidalgo Jul 15, 2022
c17fbb1
fix scalafmt
jserranohidalgo Jul 15, 2022
c94c307
DateTime java8 API enabled for proper verions of Spark
jserranohidalgo Jul 18, 2022
2cc30ee
fix scalafmt
jserranohidalgo Jul 18, 2022
fab11c8
fix codecov
jserranohidalgo Jul 18, 2022
6340af8
fix build.sbt
jserranohidalgo Jul 19, 2022
7c26b57
fix codecov
jserranohidalgo Jul 19, 2022
168e0d9
Add literalSparkType instances for java numerics
jserranohidalgo Jul 19, 2022
c5f4833
clean up old tests
jserranohidalgo Jul 19, 2022
bd2f14b
clean up & array implicits simplified
jserranohidalgo Jul 19, 2022
66aa130
fix localdate implicits spark instance for row
jserranohidalgo Jul 19, 2022
ece28b7
fix docs warning
jserranohidalgo Jul 19, 2022
e815993
fix warnings docs
jserranohidalgo Jul 19, 2022
5497a90
fix warnings in docs
jserranohidalgo Jul 19, 2022
50d5dd2
fix codecov
jserranohidalgo Jul 20, 2022
b278872
add genDoricERror
jserranohidalgo Jul 20, 2022
11ee750
clean up tests
jserranohidalgo Jul 20, 2022
f9d3623
clean comments
jserranohidalgo Jul 20, 2022
f7a5ce5
fix scalafmt
jserranohidalgo Jul 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ lazy val core = project
"org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", // scala-steward:off
"org.typelevel" %% "cats-core" % catsVersion(sparkVersion.value),
"com.lihaoyi" %% "sourcecode" % "0.3.0",
"com.chuusai" %% "shapeless" % "2.3.9",
"com.github.mrpowers" %% "spark-fast-tests" % "1.3.0" % "test",
"org.scalatest" %% "scalatest" % "3.2.12" % "test"
),
Expand Down
18 changes: 7 additions & 11 deletions core/src/main/scala/doric/DoricColumn.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,13 @@ case class LiteralDoricColumn[T] private[doric] (
) extends DoricColumn[T]

object LiteralDoricColumn {
def apply[T: SparkType: LiteralSparkType](value: T): LiteralDoricColumn[T] = {
val colLit: Doric[Column] = new Column(
Literal(
CatalystTypeConverters.createToCatalystConverter(SparkType[T].dataType)(
LiteralSparkType[T].literalTo(value)
),
SparkType[T].dataType
)
).pure[Doric]
LiteralDoricColumn(colLit, value)
}
def apply[T: SparkType: LiteralSparkType](
    value: T
)(implicit l: Location): LiteralDoricColumn[T] = {
  // The literal column is produced by the LiteralSparkType instance when the
  // Kleisli is run; the input it is run against is ignored.
  val literalElem: Doric[Column] =
    Kleisli(_ => LiteralSparkType[T].literal(value))
  LiteralDoricColumn(literalElem, value)
}

implicit class LiteralGetter[T](litCol: LiteralDoricColumn[T]) {
def getColumnValueAsSparkValue(implicit
Expand Down
6 changes: 6 additions & 0 deletions core/src/main/scala/doric/sem/Errors.scala
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ case class SparkErrorWrapper(sparkCause: Throwable)(implicit
}
}

/**
  * Doric error described only by a text message and a source location.
  *
  * `None` is passed to the `DoricSingleError` constructor
  * (NOTE(review): presumably the optional underlying cause -- confirm).
  *
  * @param message
  *   description of the problem
  * @param location
  *   source location associated with the error, resolved implicitly
  */
case class GenDoricError(message: String)(implicit val location: Location)
    extends DoricSingleError(None)

object Location {
implicit def location(implicit
line: sourcecode.Line,
Expand Down
55 changes: 52 additions & 3 deletions core/src/main/scala/doric/syntax/DStructs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ import cats.evidence.Is
import cats.implicits._
import doric.sem.{ColumnTypeError, Location, SparkErrorWrapper}
import doric.types.SparkType

import org.apache.spark.sql.{Column, Dataset, Row}
import org.apache.spark.sql.catalyst.expressions.ExtractValue
import org.apache.spark.sql.functions.{struct => sparkStruct}
import shapeless.labelled.FieldType
import shapeless.{::, HList, LabelledGeneric, Witness}

private[syntax] trait DStructs {

Expand All @@ -26,7 +27,9 @@ private[syntax] trait DStructs {
def struct(cols: DoricColumn[_]*): RowColumn = {
  // Gather every column's underlying `elem` first, then combine them into a
  // single effect that yields all the columns at once.
  val fieldElems = cols.toList.map(_.elem)
  fieldElems.sequence.map(fields => sparkStruct(fields: _*)).toDC
}

implicit class DStructOps(private val col: RowColumn) {
implicit class DStructOps[T](private val col: DoricColumn[T])(implicit
st: SparkType.Custom[T, Row]
) {

/**
* Retrieves the child row of the Struct column
Expand Down Expand Up @@ -83,15 +86,22 @@ private[syntax] trait DStructs {
.toDC
}
}
trait DynamicFieldAccessor[T] extends Dynamic { self: DoricColumn[T] =>

trait DynamicFieldAccessor[T] extends Dynamic {
self: DoricColumn[T] =>

/**
* Allows for accessing fields of struct columns using the syntax `rowcol.name[T]`.
* This expression stands for `rowcol.getChild[T](name)`.
*
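* @param name the name of the struct field to access
* @param location the source location of the call, resolved implicitly
* @param st the `SparkType` instance for the field type `A`
* @tparam A the expected type of the accessed field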
* @return The column which refers to the given field
* @throws doric.sem.ColumnTypeError if the parent column is not a struct
*/

def selectDynamic[A](name: String)(implicit
location: Location,
st: SparkType[A],
Expand All @@ -100,4 +110,43 @@ private[syntax] trait DStructs {
w.lift[DoricColumn].coerce(self).getChild[A](name)
}

/**
 * Type-level evidence that the record `L` contains a field labelled `K`.
 * An instance exposes the value type of that field together with its
 * `SparkType` instance. When no instance can be found the compiler reports
 * "No field K in record L".
 *
 * @tparam L the record (an `HList`) that is searched for the field
 * @tparam K the `Symbol` subtype that labels the field
 */
@annotation.implicitNotFound(msg = "No field ${K} in record ${L}")
trait SelectorWithSparkType[L <: HList, K <: Symbol] {
// Value type of the field labelled `K`.
type V
// `SparkType` instance for the field's value type.
val st: SparkType[V]
}

/**
  * Implicit instances of `SelectorWithSparkType`.
  *
  * `Found` is declared directly in this object, while the instance inherited
  * from `SelectorLPI` is declared in a parent trait, so `Found` is preferred
  * by implicit resolution whenever both apply.
  */
object SelectorWithSparkType extends SelectorLPI {

  /**
    * Refinement of `SelectorWithSparkType` that exposes the selected field's
    * value type as the type parameter `_V`.
    */
  type Aux[L <: HList, K <: Symbol, _V] = SelectorWithSparkType[L, K] {
    type V = _V
  }

  /**
    * Instance for a record whose head field is labelled `K`.
    *
    * The result type is spelled out with `Aux` instead of being inferred from
    * the anonymous class, so the instance has a stable, declared type and is
    * eligible for implicit search regardless of definition order.
    *
    * @tparam K  label of the head field
    * @tparam _V value type of the head field; requires a `SparkType` instance
    * @tparam T  remaining fields of the record
    */
  implicit def Found[K <: Symbol, _V: SparkType, T <: HList]
      : Aux[FieldType[K, _V] :: T, K, _V] =
    new SelectorWithSparkType[FieldType[K, _V] :: T, K] {
      type V = _V
      val st: SparkType[_V] = SparkType[_V]
    }
}

/**
  * Implicit instances of `SelectorWithSparkType` that are inherited by its
  * companion object, and therefore rank below the instances declared directly
  * in that object.
  */
trait SelectorLPI {

  /**
    * Instance that skips the head field of the record and reuses the evidence
    * found for the tail `T`.
    *
    * The result type is declared explicitly (value type `T.V`, taken from the
    * tail's evidence) instead of being inferred from the anonymous class.
    *
    * Note that the implicit parameter `T` (a value) shares its name with the
    * type parameter `T`; `T.V` and `T.st` refer to the value.
    *
    * @tparam K1 label of the skipped head field
    * @tparam V1 value type of the skipped head field
    * @tparam T  remaining fields of the record
    * @tparam K  label that is being searched for
    */
  implicit def KeepFinding[K1, V1, T <: HList, K <: Symbol](implicit
      T: SelectorWithSparkType[T, K]
  ): SelectorWithSparkType.Aux[FieldType[K1, V1] :: T, K, T.V] =
    new SelectorWithSparkType[FieldType[K1, V1] :: T, K] {
      type V = T.V
      val st: SparkType[T.V] = T.st
    }
}

/**
  * Field access checked at compile time for columns whose type `T` has a
  * `LabelledGeneric` representation `L` and a `SparkType.Custom[T, Row]`
  * instance.
  */
implicit class StructOps[T, L <: HList](dc: DoricColumn[T])(implicit
    lg: LabelledGeneric.Aux[T, L],
    st: SparkType.Custom[T, Row]
) {

  /**
    * Selects the field labelled `K`. The call only compiles when a
    * `SelectorWithSparkType[L, K]` instance exists.
    *
    * @param k        witness of the field label; its symbol name is the field name
    * @param S        evidence providing the field's value type and `SparkType`
    * @param location source location of the call, resolved implicitly
    * @return the column produced by `getChild` for that field, typed as `S.V`
    */
  def getChildSafe[K <: Symbol](k: Witness.Aux[K])(implicit
      S: SelectorWithSparkType[L, K],
      location: Location
  ): DoricColumn[S.V] = {
    val fieldName = k.value.name
    val structOps = new DStructOps(dc)
    structOps.getChild[S.V](fieldName)(S.st, location)
  }
}

}
9 changes: 7 additions & 2 deletions core/src/main/scala/doric/syntax/LiteralConversions.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package doric
package syntax

import doric.sem.Location
import doric.types.{LiteralSparkType, SparkType}

private[syntax] trait LiteralConversions {
Expand All @@ -16,7 +17,9 @@ private[syntax] trait LiteralConversions {
* A doric column that represents the literal value and has the same type as the
* value.
*/
def lit[L: SparkType: LiteralSparkType](litv: L): LiteralDoricColumn[L] = {
def lit[L: SparkType: LiteralSparkType](litv: L)(implicit
    l: Location
): LiteralDoricColumn[L] =
  // Delegates to LiteralDoricColumn.apply; the implicit Location is passed along.
  LiteralDoricColumn(litv)

Expand All @@ -29,7 +32,9 @@ private[syntax] trait LiteralConversions {
* a literal with the same type.
*/
@inline
def lit: LiteralDoricColumn[L] = LiteralDoricColumn(litv)
// Builds the literal column for the wrapped value `litv`; the implicit
// Location is passed along to LiteralDoricColumn.apply.
def lit(implicit l: Location): LiteralDoricColumn[L] = LiteralDoricColumn(
litv
)
}

}
Loading