Skip to content

Commit

Permalink
doc: link to spark functions (PR #138)
Browse files Browse the repository at this point in the history
* doc: Added some links to spark functions

Some functions scaladoc issue --> #135

Co-authored-by: Eduardo Ruiz <[email protected]>
  • Loading branch information
eruizalo and eruizalo authored Jan 24, 2022
1 parent 75f91dd commit b5efcd4
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 38 deletions.
6 changes: 6 additions & 0 deletions core/src/main/scala/doric/syntax/BinaryColumns.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ private[syntax] trait BinaryColumns {
* as a 32 character hex string.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.md5]]
*/
def md5: StringColumn =
  column.elem.map(binaryCol => f.md5(binaryCol)).toDC

Expand All @@ -43,6 +44,7 @@ private[syntax] trait BinaryColumns {
* as a 40 character hex string.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.sha1]]
*/
def sha1: StringColumn =
  column.elem.map(binaryCol => f.sha1(binaryCol)).toDC

Expand All @@ -52,6 +54,7 @@ private[syntax] trait BinaryColumns {
*
* @throws java.lang.IllegalArgumentException if numBits is not in the permitted values
* @group Binary Type
* @see [[org.apache.spark.sql.functions.sha2]]
*/
def sha2(numBits: Int): StringColumn =
  column.elem.map(f.sha2(_, numBits)).toDC
Expand All @@ -61,6 +64,7 @@ private[syntax] trait BinaryColumns {
* returns the value as a long column.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.crc32]]
*/
def crc32: LongColumn =
  column.elem.map(binaryCol => f.crc32(binaryCol)).toDC

Expand All @@ -69,6 +73,7 @@ private[syntax] trait BinaryColumns {
* This is the reverse of unbase64.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.base64]]
*/
def base64: StringColumn =
  column.elem.map(binaryCol => f.base64(binaryCol)).toDC

Expand All @@ -78,6 +83,7 @@ private[syntax] trait BinaryColumns {
* If either argument is null, the result will also be null.
*
* @group Binary Type
* @see [[org.apache.spark.sql.functions.decode]]
*/
def decode(charset: StringColumn): StringColumn =
(column.elem, charset.elem)
Expand Down
2 changes: 2 additions & 0 deletions core/src/main/scala/doric/syntax/BooleanColumns.scala
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ private[syntax] trait BooleanColumns {
*
* @throws java.lang.RuntimeException if the condition is false
* @group Boolean Type
* @see [[org.apache.spark.sql.functions.assert_true(c:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.assert_true]]
*/
def assertTrue: NullColumn =
  column.elem.map(condition => f.assert_true(condition)).toDC

Expand All @@ -75,6 +76,7 @@ private[syntax] trait BooleanColumns {
*
* @throws java.lang.RuntimeException if the condition is false
* @group Boolean Type
* @see [[org.apache.spark.sql.functions.assert_true(c:org\.apache\.spark\.sql\.Column,e:* org.apache.spark.sql.functions.assert_true]]
*/
def assertTrue(msg: StringColumn): NullColumn =
  (column.elem, msg.elem)
    .mapN((condition, message) => f.assert_true(condition, message))
    .toDC
Expand Down
6 changes: 6 additions & 0 deletions core/src/main/scala/doric/syntax/CommonColumns.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* the DoricColumns to coalesce
* @return
* the first column that is not null, or null if all inputs are null.
* @see [[org.apache.spark.sql.functions.coalesce]]
*/
def coalesce[T](cols: DoricColumn[T]*): DoricColumn[T] = {
  val sparkCols = cols.map(_.elem).toList.sequence
  sparkCols.map(cs => f.coalesce(cs: _*)).toDC
}
Expand All @@ -29,6 +30,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* Calculates the hash code of given columns, and returns the result as an integer column.
*
* @group All Types
* @see [[org.apache.spark.sql.functions.hash]]
*/
def hash(cols: DoricColumn[_]*): IntegerColumn = {
  val sparkCols = cols.map(_.elem).toList.sequence
  sparkCols.map(cs => f.hash(cs: _*)).toDC
}
Expand All @@ -38,6 +40,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* variant of the xxHash algorithm, and returns the result as a long column.
*
* @group All Types
* @see [[org.apache.spark.sql.functions.xxhash64]]
*/
def xxhash64(cols: DoricColumn[_]*): LongColumn = {
  val sparkCols = cols.map(_.elem).toList.sequence
  sparkCols.map(cs => f.xxhash64(cs: _*)).toDC
}
Expand Down Expand Up @@ -185,6 +188,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* literals to compare to
* @return
* Boolean DoricColumn with the comparison logic.
* @see [[org.apache.spark.sql.Column.isin]]
*/
def isIn(elems: T*): BooleanColumn =
  column.elem.map(sparkCol => sparkCol.isin(elems: _*)).toDC

Expand All @@ -193,6 +197,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* @group All Types
* @return
* Boolean DoricColumn
* @see [[org.apache.spark.sql.Column.isNull]]
*/
def isNull: BooleanColumn =
  column.elem.map(sparkCol => sparkCol.isNull).toDC

Expand All @@ -201,6 +206,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] {
* @group All Types
* @return
* Boolean DoricColumn
* @see [[org.apache.spark.sql.Column.isNotNull]]
*/
def isNotNull: BooleanColumn =
  column.elem.map(sparkCol => sparkCol.isNotNull).toDC

Expand Down
50 changes: 36 additions & 14 deletions core/src/main/scala/doric/syntax/DateColumns.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ private[syntax] trait DateColumns {
* All calls of current_date within the same query return the same value.
*
* @group Date Type
* @see [[org.apache.spark.sql.functions.current_date]]
*/
def currentDate(): DateColumn = {
  val today = f.current_date()
  today.asDoric[Date]
}

Expand All @@ -32,6 +33,7 @@ private[syntax] trait DateColumns {
* Date column after adding months
* @note
* Timestamp columns will be truncated to Date column
* @see [[org.apache.spark.sql.functions.add_months(startDate:org\.apache\.spark\.sql\.Column,numMonths:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.add_months]]
*/
def addMonths(nMonths: IntegerColumn): DateColumn =
  (column.elem, nMonths.elem)
    .mapN((startDate, months) => f.add_months(startDate, months))
    .toDC
Expand All @@ -44,6 +46,7 @@ private[syntax] trait DateColumns {
* @note
* Timestamp columns will be truncated to Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_add(start:org\.apache\.spark\.sql\.Column,days:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.date_add]]
*/
def addDays(days: IntegerColumn): DateColumn =
  (column.elem, days.elem)
    .mapN((startDate, nDays) => f.date_add(startDate, nDays))
    .toDC
Expand All @@ -59,6 +62,7 @@ private[syntax] trait DateColumns {
* Use specialized functions like 'year' whenever possible as they benefit from a
* specialized implementation.
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_format]]
*/
def format(format: StringColumn): StringColumn =
(column.elem, format.elem)
Expand All @@ -75,6 +79,7 @@ private[syntax] trait DateColumns {
* @note
* Timestamp columns will be truncated to Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.date_sub(start:org\.apache\.spark\.sql\.Column,days:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.date_sub]]
*/
def subDays(days: IntegerColumn): DateColumn =
  (column.elem, days.elem)
    .mapN((startDate, nDays) => f.date_sub(startDate, nDays))
    .toDC
Expand All @@ -85,6 +90,7 @@ private[syntax] trait DateColumns {
* @param dateCol
* A Date or Timestamp column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.datediff]]
*/
def diff(dateCol: DoricColumn[T]): IntegerColumn =
(column.elem, dateCol.elem)
Expand All @@ -95,6 +101,7 @@ private[syntax] trait DateColumns {
* Extracts the day of the month as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.dayofmonth]]
*/
def dayOfMonth: IntegerColumn =
  column.elem.map(dateCol => f.dayofmonth(dateCol)).toDC

Expand All @@ -103,20 +110,23 @@ private[syntax] trait DateColumns {
* Ranges from 1 for a Sunday through to 7 for a Saturday
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.dayofweek]]
*/
def dayOfWeek: IntegerColumn =
  column.elem.map(dateCol => f.dayofweek(dateCol)).toDC

/**
 * Day of the year of the given date, extracted as an integer.
 *
 * @group Date & Timestamp Type
 * @see [[org.apache.spark.sql.functions.dayofyear]]
 */
def dayOfYear: IntegerColumn =
  column.elem.map(dateCol => f.dayofyear(dateCol)).toDC

/**
 * Sets the moment to the last day of the same month.
 *
 * Alias for `lastDayOfMonth`: this method simply delegates to it.
 *
 * @group Date & Timestamp Type
 * @see [[org.apache.spark.sql.functions.last_day]]
 */
def endOfMonth: DateColumn = lastDayOfMonth

Expand All @@ -126,13 +136,15 @@ private[syntax] trait DateColumns {
* month in July 2015.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.last_day]]
*/
def lastDayOfMonth: DateColumn =
  column.elem.map(dateCol => f.last_day(dateCol)).toDC

/**
 * Month of the given date, extracted as an integer.
 *
 * @group Date & Timestamp Type
 * @see [[org.apache.spark.sql.functions.month]]
 */
def month: IntegerColumn =
  column.elem.map(dateCol => f.month(dateCol)).toDC

Expand All @@ -143,7 +155,7 @@ private[syntax] trait DateColumns {
* of their respective months. Otherwise, the difference is calculated assuming 31 days per month.
*
* For example:
* {{{
* @example {{{
* Date("2017-11-14").monthsBetween(Date("2017-07-14")) // returns 4.0
* Date("2017-01-01").monthsBetween(Date("2017-01-10")) // returns 0.29032258
* Timestamp("2017-06-01 00:00:00").monthsBetween(Timestamp("2017-06-16 12:00:00")) // returns -0.5
Expand All @@ -152,6 +164,7 @@ private[syntax] trait DateColumns {
* @param dateCol
* Date or Timestamp column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.months_between(end:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.months_between]]
*/
def monthsBetween(dateCol: DoricColumn[T]): DoubleColumn =
  (column.elem, dateCol.elem)
    .mapN((endDate, startDate) => f.months_between(endDate, startDate))
    .toDC
Expand All @@ -165,6 +178,7 @@ private[syntax] trait DateColumns {
* If `roundOff` is set to true, the result is rounded off to 8 digits;
* it is not rounded otherwise.
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.months_between(end:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column,roundOff:* org.apache.spark.sql.functions.months_between]]
*/
def monthsBetween(
dateCol: DoricColumn[T],
Expand All @@ -180,14 +194,15 @@ private[syntax] trait DateColumns {
* Returns the first date which is later than the value of the `date` column that is on the
* specified day of the week.
*
* For example, `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02") because
* that is the first Sunday after 2015-07-27.
* @example For example, `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02")
* because that is the first Sunday after 2015-07-27.
*
* @param dayOfWeek
* Case insensitive, and accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
* @note
* Timestamp columns will be truncated to Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.next_day]]
*/
def nextDay(dayOfWeek: StringColumn): DateColumn =
(column.elem, dayOfWeek.elem)
Expand All @@ -200,28 +215,30 @@ private[syntax] trait DateColumns {
* Extracts the quarter as an integer from a given date.
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.quarter]]
*/
def quarter: IntegerColumn =
  column.elem.map(dateCol => f.quarter(dateCol)).toDC

/**
* Returns date truncated to the unit specified by the format.
*
* For example, `Timestamp("2018-11-19 12:01:19").trunc("year")` returns Date("2018-01-01")
* @example For example, `Timestamp("2018-11-19 12:01:19").trunc("year")` returns Date("2018-01-01")
*
* @param format
* if date:
* * 'year', 'yyyy', 'yy' to truncate by year,
* * 'month', 'mon', 'mm' to truncate by month
* Other options are: 'week', 'quarter'
* if timestamp:
* * 'year', 'yyyy', 'yy' to truncate by year,
* * 'month', 'mon', 'mm' to truncate by month,
* * 'day', 'dd' to truncate by day,
* Other options are:
* * 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
* - if <b>date</b>:
* - 'year', 'yyyy', 'yy' to truncate by year,
* - 'month', 'mon', 'mm' to truncate by month
* - __Other options are__: 'week', 'quarter'
* - if <b>timestamp</b>:
* - 'year', 'yyyy', 'yy' to truncate by year,
* - 'month', 'mon', 'mm' to truncate by month,
* - 'day', 'dd' to truncate by day,
* - __Other options are__: 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
* @note
* Timestamp columns will be truncated to Date column
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.trunc]]
* @see [[org.apache.spark.sql.functions.date_trunc]]
*/
def truncate(format: StringColumn): DoricColumn[T] =
(column.elem, format.elem)
Expand All @@ -243,6 +260,7 @@ private[syntax] trait DateColumns {
* A long
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.unix_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.unix_timestamp]]
*/
def unixTimestamp: LongColumn =
  column.elem.map(dateCol => f.unix_timestamp(dateCol)).toDC

Expand All @@ -253,27 +271,31 @@ private[syntax] trait DateColumns {
* as defined by ISO 8601
*
* @group Date & Timestamp Type
* @see [[org.apache.spark.sql.functions.weekofyear]]
*/
def weekOfYear: IntegerColumn =
  column.elem.map(dateCol => f.weekofyear(dateCol)).toDC

/**
 * Year of the given date, extracted as an integer.
 *
 * @group Date & Timestamp Type
 * @see [[org.apache.spark.sql.functions.year]]
 */
def year: IntegerColumn =
  column.elem.map(dateCol => f.year(dateCol)).toDC

/**
 * Converts the date column into a timestamp column.
 *
 * @group Date Type
 * @see [[org.apache.spark.sql.functions.to_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.to_timestamp]]
 */
def toTimestamp: TimestampColumn =
  column.elem.map(dateCol => f.to_timestamp(dateCol)).toDC

/**
 * Converts the date column into an Instant column.
 *
 * @group Date Type
 * @see [[org.apache.spark.sql.functions.to_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.to_timestamp]]
 */
def toInstant: InstantColumn =
  column.elem.map(dateCol => f.to_timestamp(dateCol)).toDC
}
Expand Down
Loading

0 comments on commit b5efcd4

Please sign in to comment.