Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DOCS] Update incomplete SQL doc pages #3298

Merged
merged 5 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions src/daft-sql/src/modules/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,34 @@ impl SQLFunction for S3ConfigFunction {

Ok(Expr::Literal(LiteralValue::Struct(entries)).arced())
}
/// Returns the description text for `S3ConfigFunction`.
///
/// The string argument is ignored; the same text is returned for any value.
fn docstrings(&self, _: &str) -> String {
    String::from("Create configurations to be used when accessing an S3-compatible system.")
}

/// Returns the argument names reported for `S3ConfigFunction`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const S3_ARG_NAMES: &[&str] = &[
        "region_name",
        "endpoint_url",
        "key_id",
        "session_token",
        "access_key",
        "credentials_provider",
        "buffer_time",
        "max_connections_per_io_thread",
        "retry_initial_backoff_ms",
        "connect_timeout_ms",
        "read_timeout_ms",
        "num_tries",
        "retry_mode",
        "anonymous",
        "use_ssl",
        "verify_ssl",
        "check_hostname_ssl",
        "requester_pays",
        "force_virtual_addressing",
        "profile_name",
    ];
    S3_ARG_NAMES
}
}

pub struct HTTPConfigFunction;
Expand All @@ -151,6 +179,14 @@ impl SQLFunction for HTTPConfigFunction {

Ok(Expr::Literal(LiteralValue::Struct(entries)).arced())
}

/// Returns the description text for `HTTPConfigFunction`.
///
/// The string argument is ignored; the same text is returned for any value.
fn docstrings(&self, _: &str) -> String {
    String::from("Create configurations for sending web requests.")
}

/// Returns the argument names reported for `HTTPConfigFunction`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const HTTP_ARG_NAMES: &[&str] = &["user_agent", "bearer_token"];
    HTTP_ARG_NAMES
}
}
pub struct AzureConfigFunction;
impl SQLFunction for AzureConfigFunction {
Expand Down Expand Up @@ -211,6 +247,26 @@ impl SQLFunction for AzureConfigFunction {

Ok(Expr::Literal(LiteralValue::Struct(entries)).arced())
}

/// Returns the description text for `AzureConfigFunction`.
///
/// The string argument is ignored; the same text is returned for any value.
fn docstrings(&self, _: &str) -> String {
    String::from("Create configurations to be used when accessing Azure Blob Storage.")
}

/// Returns the argument names reported for `AzureConfigFunction`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const AZURE_ARG_NAMES: &[&str] = &[
        "storage_account",
        "access_key",
        "sas_token",
        "bearer_token",
        "tenant_id",
        "client_id",
        "client_secret",
        "use_fabric_endpoint",
        "anonymous",
        "endpoint_url",
        "use_ssl",
    ];
    AZURE_ARG_NAMES
}
}

pub struct GCSConfigFunction;
Expand Down Expand Up @@ -244,6 +300,13 @@ impl SQLFunction for GCSConfigFunction {

Ok(Expr::Literal(LiteralValue::Struct(entries)).arced())
}
/// Returns the description text for `GCSConfigFunction`.
///
/// The string argument is ignored; the same text is returned for any value.
fn docstrings(&self, _: &str) -> String {
    String::from("Create configurations to be used when accessing Google Cloud Storage.")
}

/// Returns the argument names reported for `GCSConfigFunction`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const GCS_ARG_NAMES: &[&str] = &["project_id", "credentials", "token", "anonymous"];
    GCS_ARG_NAMES
}
}

pub(crate) fn expr_to_iocfg(expr: &ExprRef) -> SQLPlannerResult<IOConfig> {
Expand Down
17 changes: 17 additions & 0 deletions src/daft-sql/src/modules/hashing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ impl SQLFunction for SQLHash {
_ => unsupported_sql_err!("Invalid arguments for hash: '{inputs:?}'"),
}
}

/// Returns the description text for `SQLHash`.
///
/// The string argument is ignored; the same text is returned for any value.
fn docstrings(&self, _: &str) -> String {
    String::from("Hashes the values in the input expression.")
}

/// Returns the argument names reported for `SQLHash`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const HASH_ARG_NAMES: &[&str] = &["input", "seed"];
    HASH_ARG_NAMES
}
}

pub struct SQLMinhash;
Expand Down Expand Up @@ -132,4 +140,13 @@ impl SQLFunction for SQLMinhash {
_ => unsupported_sql_err!("Invalid arguments for minhash: '{inputs:?}'"),
}
}

/// Returns the description text for `SQLMinhash`.
///
/// The string argument is ignored; the same text is returned for any value.
fn docstrings(&self, _: &str) -> String {
    String::from(
        "Calculates the minimum hash over the inputs ngrams, repeating with num_hashes permutations.",
    )
}

/// Returns the argument names reported for `SQLMinhash`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const MINHASH_ARG_NAMES: &[&str] =
        &["input", "num_hashes", "ngram_size", "seed", "hash_function"];
    MINHASH_ARG_NAMES
}
}
62 changes: 60 additions & 2 deletions src/daft-sql/src/modules/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ impl SQLFunction for Utf8Expr {
Self::ToDate(_) => "Parses the string as a date using the specified format.".to_string(),
Self::ToDatetime(_, _) => "Parses the string as a datetime using the specified format.".to_string(),
Self::LengthBytes => "Returns the length of the string in bytes".to_string(),
Self::Normalize(_) => unimplemented!("Normalize not implemented"),
Self::Normalize(_) => "Normalizes a string for more useful deduplication and data cleaning".to_string(),
}
}

Expand Down Expand Up @@ -141,7 +141,13 @@ impl SQLFunction for Utf8Expr {
Self::ToDate(_) => &["string_input", "format"],
Self::ToDatetime(_, _) => &["string_input", "format"],
Self::LengthBytes => &["string_input"],
Self::Normalize(_) => unimplemented!("Normalize not implemented"),
Self::Normalize(_) => &[
"input",
"remove_punct",
"lowercase",
"nfd_unicode",
"white_space",
],
}
}
}
Expand Down Expand Up @@ -358,6 +364,15 @@ impl SQLFunction for SQLCountMatches {
)),
}
}

/// Returns the description text for `SQLCountMatches`.
///
/// The string argument is ignored; the same text is returned for any value.
fn docstrings(&self, _: &str) -> String {
    String::from(
        "Counts the number of times a pattern, or multiple patterns, appears in the input.",
    )
}

/// Returns the argument names reported for `SQLCountMatches`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const COUNT_MATCHES_ARG_NAMES: &[&str] =
        &["input", "pattern", "whole_words", "case_sensitive"];
    COUNT_MATCHES_ARG_NAMES
}
}

pub struct SQLNormalize;
Expand Down Expand Up @@ -403,6 +418,19 @@ impl SQLFunction for SQLNormalize {
_ => invalid_operation_err!("Invalid arguments for normalize"),
}
}
/// Returns the description text for `SQLNormalize`.
///
/// The string argument is ignored; the same text is returned for any value.
fn docstrings(&self, _: &str) -> String {
    String::from("Normalizes a string for more useful deduplication and data cleaning.")
}

/// Returns the argument names reported for `SQLNormalize`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const NORMALIZE_ARG_NAMES: &[&str] = &[
        "input",
        "remove_punct",
        "lowercase",
        "nfd_unicode",
        "white_space",
    ];
    NORMALIZE_ARG_NAMES
}
}

pub struct SQLTokenizeEncode;
Expand Down Expand Up @@ -476,6 +504,21 @@ impl SQLFunction for SQLTokenizeEncode {
_ => invalid_operation_err!("Invalid arguments for tokenize_encode"),
}
}

/// Returns the description text for `SQLTokenizeEncode`.
///
/// The string argument is ignored; the same text is returned for any value.
///
/// The previous text described decoding (tokens -> string), which is the
/// opposite direction of `tokenize_encode`; it now describes encoding.
fn docstrings(&self, _: &str) -> String {
    "Encodes each string as a list of integer tokens using a tokenizer.".to_string()
}

/// Returns the argument names reported for `SQLTokenizeEncode`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const TOKENIZE_ENCODE_ARG_NAMES: &[&str] = &[
        "input",
        "token_path",
        "io_config",
        "pattern",
        "special_tokens",
        "use_special_tokens",
    ];
    TOKENIZE_ENCODE_ARG_NAMES
}
}

pub struct SQLTokenizeDecode;
Expand Down Expand Up @@ -540,6 +583,21 @@ impl SQLFunction for SQLTokenizeDecode {
_ => invalid_operation_err!("Invalid arguments for tokenize_decode"),
}
}

/// Returns the description text for `SQLTokenizeDecode`.
///
/// The string argument is ignored; the same text is returned for any value.
///
/// The previous text described encoding (string -> tokens), which is the
/// opposite direction of `tokenize_decode`; it now describes decoding.
fn docstrings(&self, _: &str) -> String {
    "Decodes each list of integer tokens into a string using a tokenizer.".to_string()
}

/// Returns the argument names reported for `SQLTokenizeDecode`, in a fixed order.
fn arg_names(&self) -> &'static [&'static str] {
    const TOKENIZE_DECODE_ARG_NAMES: &[&str] = &[
        "input",
        "token_path",
        "io_config",
        "pattern",
        "special_tokens",
        "use_special_tokens",
    ];
    TOKENIZE_DECODE_ARG_NAMES
}
}

pub struct SQLConcat;
Expand Down
Loading