diff --git a/src/daft-sql/src/modules/config.rs b/src/daft-sql/src/modules/config.rs index fdd550160b..4b63d5f625 100644 --- a/src/daft-sql/src/modules/config.rs +++ b/src/daft-sql/src/modules/config.rs @@ -125,6 +125,34 @@ impl SQLFunction for S3ConfigFunction { Ok(Expr::Literal(LiteralValue::Struct(entries)).arced()) } + fn docstrings(&self, _: &str) -> String { + "Create configurations to be used when accessing an S3-compatible system.".to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &[ + "region_name", + "endpoint_url", + "key_id", + "session_token", + "access_key", + "credentials_provider", + "buffer_time", + "max_connections_per_io_thread", + "retry_initial_backoff_ms", + "connect_timeout_ms", + "read_timeout_ms", + "num_tries", + "retry_mode", + "anonymous", + "use_ssl", + "verify_ssl", + "check_hostname_ssl", + "requester_pays", + "force_virtual_addressing", + "profile_name", + ] + } } pub struct HTTPConfigFunction; @@ -151,6 +179,14 @@ impl SQLFunction for HTTPConfigFunction { Ok(Expr::Literal(LiteralValue::Struct(entries)).arced()) } + + fn docstrings(&self, _: &str) -> String { + "Create configurations for sending web requests.".to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &["user_agent", "bearer_token"] + } } pub struct AzureConfigFunction; impl SQLFunction for AzureConfigFunction { @@ -211,6 +247,26 @@ impl SQLFunction for AzureConfigFunction { Ok(Expr::Literal(LiteralValue::Struct(entries)).arced()) } + + fn docstrings(&self, _: &str) -> String { + "Create configurations to be used when accessing Azure Blob Storage.".to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &[ + "storage_account", + "access_key", + "sas_token", + "bearer_token", + "tenant_id", + "client_id", + "client_secret", + "use_fabric_endpoint", + "anonymous", + "endpoint_url", + "use_ssl", + ] + } } pub struct GCSConfigFunction; @@ -244,6 +300,13 @@ impl SQLFunction for GCSConfigFunction { 
Ok(Expr::Literal(LiteralValue::Struct(entries)).arced()) } + fn docstrings(&self, _: &str) -> String { + "Create configurations to be used when accessing Google Cloud Storage.".to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &["project_id", "credentials", "token", "anonymous"] + } } pub(crate) fn expr_to_iocfg(expr: &ExprRef) -> SQLPlannerResult { diff --git a/src/daft-sql/src/modules/hashing.rs b/src/daft-sql/src/modules/hashing.rs index da5da1e66c..ca0f465c75 100644 --- a/src/daft-sql/src/modules/hashing.rs +++ b/src/daft-sql/src/modules/hashing.rs @@ -51,6 +51,14 @@ impl SQLFunction for SQLHash { _ => unsupported_sql_err!("Invalid arguments for hash: '{inputs:?}'"), } } + + fn docstrings(&self, _: &str) -> String { + "Hashes the values in the input expression.".to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &["input", "seed"] + } } pub struct SQLMinhash; @@ -132,4 +140,13 @@ impl SQLFunction for SQLMinhash { _ => unsupported_sql_err!("Invalid arguments for minhash: '{inputs:?}'"), } } + + fn docstrings(&self, _: &str) -> String { + "Calculates the minimum hash over the inputs ngrams, repeating with num_hashes permutations." 
+ .to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &["input", "num_hashes", "ngram_size", "seed", "hash_function"] + } } diff --git a/src/daft-sql/src/modules/utf8.rs b/src/daft-sql/src/modules/utf8.rs index fed7149565..084da08962 100644 --- a/src/daft-sql/src/modules/utf8.rs +++ b/src/daft-sql/src/modules/utf8.rs @@ -108,7 +108,7 @@ impl SQLFunction for Utf8Expr { Self::ToDate(_) => "Parses the string as a date using the specified format.".to_string(), Self::ToDatetime(_, _) => "Parses the string as a datetime using the specified format.".to_string(), Self::LengthBytes => "Returns the length of the string in bytes".to_string(), - Self::Normalize(_) => unimplemented!("Normalize not implemented"), + Self::Normalize(_) => "Normalizes a string for more useful deduplication and data cleaning".to_string(), } } @@ -141,7 +141,13 @@ impl SQLFunction for Utf8Expr { Self::ToDate(_) => &["string_input", "format"], Self::ToDatetime(_, _) => &["string_input", "format"], Self::LengthBytes => &["string_input"], - Self::Normalize(_) => unimplemented!("Normalize not implemented"), + Self::Normalize(_) => &[ + "input", + "remove_punct", + "lowercase", + "nfd_unicode", + "white_space", + ], } } } @@ -358,6 +364,15 @@ impl SQLFunction for SQLCountMatches { )), } } + + fn docstrings(&self, _: &str) -> String { + "Counts the number of times a pattern, or multiple patterns, appears in the input." 
+ .to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &["input", "pattern", "whole_words", "case_sensitive"] + } } pub struct SQLNormalize; @@ -403,6 +418,19 @@ impl SQLFunction for SQLNormalize { _ => invalid_operation_err!("Invalid arguments for normalize"), } } + fn docstrings(&self, _: &str) -> String { + "Normalizes a string for more useful deduplication and data cleaning.".to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &[ + "input", + "remove_punct", + "lowercase", + "nfd_unicode", + "white_space", + ] + } } pub struct SQLTokenizeEncode; @@ -476,6 +504,21 @@ impl SQLFunction for SQLTokenizeEncode { _ => invalid_operation_err!("Invalid arguments for tokenize_encode"), } } + + fn docstrings(&self, _: &str) -> String { + "Encodes each string as a list of integer tokens using a tokenizer.".to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &[ + "input", + "token_path", + "io_config", + "pattern", + "special_tokens", + "use_special_tokens", + ] + } } pub struct SQLTokenizeDecode; @@ -540,6 +583,21 @@ impl SQLFunction for SQLTokenizeDecode { _ => invalid_operation_err!("Invalid arguments for tokenize_decode"), } } + + fn docstrings(&self, _: &str) -> String { + "Decodes each list of integer tokens into a string using a tokenizer.".to_string() + } + + fn arg_names(&self) -> &'static [&'static str] { + &[ + "input", + "token_path", + "io_config", + "pattern", + "special_tokens", + "use_special_tokens", + ] + } } pub struct SQLConcat;