From aab791a1e66a80772ff9d8d405d5590fd8b81e6b Mon Sep 17 00:00:00 2001 From: jochen Date: Mon, 9 Sep 2024 12:36:09 +0200 Subject: [PATCH] Extract out different types of servers to defs, ensure server required fields are populated. Credits to pflooky --- datacontract.schema.json | 1384 +++++++++++++++++++++----------------- 1 file changed, 753 insertions(+), 631 deletions(-) diff --git a/datacontract.schema.json b/datacontract.schema.json index f78ff14..5c35fbf 100644 --- a/datacontract.schema.json +++ b/datacontract.schema.json @@ -80,705 +80,285 @@ }, "servers": { "type": "object", - "properties": { - "description": { - "type": "string", - "description": "An optional string describing the servers." - }, - "environment": { - "type": "string", - "description": "The environment in which the servers are running. Examples: prod, sit, stg." - } - }, + "description": "Information about the servers.", "additionalProperties": { - "oneOf": [ + "$ref": "#/$defs/BaseServer", + "allOf": [ { - "type": "object", - "title": "BigQueryServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "bigquery", - "BigQuery" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "project": { - "type": "string", - "description": "An optional string describing the server." - }, - "dataset": { - "type": "string", - "description": "An optional string describing the server." - } + "if": { + "properties": { + "type": { + "enum": [ + "bigquery", + "BigQuery" + ] + } + }, + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "project", - "dataset" - ] + "then": { + "$ref": "#/$defs/BigQueryServer" + } }, { - "type": "object", - "title": "S3Server", - "properties": { - "type": { - "type": "string", - "enum": [ - "s3" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "location": { - "type": "string", - "format": "uri", - "description": "An optional string describing the server. 
Must be in the form of a URL.", - "examples": [ - "s3://datacontract-example-orders-latest/data/{model}/*.json" - ] - }, - "endpointUrl": { - "type": "string", - "format": "uri", - "description": "The server endpoint for S3-compatible servers.", - "examples": ["https://minio.example.com"] - }, - "format": { - "type": "string", - "enum": [ - "parquet", - "delta", - "json", - "csv" - ], - "description": "File format." + "if": { + "properties": { + "type": { + "const": "postgres" + } }, - "delimiter": { - "type": "string", - "enum": [ - "new_line", - "array" - ], - "description": "Only for format = json. How multiple json documents are delimited within one file" - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "location" - ] + "then": { + "$ref": "#/$defs/PostgresServer" + } }, { - "type": "object", - "title": "GcsServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "gcs" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "location": { - "type": "string", - "format": "uri", - "description": "The GS/GCS url to the data.", - "examples": [ - "gs://example-storage/data/*/*.json" - ] - }, - "format": { - "type": "string", - "enum": [ - "parquet", - "delta", - "json", - "csv" - ], - "description": "File format." + "if": { + "properties": { + "type": { + "const": "s3" + } }, - "delimiter": { - "type": "string", - "enum": [ - "new_line", - "array" - ], - "description": "Only for format = json. How multiple json documents are delimited within one file" - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "location" - ] + "then": { + "$ref": "#/$defs/S3Server" + } }, { - "type": "object", - "title": "SftpServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "sftp" - ], - "description": "The type of the data product technology that implements the data contract." 
- }, - "location": { - "type": "string", - "format": "uri", - "description": "An optional string describing the server. Must be in the form of a sftp URL.", - "examples": [ - "sftp://123.123.12.123/{model}/*.json" - ] - }, - "format": { - "type": "string", - "enum": [ - "parquet", - "delta", - "json", - "csv" - ], - "description": "File format." + "if": { + "properties": { + "type": { + "const": "sftp" + } }, - "delimiter": { - "type": "string", - "enum": [ - "new_line", - "array" - ], - "description": "Only for format = json. How multiple json documents are delimited within one file" - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "location" - ] + "then": { + "$ref": "#/$defs/SftpServer" + } }, { - "type": "object", - "title": "RedshiftServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "redshift" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "account": { - "type": "string", - "description": "An optional string describing the server." - }, - "database": { - "type": "string", - "description": "An optional string describing the server." + "if": { + "properties": { + "type": { + "const": "redshift" + } }, - "schema": { - "type": "string", - "description": "An optional string describing the server." - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "account", - "database", - "schema" - ] + "then": { + "$ref": "#/$defs/RedshiftServer" + } }, { - "type": "object", - "title": "AzureServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "azure" - ], - "description": "The type of the data product technology that implements the data contract." 
- }, - "location": { - "type": "string", - "format": "uri", - "description": "Fully qualified path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs.", - "examples": [ - "az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet", - "abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet" - ] - }, - "format": { - "type": "string", - "enum": [ - "parquet", - "delta", - "json", - "csv" - ], - "description": "File format." + "if": { + "properties": { + "type": { + "const": "azure" + } }, - "delimiter": { - "type": "string", - "enum": [ - "new_line", - "array" - ], - "description": "Only for format = json. How multiple json documents are delimited within one file" - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "location", - "format" - ] + "then": { + "$ref": "#/$defs/AzureServer" + } }, { - "type": "object", - "title": "SqlserverServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "sqlserver" - ], - "description": "The type of the data product technology that implements the data contract." 
- }, - "host": { - "type": "string", - "description": "The host to the database server", - "examples": [ - "localhost" - ] - }, - "port": { - "type": "integer", - "description": "The port to the database server.", - "default": 1433, - "examples": [ - 1433 - ] - }, - "database": { - "type": "string", - "description": "The name of the database.", - "examples": [ - "database" - ] + "if": { + "properties": { + "type": { + "const": "sqlserver" + } }, - "schema": { - "type": "string", - "description": "The name of the schema in the database.", - "examples": [ - "dbo" - ] - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "host", - "database", - "schema" - ] + "then": { + "$ref": "#/$defs/SqlserverServer" + } }, { - "type": "object", - "title": "SnowflakeServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "snowflake" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "account": { - "type": "string", - "description": "An optional string describing the server." - }, - "database": { - "type": "string", - "description": "An optional string describing the server." + "if": { + "properties": { + "type": { + "const": "snowflake" + } }, - "schema": { - "type": "string", - "description": "An optional string describing the server." - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "account", - "database", - "schema" - ] + "then": { + "$ref": "#/$defs/SnowflakeServer" + } }, { - "type": "object", - "title": "DatabricksServer", - "properties": { - "type": { - "type": "string", - "const": "databricks", - "description": "The type of the data product technology that implements the data contract." 
- }, - "host": { - "type": "string", - "description": "The Databricks host", - "examples": [ - "dbc-abcdefgh-1234.cloud.databricks.com" - ] - }, - "catalog": { - "type": "string", - "description": "The name of the Hive or Unity catalog" + "if": { + "properties": { + "type": { + "const": "databricks" + } }, - "schema": { - "type": "string", - "description": "The schema name in the catalog" - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "catalog", - "schema" - ] + "then": { + "$ref": "#/$defs/DatabricksServer" + } }, { - "type": "object", - "title": "DataframeServer", - "properties": { - "type": { - "type": "string", - "const": "dataframe", - "description": "The type of the data product technology that implements the data contract." - } + "if": { + "properties": { + "type": { + "const": "dataframe" + } + }, + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type" - ] + "then": { + "$ref": "#/$defs/DataframeServer" + } }, { - "type": "object", - "title": "GlueServer", - "properties": { - "type": { - "type": "string", - "const": "glue", - "description": "The type of the data product technology that implements the data contract." - }, - "account": { - "type": "string", - "description": "The AWS Glue account", - "examples": [ - "1234-5678-9012" - ] - }, - "database": { - "type": "string", - "description": "The AWS Glue database name", - "examples": [ - "my_database" - ] - }, - "location": { - "type": "string", - "format": "uri", - "description": "The AWS S3 path. 
Must be in the form of a URL.", - "examples": [ - "s3://datacontract-example-orders-latest/data/{model}" - ] + "if": { + "properties": { + "type": { + "const": "glue" + } }, - "format": { - "type": "string", - "description": "The format of the files", - "examples": [ - "parquet", - "csv", - "json", - "delta" - ] - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "account", - "database" - ] + "then": { + "$ref": "#/$defs/GlueServer" + } }, { - "type": "object", - "title": "PostgresServer", - "properties": { - "type": { - "type": "string", - "const": "postgres", - "description": "The type of the data product technology that implements the data contract." - }, - "host": { - "type": "string", - "description": "The host to the database server", - "examples": [ - "localhost" - ] - }, - "port": { - "type": "integer", - "description": "The port to the database server." - }, - "database": { - "type": "string", - "description": "The name of the database.", - "examples": [ - "postgres" - ] + "if": { + "properties": { + "type": { + "const": "postgres" + } }, - "schema": { - "type": "string", - "description": "The name of the schema in the database.", - "examples": [ - "public" - ] - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "host", - "port", - "database", - "schema" - ] + "then": { + "$ref": "#/$defs/PostgresServer" + } }, { - "type": "object", - "title": "OracleServer", - "properties": { - "type": { - "type": "string", - "const": "oracle", - "description": "The type of the data product technology that implements the data contract." 
- }, - "host": { - "type": "string", - "description": "The host to the oracle server", - "examples": [ - "localhost" - ] - }, - "port": { - "type": "integer", - "description": "The port to the oracle server.", - "examples": [ - 1523 - ] + "if": { + "properties": { + "type": { + "const": "oracle" + } }, - "serviceName": { - "type": "string", - "description": "The name of the service.", - "examples": [ - "service" - ] - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "host", - "port", - "serviceName" - ] + "then": { + "$ref": "#/$defs/OracleServer" + } }, { - "type": "object", - "title": "KafkaServer", - "description": "Kafka Server", - "properties": { - "type": { - "type": "string", - "enum": [ - "kafka" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "host": { - "type": "string", - "description": "The bootstrap server of the kafka cluster." - }, - "topic": { - "type": "string", - "description": "The topic name." + "if": { + "properties": { + "type": { + "const": "kafka" + } }, - "format": { - "type": "string", - "description": "The format of the message. Examples: json, avro, protobuf. Default: json.", - "default": "json" - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "host", - "topic" - ] + "then": { + "$ref": "#/$defs/KafkaServer" + } }, { - "type": "object", - "title": "PubSubServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "pubsub" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "project": { - "type": "string", - "description": "The GCP project name." + "if": { + "properties": { + "type": { + "const": "pubsub" + } }, - "topic": { - "type": "string", - "description": "The topic name." 
- } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "project", - "topic" - ] + "then": { + "$ref": "#/$defs/PubSubServer" + } }, { - "type": "object", - "title": "KinesisDataStreamsServer", - "description": "Kinesis Data Streams Server", - "properties": { - "type": { - "type": "string", - "enum": [ - "kinesis" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "stream": { - "type": "string", - "description": "The name of the Kinesis data stream." - }, - "region": { - "type": "string", - "description": "AWS region.", - "examples": [ - "eu-west-1" - ] + "if": { + "properties": { + "type": { + "const": "kinesis" + } }, - "format": { - "type": "string", - "description": "The format of the record", - "examples": [ - "json", - "avro", - "protobuf" - ] - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "stream" - ] + "then": { + "$ref": "#/$defs/KinesisDataStreamsServer" + } }, { - "type": "object", - "title": "TrinoServer", - "properties": { - "type": { - "type": "string", - "const": "trino", - "description": "The type of the data product technology that implements the data contract." - }, - "host": { - "type": "string", - "description": "The host to the database server", - "examples": [ - "localhost" - ] - }, - "port": { - "type": "integer", - "description": "The port to the database server." 
- }, - "catalog": { - "type": "string", - "description": "The name of the catalog.", - "examples": [ - "hive" - ] + "if": { + "properties": { + "type": { + "const": "trino" + } }, - "schema": { - "type": "string", - "description": "The name of the schema in the database.", - "examples": [ - "my_schema" - ] - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "host", - "port", - "catalog", - "schema" - ] + "then": { + "$ref": "#/$defs/TrinoServer" + } }, { - "type": "object", - "title": "LocalServer", - "properties": { - "type": { - "type": "string", - "enum": [ - "local" - ], - "description": "The type of the data product technology that implements the data contract." - }, - "path": { - "type": "string", - "description": "The relative or absolute path to the data file(s).", - "examples": [ - "./folder/data.parquet", - "./folder/*.parquet" - ] + "if": { + "properties": { + "type": { + "const": "local" + } }, - "format": { - "type": "string", - "description": "The format of the file(s)", - "examples": [ - "json", - "parquet", - "delta", - "csv" - ] - } + "required": [ + "type" + ] }, - "additionalProperties": true, - "required": [ - "type", - "path", - "format" - ] + "then": { + "$ref": "#/$defs/LocalServer" + } } ] - }, - "description": "Information about the servers." + } }, "terms": { "type": "object", @@ -831,7 +405,10 @@ "title": { "type": "string", "description": "An optional string providing a human readable name for the model. Especially useful if the model name is cryptic or contains abbreviations.", - "examples": ["Purchase Orders", "Air Shipments"] + "examples": [ + "Purchase Orders", + "Air Shipments" + ] }, "fields": { "description": "Specifies a field in the data model. Use the field name (e.g., the column name) as the key.", @@ -933,7 +510,7 @@ "type": "string", "description": "A regular expression the value must match. 
Only applies to string types.", "examples": [ - "^[a-zA-Z0-9_-]+$" + "^[a-zA-Z0-9_-]+$" ] }, "minimum": { @@ -1060,8 +637,6 @@ "object", "array", "null" - - ] }, "properties": { @@ -1533,6 +1108,596 @@ "bytes", "null" ] + }, + "BaseServer": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": "An optional string describing the servers." + }, + "environment": { + "type": "string", + "description": "The environment in which the servers are running. Examples: prod, sit, stg." + }, + "type": { + "type": "string", + "description": "The type of the data product technology that implements the data contract.", + "enum": [ + "bigquery", + "BigQuery", + "s3", + "gcs", + "sftp", + "redshift", + "azure", + "sqlserver", + "snowflake", + "databricks", + "dataframe", + "glue", + "postgres", + "oracle", + "kafka", + "pubsub", + "kinesis", + "trino", + "local" + ] + } + }, + "additionalProperties": true, + "required": [ + "type" + ], + "if": { + "properties": { + "type": { + "const": "gcs" + } + }, + "required": [ + "type" + ] + }, + "then": { + "$ref": "#/$defs/GcsServer" + } + }, + "BigQueryServer": { + "type": "object", + "title": "BigQueryServer", + "properties": { + "project": { + "type": "string", + "description": "The GCP project name." + }, + "dataset": { + "type": "string", + "description": "The GCP dataset name." + } + }, + "required": [ + "project", + "dataset" + ] + }, + "GcsServer": { + "type": "object", + "title": "GcsServer", + "properties": { + "location": { + "type": "string", + "format": "uri", + "description": "The GS/GCS url to the data.", + "examples": [ + "gs://example-storage/data/*/*.json" + ] + }, + "format": { + "type": "string", + "enum": [ + "parquet", + "delta", + "json", + "csv" + ], + "description": "File format." + }, + "delimiter": { + "type": "string", + "enum": [ + "new_line", + "array" + ], + "description": "Only for format = json. How multiple json documents are delimited within one file" + } + }, + "required": [ + "location" + ] + }, + "S3Server": { + "type": "object", + "title": "S3Server", + "properties": { + "location": { + "type": "string", + "format": "uri", + "description": "S3 URL, starting with `s3://`", + "examples": [ + "s3://datacontract-example-orders-latest/data/{model}/*.json" + ] + }, + "endpointUrl": { + "type": "string", + "format": "uri", + "description": "The server endpoint for S3-compatible servers.", + "examples": [ + "https://minio.example.com" + ] + }, + "format": { + "type": "string", + "enum": [ + "parquet", + "delta", + "json", + "csv" + ], + "description": "File format." + }, + "delimiter": { + "type": "string", + "enum": [ + "new_line", + "array" + ], + "description": "Only for format = json. 
How multiple json documents are delimited within one file" + } + }, + "required": [ + "location" + ] + }, + "SftpServer": { + "type": "object", + "title": "SftpServer", + "properties": { + "location": { + "type": "string", + "format": "uri", + "pattern": "^sftp://.*", + "description": "SFTP URL, starting with `sftp://`", + "examples": [ + "sftp://123.123.12.123/{model}/*.json" + ] + }, + "format": { + "type": "string", + "enum": [ + "parquet", + "delta", + "json", + "csv" + ], + "description": "File format." + }, + "delimiter": { + "type": "string", + "enum": [ + "new_line", + "array" + ], + "description": "Only for format = json. How multiple json documents are delimited within one file" + } + }, + "required": [ + "location" + ] + }, + "RedshiftServer": { + "type": "object", + "title": "RedshiftServer", + "properties": { + "account": { + "type": "string", + "description": "The Redshift account." + }, + "database": { + "type": "string", + "description": "The name of the database." + }, + "schema": { + "type": "string", + "description": "The name of the schema in the database." + } + }, + "required": [ + "account", + "database", + "schema" + ] + }, + "AzureServer": { + "type": "object", + "title": "AzureServer", + "properties": { + "location": { + "type": "string", + "format": "uri", + "description": "Fully qualified path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs.", + "examples": [ + "az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet", + "abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet" + ] + }, + "format": { + "type": "string", + "enum": [ + "parquet", + "delta", + "json", + "csv" + ], + "description": "File format." + }, + "delimiter": { + "type": "string", + "enum": [ + "new_line", + "array" + ], + "description": "Only for format = json. 
How multiple json documents are delimited within one file" + } + }, + "required": [ + "location", + "format" + ] + }, + "SqlserverServer": { + "type": "object", + "title": "SqlserverServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the database server", + "examples": [ + "localhost" + ] + }, + "port": { + "type": "integer", + "description": "The port to the database server.", + "default": 1433, + "examples": [ + 1433 + ] + }, + "database": { + "type": "string", + "description": "The name of the database.", + "examples": [ + "database" + ] + }, + "schema": { + "type": "string", + "description": "The name of the schema in the database.", + "examples": [ + "dbo" + ] + } + }, + "required": [ + "host", + "database", + "schema" + ] + }, + "SnowflakeServer": { + "type": "object", + "title": "SnowflakeServer", + "properties": { + "account": { + "type": "string", + "description": "The Snowflake account." + }, + "database": { + "type": "string", + "description": "The name of the database." + }, + "schema": { + "type": "string", + "description": "The name of the schema in the database." 
+ } + }, + "required": [ + "account", + "database", + "schema" + ] + }, + "DatabricksServer": { + "type": "object", + "title": "DatabricksServer", + "properties": { + "host": { + "type": "string", + "description": "The Databricks host", + "examples": [ + "dbc-abcdefgh-1234.cloud.databricks.com" + ] + }, + "catalog": { + "type": "string", + "description": "The name of the Hive or Unity catalog" + }, + "schema": { + "type": "string", + "description": "The schema name in the catalog" + } + }, + "required": [ + "catalog", + "schema" + ] + }, + "DataframeServer": { + "type": "object", + "title": "DataframeServer", + "required": [ + "type" + ] + }, + "GlueServer": { + "type": "object", + "title": "GlueServer", + "properties": { + "account": { + "type": "string", + "description": "The AWS Glue account", + "examples": [ + "1234-5678-9012" + ] + }, + "database": { + "type": "string", + "description": "The AWS Glue database name", + "examples": [ + "my_database" + ] + }, + "location": { + "type": "string", + "format": "uri", + "description": "The AWS S3 path. Must be in the form of a URL.", + "examples": [ + "s3://datacontract-example-orders-latest/data/{model}" + ] + }, + "format": { + "type": "string", + "description": "The format of the files", + "examples": [ + "parquet", + "csv", + "json", + "delta" + ] + } + }, + "required": [ + "account", + "database" + ] + }, + "PostgresServer": { + "type": "object", + "title": "PostgresServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the database server", + "examples": [ + "localhost" + ] + }, + "port": { + "type": "integer", + "description": "The port to the database server." 
+ }, + "database": { + "type": "string", + "description": "The name of the database.", + "examples": [ + "postgres" + ] + }, + "schema": { + "type": "string", + "description": "The name of the schema in the database.", + "examples": [ + "public" + ] + } + }, + "required": [ + "host", + "port", + "database", + "schema" + ] + }, + "OracleServer": { + "type": "object", + "title": "OracleServer", + "properties": { + "host": { + "type": "string", + "description": "The host to the oracle server", + "examples": [ + "localhost" + ] + }, + "port": { + "type": "integer", + "description": "The port to the oracle server.", + "examples": [ + 1523 + ] + }, + "serviceName": { + "type": "string", + "description": "The name of the service.", + "examples": [ + "service" + ] + } + }, + "required": [ + "host", + "port", + "serviceName" + ] + }, + "KafkaServer": { + "type": "object", + "title": "KafkaServer", + "description": "Kafka Server", + "properties": { + "host": { + "type": "string", + "description": "The bootstrap server of the kafka cluster." + }, + "topic": { + "type": "string", + "description": "The topic name." + }, + "format": { + "type": "string", + "description": "The format of the message. Examples: json, avro, protobuf.", + "default": "json" + } + }, + "required": [ + "host", + "topic" + ] + }, + "PubSubServer": { + "type": "object", + "title": "PubSubServer", + "properties": { + "project": { + "type": "string", + "description": "The GCP project name." + }, + "topic": { + "type": "string", + "description": "The topic name." + } + }, + "required": [ + "project", + "topic" + ] + }, + "KinesisDataStreamsServer": { + "type": "object", + "title": "KinesisDataStreamsServer", + "description": "Kinesis Data Streams Server", + "properties": { + "stream": { + "type": "string", + "description": "The name of the Kinesis data stream." 
+ }, + "region": { + "type": "string", + "description": "AWS region.", + "examples": [ + "eu-west-1" + ] + }, + "format": { + "type": "string", + "description": "The format of the record", + "examples": [ + "json", + "avro", + "protobuf" + ] + } + }, + "required": [ + "stream" + ] + }, + "TrinoServer": { + "type": "object", + "title": "TrinoServer", + "properties": { + "host": { + "type": "string", + "description": "The Trino host URL.", + "examples": [ + "localhost" + ] + }, + "port": { + "type": "integer", + "description": "The Trino port." + }, + "catalog": { + "type": "string", + "description": "The name of the catalog.", + "examples": [ + "hive" + ] + }, + "schema": { + "type": "string", + "description": "The name of the schema in the database.", + "examples": [ + "my_schema" + ] + } + }, + "required": [ + "host", + "port", + "catalog", + "schema" + ] + }, + "LocalServer": { + "type": "object", + "title": "LocalServer", + "properties": { + "path": { + "type": "string", + "description": "The relative or absolute path to the data file(s).", + "examples": [ + "./folder/data.parquet", + "./folder/*.parquet" + ] + }, + "format": { + "type": "string", + "description": "The format of the file(s)", + "examples": [ + "json", + "parquet", + "delta", + "csv" + ] + } + }, + "required": [ + "path", + "format" + ] } } }