From 004332afdf7cf1bb8f10e0eee478a9f8c5a5d11e Mon Sep 17 00:00:00 2001 From: glass-ships Date: Mon, 22 Apr 2024 19:16:52 -0600 Subject: [PATCH] further documentation changes --- docs/Ingests/index.md | 2 +- docs/Ingests/source_config.md | 62 ++++++++++++++------------ docs/index.md | 2 +- src/koza/model/config/source_config.py | 13 +++--- 4 files changed, 41 insertions(+), 38 deletions(-) diff --git a/docs/Ingests/index.md b/docs/Ingests/index.md index 8c1eb77..44cd645 100644 --- a/docs/Ingests/index.md +++ b/docs/Ingests/index.md @@ -1,5 +1,5 @@ -(For CLI usage, see the [CLI commands](./CLI.md) page.) +(For CLI usage, see the [CLI commands](../Usage/CLI.md) page.) Koza is designed to process and transform existing data into a target csv/json/jsonl format. diff --git a/docs/Ingests/source_config.md b/docs/Ingests/source_config.md index b889b71..faa7280 100644 --- a/docs/Ingests/source_config.md +++ b/docs/Ingests/source_config.md @@ -4,35 +4,39 @@ This YAML file sets properties for the ingest of a single file type from a withi ## Source Configuration Properties -| **Required properties** | | -| --------------------------- | --------------------------------------------------------------------------------------------------- | -| `name` | Name of the data ingest, as `<data source>_<type_of_ingest>`,
ex. `hpoa_gene_to_disease` | -| `files` | List of files to process | -| | | -| **Optional properties** | | -| `file_archive` | Path to a file archive containing the file(s) to process
Supported archive formats: zip, gzip | -| `format` | Format of the data file(s) (CSV or JSON) | -| `sssom_config` | Configures usage of SSSOM mapping files | -| `depends_on` | List of map config files to use | -| `metadata` | Metadata for the source, either a list of properties,
or path to a `metadata.yaml` | -| `transform_code` | Path to a python file to transform the data | -| `transform_mode` | How to process the transform file | -| `global_table` | Path to a global translation table file | -| `local_table` | Path to a local translation table file | -| `field_type_map` | Dict of field names and their type (using the FieldType enum) | -| `filters` | List of filters to apply | -| `json_path` | Path within JSON object containing data to process | -| `required_properties` | List of properties that must be present in output (JSON only) | -| | | -| **CSV-Specific Properties** | | -| `delimiter` | Delimiter for csv files (**Required for CSV format**) | -| **Optional CSV Properties** | | -| `columns` | List of columns to include in output (CSV only) | -| `header` | Header row index for csv files | -| `header_delimiter` | Delimiter for header in csv files | -| `header_prefix` | Prefix for header in csv files | -| `comment_char` | Comment character for csv files | -| `skip_blank_lines` | Skip blank lines in csv files | +| **Required properties** | | +| --------------------------- | ------------------------------------------------------------------------------------------------------ | +| `name` | Name of the data ingest, as `<data source>_<type_of_ingest>`,
ex. `hpoa_gene_to_disease` | +| `files` | List of files to process | +| | | +| `node_properties` | List of node properties to include in output | +| `edge_properties` | List of edge properties to include in output | +| **Note** | Either node or edge properties (or both) must be defined in the primary config yaml for your transform | +| | | +| **Optional properties** | | +| `file_archive` | Path to a file archive containing the file(s) to process
Supported archive formats: zip, gzip | +| `format` | Format of the data file(s) (CSV or JSON) | +| `sssom_config` | Configures usage of SSSOM mapping files | +| `depends_on` | List of map config files to use | +| `metadata` | Metadata for the source, either a list of properties,
or path to a `metadata.yaml` | +| `transform_code` | Path to a python file to transform the data | +| `transform_mode` | How to process the transform file | +| `global_table` | Path to a global translation table file | +| `local_table` | Path to a local translation table file | +| `field_type_map` | Dict of field names and their type (using the FieldType enum) | +| `filters` | List of filters to apply | +| `json_path` | Path within JSON object containing data to process | +| `required_properties` | List of properties that must be present in output (JSON only) | +| | | +| **CSV-Specific Properties** | | +| `delimiter` | Delimiter for csv files (**Required for CSV format**) | +| **Optional CSV Properties** | | +| `columns` | List of columns to include in output (CSV only) | +| `header` | Header row index for csv files | +| `header_delimiter` | Delimiter for header in csv files | +| `header_prefix` | Prefix for header in csv files | +| `comment_char` | Comment character for csv files | +| `skip_blank_lines` | Skip blank lines in csv files | ## Metadata Properties diff --git a/docs/index.md b/docs/index.md index ba7844c..3dbb6ca 100644 --- a/docs/index.md +++ b/docs/index.md @@ -25,7 +25,7 @@ See the [Ingests](./Ingests/index.md) page for information on how to configure i Koza can be used as a Python library, or via the command line. [CLI commands](./Usage/CLI.md) are available for validating and transforming data. -See the [API](./Usage/API.md) page for information on using Koza as a library. +See the [Module](./Usage/Module.md) page for information on using Koza as a library. Koza also includes some examples to help you get started (see `koza/examples`). 
### Basic Examples diff --git a/src/koza/model/config/source_config.py b/src/koza/model/config/source_config.py index a6e63b9..c79b2ad 100644 --- a/src/koza/model/config/source_config.py +++ b/src/koza/model/config/source_config.py @@ -120,14 +120,14 @@ class DatasetDescription: """ # id: Optional[str] = None # Can uncomment when we have a standard - name: Optional[str] = None # If empty use source name + name: Optional[str] = None # If empty use source name ingest_title: Optional[str] = None # Title of source of data, map to biolink name - ingest_url: Optional[str] = None # URL to source of data, maps to biolink iri - description: Optional[str] = None # Description of the data/ingest + ingest_url: Optional[str] = None # URL to source of data, maps to biolink iri + description: Optional[str] = None # Description of the data/ingest # source: Optional[str] = None # Possibly replaced with provided_by - provided_by: Optional[str] = None # <data source>_<type_of_ingest>, ex. hpoa_gene_to_disease + provided_by: Optional[str] = None # <data source>_<type_of_ingest>, ex. hpoa_gene_to_disease # license: Optional[str] = None # Possibly redundant, same as rights - rights: Optional[str] = None # License information for the data source + rights: Optional[str] = None # License information for the data source @dataclass(config=PYDANTIC_CONFIG) @@ -291,8 +291,7 @@ def __post_init__(self): @dataclass(config=PYDANTIC_CONFIG) class PrimaryFileConfig(SourceConfig): """ - node_properties and edge_properties are used for configuring - the KGX writer + node_properties and edge_properties are used for configuring the KGX writer """ node_properties: Optional[List[str]] = None