Skip to content

Commit

Permalink
Bulk commit towards a better understanding of what we do here
Browse files Browse the repository at this point in the history
Key aspects:

- Separate ontology aspects from schema definitions. The latter
  are focused on data structure (e.g., nodes of a graph), while the
  former are about the definition and composition of recognized
  entities.
- Add a "full" example. Largely commented out for now, but with the
  aim to enable more parts, as they are being defined.
- Disable definitions that are not yet needed.

A useful thing to run at this point is:

```
linkml-convert -s src/linkml/datalad-dataset-graph.yaml src/examples/datalad-dataset.yaml
```
  • Loading branch information
mih committed Nov 27, 2023
1 parent 86ee0c4 commit b3a95c7
Show file tree
Hide file tree
Showing 7 changed files with 267 additions and 31 deletions.
14 changes: 10 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
all: mkdocs-site build/context.jsonld

build/context.jsonld: src/linkml/root.yaml
build/context.jsonld: src/linkml/ontology.yaml
mkdir -p build
gen-jsonld-context \
--prefixes \
Expand All @@ -19,7 +19,7 @@ linkml-docs-stamp:
--metadata \
--format markdown \
-d build/linkml-docs \
src/linkml/root.yaml
src/linkml/ontology.yaml
touch $@

extra-docs: extra-docs-stamp
Expand All @@ -32,15 +32,21 @@ mkdocs-site-stamp: linkml-docs extra-docs
mkdocs build
touch $@

lint: src/linkml/root.yaml
lint: lint-ontology lint-dataset-graph-schema
lint-ontology: src/linkml/ontology.yaml
linkml-lint \
--config .linkmllint.yaml \
--max-warnings 0 \
$<
lint-dataset-graph-schema: src/linkml/datalad-dataset-graph.yaml
linkml-lint \
--config .linkmllint.yaml \
--max-warnings 0 \
$<

validate: validate-datalad-dataset
validate-datalad-dataset:
linkml-validate --target-class DataladDataset -s src/linkml/datalad-datasets.yaml src/examples/datalad-dataset.yaml
linkml-validate -s src/linkml/datalad-dataset-serialization.yaml src/examples/datalad-dataset.yaml

clean:
rm -rf build
Expand Down
132 changes: 131 additions & 1 deletion src/examples/datalad-dataset.yaml
Original file line number Diff line number Diff line change
@@ -1 +1,131 @@
id: eba5177f-2040-448f-b21d-1f0ae698d0f0
graph:
# content for .datalad/config
- id: 30e9ebb10380d9460522d063e53d39adecef4d94
objtype: FileContent
# size: 145
# checksum:
# hex: 190a18037c64c43e6b11489df4bf0b9eb6d2c9bf
# type: SHA-1
# available_at:
# # inline ObjectAvailability instances
# # not including an `id` means "under the same ID"
# - storage: 27f0483a-8c70-11ee-b9d6-3b5dd1955fcc

## .datalad subdirectory
#- id: d3cafe4592eb5837cfd0fc8f4637afc0dd3e7c14
# type: Directory
# name: .datalad
# items:
# config:
# content: 30e9ebb10380d9460522d063e53d39adecef4d94
# # this "type" is not a class name, but a value from
# # an enum that defines all possible ways directory
# # items could be set up (regular file, symlink,
# # executable, etc).
# item_type: file
#
# content for outputs.txt
- id: MD5E-s4--ba1f2511fc30423bdbb183fe33f3dd0f.txt
objtype: FileContent
# size: 4
# checksum:
# hex: ba1f2511fc30423bdbb183fe33f3dd0f
# type: MD5
# available_at:
# # stored at an S3 special remote under this key
# - storage: ffa6ae3c-8c74-11ee-ad43-5fc1dc4c8fd0
# # but also as a WebDAV-accessible random copy
# - storage: aa58c8ec-8c75-11ee-a7cf-4f2bfe33f1d2
# # the access ID is the file name
# storage_id: 'random_copy/outputs.txt'
#
# content for .gitmodules
- id: 144d450caf1e6f93af67973261ac6924fdd3169b
objtype: FileContent
# size: 32
# checksum:
# hex: 939b0cbc65cdd62ab9fb08609afb62ae008a1728
# type: SHA-1
# available_at:
# - storage: 27f0483a-8c70-11ee-b9d6-3b5dd1955fcc
# content for script.py
- id: af926ef0c359556ac1d36d71f7e173d97b893ff2
objtype: FileContent
# size: 3255
# checksum:
# hex: cdb74a421ab03d015dadeabd713ede7d8227f618
# type: SHA-1
# available_at:
# - storage: 27f0483a-8c70-11ee-b9d6-3b5dd1955fcc
## root directory of dataset
## this record type is concerned with the presentation of
# content (naming of files, organization, permissions)
#- id: fb715e5f3c368ae50cf16c9b6a8e5ca23a353ea4
# type: Directory
# items:
# .gitmodules:
# content: 144d450caf1e6f93af67973261ac6924fdd3169b
# item_type: file
# .datalad:
# content: d3cafe4592eb5837cfd0fc8f4637afc0dd3e7c14
# script.py:
# content: af926ef0c359556ac1d36d71f7e173d97b893ff2
# item_type: executable_file
# outputs.txt:
# content: MD5E-s4--ba1f2511fc30423bdbb183fe33f3dd0f.txt
# item_type: file
# # annotate availability of the entire tree
# available_at:
# # map storage to storage-specific identifier
# # here the "remote" identifier is the same as for the tree, because
# # the storage is a clone of the repo
# - storage: 27f0483a-8c70-11ee-b9d6-3b5dd1955fcc
# # the whole directory also exists as a copy in a remote zipfile
# # the zipfile only contains this directory
# - storage: 6860c9e8-8c76-11ee-8f18-bb3625743f23
# storage_id: .
#
## particular dataset version
#- id: 7726424f50c6e9a70ba31e8d44c5d86fc46170da
# type: commit
# # entrypoint to the content
# content: fb715e5f3c368ae50cf16c9b6a8e5ca23a353ea4
# # some other metadata
# # the next two could be instances of an `Agent`
# author:
# - name: Michael Hanke
# - email: [email protected]
# creator:
# - name: Michael Hanke
# - email: [email protected]
# # ISO timestamp
# created: 2023-11-25T12:04:53+01:00
# # commit subject
# title: Initial commit
# # commit message body
# description: Something elaborate, as usual
# # previous versions
# parent_commits:
#
## object storage instances
## they can be rather heterogeneous, different parameters, etc
## however, the idea is that any implementation can be made to
## produce a file content given only the respective "object id"
## after initialization of the respective handler with these
## parameters
#- id: 27f0483a-8c70-11ee-b9d6-3b5dd1955fcc
# type: GitRepo
# fetch_url: https://example.com/myrepo.git
#
#- id: ffa6ae3c-8c74-11ee-ad43-5fc1dc4c8fd0
# type: GitAnnexS3Remote
# parameters:
# bucket: somebucketid
#
#- id: aa58c8ec-8c75-11ee-a7cf-4f2bfe33f1d2
# type: WebDavService
# url: https://dav.example/com/user
#
#- id: 6860c9e8-8c76-11ee-8f18-bb3625743f23
# type: ZipArchive
# download_url: https://example.com/dumps/my.zip
24 changes: 24 additions & 0 deletions src/linkml/datalad-dataset-graph.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# schema for a specific serialization format of DataLad datasets

id: https://concepts.datalad.org/datalad-dataset-serialization
name: datalad-dataset-serialization
prefixes:
dlco: https://concepts.datalad.org/
imports:
- datalad-datasets

classes:
DataladDatasetGraph:
tree_root: true
description:
Container for graph nodes describing DataLad dataset components.
attributes:
graph:
description: >-
DataLad dataset component graph nodes.
multivalued: true
inlined_as_list: true
range: TypedThing
# this is a future TODO: can we validate a JSONLD graph
aliases:
- "@graph"
60 changes: 52 additions & 8 deletions src/linkml/datalad-datasets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,57 @@ imports:
- datasets
- types

slots:
objtype:
designates_type: true
description: >-
Type designator.
#range: uriorcurie
range: string

id:
identifier: true
description: >-
Unique identifier.
range: string

classes:
DataladDataset:
is_a: Dataset
TypedThing:
description: >-
DataLad Dataset
slot_usage:
id:
# it is not a DataLad dataset without the ID
identifier: true
range: UUID
Object with a type designator.
slots:
- objtype

IdentifiedThing:
mixin: true
slots:
- id
FileContent:
is_a: TypedThing
mixins:
- IdentifiedThing
description: >-
File content
# DataladDataset:
# is_a: Dataset
# description: >-
# DataLad Dataset
# slot_usage:
# id:
# # it is not a DataLad dataset without the ID
# identifier: true
# range: UUID
#
# DataladDatasetVersion:
# class_uri: dlco:DataladDatasetVersion
# is_a: DatasetVersion
# description: >-
# DataLad Dataset version/revision/commit
# slot_usage:
# id:
# identifier: true
# range: SHA1
# version_of:
# range: DataladDataset
# multivalued: false
55 changes: 37 additions & 18 deletions src/linkml/datasets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,47 +4,66 @@ id: https://concepts.datalad.org/datasets
name: datasets
prefixes:
dctypes: http://purl.org/dc/dcmitype/
dct: http://purl.org/dc/terms/
schema: http://schema.org/
dlco: https://concepts.datalad.org/
linkml: https://w3id.org/linkml/
imports:
- linkml:types

slots:
version_label:
description: >-
Some string
range: string
version_of:
description: >-
Some string
range: string

classes:

Dataset:
class_uri: dlco:Dataset
description: >-
Dataset concept
close_mappings:
- schema:Dataset
slots:
- id

DatasetVersion:
class_uri: dlco:DatasetVersion
description: >-
Dataset version
exact_mappings:
- schema:Dataset
- dctypes:Dataset
DatasetPart:
description: >-
Component of a dataset, identified by its path within that dataset.
attributes:
path:
description: some
slot_uri: dlco:path
required: true
range: Path
Path:
class_uri: dlco:Path
description: >-
POSIX relpath
slots:
- version_label
- version_of








# DatasetPart:
# description: >-
# Component of a dataset, identified by its path within that dataset.
# attributes:
# path:
# description: some
# slot_uri: dlco:path
# required: true
# range: Path
# Path:
# class_uri: dlco:Path
# description: >-
# POSIX relpath
# SubDataset:
# is_a: DatasetVersion
# description: >-
# Dataset as a part of another dataset
#
#
slots:
id:
range: string
2 changes: 2 additions & 0 deletions src/linkml/root.yaml → src/linkml/ontology.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ default_prefix: dlco
imports:
- datasets
- datalad-datasets
# we are not importing this schema, it is top-level on its own
#- datalad-dataset-graph
11 changes: 11 additions & 0 deletions src/linkml/types.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
id: https://concepts.datalad.org/types
name: types
prefixes:
dlco: https://concepts.datalad.org/

types:
SHA1:
uri: dlco:sha1
base: str
pattern: "^[0-9a-f]{40}$"
description: >-
40 character hex digest of a 160-bit SHA-1 (Secure Hash Algorithm 1)
hash.
see_also:
- https://en.wikipedia.org/wiki/SHA-1
UUID:
uri: http://purl.obolibrary.org/obo/NCIT_C54100
base: str
Expand Down

0 comments on commit b3a95c7

Please sign in to comment.