diff --git a/Makefile b/Makefile index af3094f..f266c18 100644 --- a/Makefile +++ b/Makefile @@ -24,8 +24,8 @@ build/context.jsonld: src/linkml/ontology.yaml build/linkml-docs: \ build/linkml-docs/ontology \ build/linkml-docs/data-access-schema \ - build/linkml-docs/git-provenance-schema \ build/linkml-docs/datalad-dataset-version-schema +# build/linkml-docs/git-provenance-schema build/linkml-docs/%: src/linkml/%.yaml src/extra-docs/% gen-doc \ --mergeimports \ @@ -51,9 +51,9 @@ check: check-models check-validation # add additional schemas to lint here check-models: \ check-model-data-access-schema \ - check-model-git-provenance-schema \ check-model-datalad-dataset-version-schema \ check-model-ontology +# check-model-git-provenance-schema check-model-%: src/linkml/%.yaml @echo [Check $<] @echo "Run linter" @@ -82,11 +82,11 @@ check-model-%: src/linkml/%.yaml check-validation: \ convert-examples-data-access-schema \ check-validation-data-access-schema \ - convert-examples-git-provenance-schema \ - check-validation-git-provenance-schema \ convert-examples-datalad-dataset-version-schema \ check-validation-datalad-dataset-version-schema \ convert-examples-ontology +# convert-examples-git-provenance-schema +# check-validation-git-provenance-schema check-validation-%: $(MAKE) check-valid-validation-$* check-invalid-validation-$* check-valid-validation-%: tests/%/validation src/linkml/%.yaml @@ -103,8 +103,8 @@ check-invalid-validation-%: tests/%/validation src/linkml/%.yaml convert-examples: \ convert-examples-data-access-schema \ convert-examples-datalad-dataset-version-schema \ - convert-examples-git-provenance-schema \ convert-examples-ontology +# convert-examples-git-provenance-schema convert-examples-%: src/linkml/%.yaml src/examples/% # loop over all examples, skip the schema file itself for ex in $^/*.yaml; do \ diff --git a/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.json 
b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.json new file mode 100644 index 0000000..6e686b6 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.json @@ -0,0 +1,6 @@ +{ + "meta_id": "annex:0b76362c-aa27-11ee-be29-b3b123281259", + "uuid": "0b76362c-aa27-11ee-be29-b3b123281259", + "endpoint_url": "s3://obj.example.com/v3", + "@type": "AnnexRemoteSE" +} diff --git a/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.rdf b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.rdf new file mode 100644 index 0000000..0192333 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.rdf @@ -0,0 +1,9 @@ +@prefix annex: . +@prefix dlco: . +@prefix xsd: . + +annex:0b76362c-aa27-11ee-be29-b3b123281259 a dlco:AnnexRemoteSE ; + dlco:endpoint_url "s3://obj.example.com/v3"^^xsd:anyURI ; + dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^ . + + diff --git a/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.yaml b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.yaml new file mode 100644 index 0000000..7480e41 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.yaml @@ -0,0 +1,5 @@ +meta_id: annex:0b76362c-aa27-11ee-be29-b3b123281259 +uuid: 0b76362c-aa27-11ee-be29-b3b123281259 +# uses a standard DataService slot to encode the remote +# access target +endpoint_url: s3://obj.example.com/v3 diff --git a/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.json b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.json new file mode 100644 index 0000000..646a6c2 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.json @@ -0,0 +1,5 @@ +{ + "meta_id": "annex:0b76362c-aa27-11ee-be29-b3b123281259", + "uuid": "0b76362c-aa27-11ee-be29-b3b123281259", + "@type": "AnnexRemoteSE" +} diff 
--git a/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.rdf b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.rdf new file mode 100644 index 0000000..36c8f5a --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.rdf @@ -0,0 +1,7 @@ +@prefix annex: . +@prefix dlco: . + +annex:0b76362c-aa27-11ee-be29-b3b123281259 a dlco:AnnexRemoteSE ; + dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^ . + + diff --git a/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.yaml b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.yaml new file mode 100644 index 0000000..ead0729 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.yaml @@ -0,0 +1,2 @@ +meta_id: annex:0b76362c-aa27-11ee-be29-b3b123281259 +uuid: 0b76362c-aa27-11ee-be29-b3b123281259 diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.json b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.json new file mode 100644 index 0000000..a97afc2 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.json @@ -0,0 +1,37 @@ +{ + "is_version_of": { + "uuid": "0b76362c-aa27-11ee-be29-b3b123281259" + }, + "has_annex_remote": { + "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf": { + "meta_id": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf", + "uuid": "7e0bf3e7-7d46-4093-813e-b4009826c3bf" + } + }, + "has_part": { + "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58": { + "meta_id": "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "gitsha": "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "distribution": { + "qualified_access": { + "relation": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf", + "access_id": "MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt" + }, + "byte_size": 3425, + "checksum": { + "algorithm": "md5", + "digest": "32a617360d10e3dcbfdd0885e8d64ab8" + } + }, + "meta_type": "dlco:AnnexedFileSE" + } + }, + 
"qualified_part": [ + { + "relation": "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "at_location": "README.txt" + } + ], + "meta_type": "dlco:DataladDatasetVersionSE", + "@type": "DataladDatasetVersionSE" +} diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.rdf b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.rdf new file mode 100644 index 0000000..e184779 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.rdf @@ -0,0 +1,33 @@ +@prefix DCAT: . +@prefix annex: . +@prefix dct: . +@prefix dlco: . +@prefix gitsha: . +@prefix prov: . +@prefix spdx: . +@prefix xsd: . + +annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf a dlco:AnnexRemoteSE ; + dlco:uuid "7e0bf3e7-7d46-4093-813e-b4009826c3bf"^^ . + +gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 dlco:distribution [ a dlco:AnnexDistributionSE ; + spdx:checksum [ a spdx:Checksum ; + spdx:algorithm "md5"^^xsd:anyURI ; + spdx:checksumValue "32a617360d10e3dcbfdd0885e8d64ab8"^^xsd:hexBinary ] ; + DCAT:byteSize "3425"^^xsd:nonNegativeInteger ; + dlco:qualified_access [ a dlco:QualifiedAnnexAccessSE ; + dct:relation annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf ; + dlco:access_id "MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt" ] ] ; + dlco:gitsha "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58"^^dlco:sha1 ; + dlco:meta_type "dlco:AnnexedFileSE"^^xsd:anyURI . + +[] dct:hasPart gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 ; + DCAT:isVersionOf [ a dlco:DataladDatasetSE ; + dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^ ] ; + dlco:has_annex_remote annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf ; + dlco:meta_type "dlco:DataladDatasetVersionSE"^^xsd:anyURI ; + dlco:qualified_part [ a dlco:QualifiedPartSE ; + dct:relation gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 ; + prov:atLocation "README.txt"^^dlco:PosixRelPath ] . 
+ + diff --git a/src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.yaml b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.yaml similarity index 86% rename from src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.yaml rename to src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.yaml index 75133c5..56060bf 100644 --- a/src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.yaml +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.yaml @@ -5,7 +5,7 @@ has_annex_remote: uuid: 7e0bf3e7-7d46-4093-813e-b4009826c3bf has_part: gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58: - meta_type: dlco:AnnexedFile + meta_type: dlco:AnnexedFileSE gitsha: b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 distribution: byte_size: 3425 @@ -17,4 +17,4 @@ has_part: relation: annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf qualified_part: - at_location: README.txt - # relation: gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 + relation: gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.json b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.json new file mode 100644 index 0000000..022fe79 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.json @@ -0,0 +1,7 @@ +{ + "is_version_of": { + "uuid": "0b76362c-aa27-11ee-be29-b3b123281259" + }, + "meta_type": "dlco:DataladDatasetVersionSE", + "@type": "DataladDatasetVersionSE" +} diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.rdf b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.rdf new file mode 100644 index 0000000..a493c65 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.rdf @@ -0,0 +1,9 @@ +@prefix DCAT: . +@prefix dlco: . 
+@prefix xsd: . + +[] DCAT:isVersionOf [ a dlco:DataladDatasetSE ; + dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^ ] ; + dlco:meta_type "dlco:DataladDatasetVersionSE"^^xsd:anyURI . + + diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.yaml b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.yaml new file mode 100644 index 0000000..d086751 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.yaml @@ -0,0 +1,2 @@ +is_version_of: + uuid: 0b76362c-aa27-11ee-be29-b3b123281259 diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.json b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.json new file mode 100644 index 0000000..3e49b3f --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.json @@ -0,0 +1,26 @@ +{ + "is_version_of": { + "uuid": "0b76362c-aa27-11ee-be29-b3b123281259" + }, + "has_annex_remote": { + "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf": { + "meta_id": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf", + "uuid": "7e0bf3e7-7d46-4093-813e-b4009826c3bf" + } + }, + "has_part": { + "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58": { + "meta_id": "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "gitsha": "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "distribution": { + "qualified_access": { + "relation": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf", + "access_id": "MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt" + } + }, + "meta_type": "dlco:AnnexedFileSE" + } + }, + "meta_type": "dlco:DataladDatasetVersionSE", + "@type": "DataladDatasetVersionSE" +} diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.rdf b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.rdf new file mode 100644 index 0000000..9715cbe --- /dev/null +++ 
b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.rdf @@ -0,0 +1,24 @@ +@prefix DCAT: . +@prefix annex: . +@prefix dct: . +@prefix dlco: . +@prefix gitsha: . +@prefix xsd: . + +gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 dlco:distribution [ a dlco:AnnexDistributionSE ; + dlco:qualified_access [ a dlco:QualifiedAnnexAccessSE ; + dct:relation annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf ; + dlco:access_id "MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt" ] ] ; + dlco:gitsha "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58"^^dlco:sha1 ; + dlco:meta_type "dlco:AnnexedFileSE"^^xsd:anyURI . + +annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf a dlco:AnnexRemoteSE ; + dlco:uuid "7e0bf3e7-7d46-4093-813e-b4009826c3bf"^^ . + +[] dct:hasPart gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 ; + DCAT:isVersionOf [ a dlco:DataladDatasetSE ; + dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^ ] ; + dlco:has_annex_remote annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf ; + dlco:meta_type "dlco:DataladDatasetVersionSE"^^xsd:anyURI . 
+ + diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.yaml b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.yaml new file mode 100644 index 0000000..4400e9d --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.yaml @@ -0,0 +1,13 @@ +is_version_of: + uuid: 0b76362c-aa27-11ee-be29-b3b123281259 +has_annex_remote: + annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf: + uuid: 7e0bf3e7-7d46-4093-813e-b4009826c3bf +has_part: + gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58: + meta_type: dlco:AnnexedFileSE + gitsha: b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 + distribution: + qualified_access: + access_id: MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt + relation: annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.json b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.json new file mode 100644 index 0000000..a826127 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.json @@ -0,0 +1,20 @@ +{ + "is_version_of": { + "uuid": "0b76362c-aa27-11ee-be29-b3b123281259" + }, + "has_part": { + "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58": { + "meta_id": "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "gitsha": "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "meta_type": "dlco:AnnexedFileSE" + } + }, + "qualified_part": [ + { + "relation": "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "at_location": "README.txt" + } + ], + "meta_type": "dlco:DataladDatasetVersionSE", + "@type": "DataladDatasetVersionSE" +} diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.rdf b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.rdf new file mode 100644 index 0000000..c613bb6 --- /dev/null +++ 
b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.rdf @@ -0,0 +1,19 @@ +@prefix DCAT: . +@prefix dct: . +@prefix dlco: . +@prefix gitsha: . +@prefix prov: . +@prefix xsd: . + +gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 dlco:gitsha "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58"^^dlco:sha1 ; + dlco:meta_type "dlco:AnnexedFileSE"^^xsd:anyURI . + +[] dct:hasPart gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 ; + DCAT:isVersionOf [ a dlco:DataladDatasetSE ; + dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^ ] ; + dlco:meta_type "dlco:DataladDatasetVersionSE"^^xsd:anyURI ; + dlco:qualified_part [ a dlco:QualifiedPartSE ; + dct:relation gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 ; + prov:atLocation "README.txt"^^dlco:PosixRelPath ] . + + diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.yaml b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.yaml new file mode 100644 index 0000000..507b7c5 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.yaml @@ -0,0 +1,11 @@ +is_version_of: + uuid: 0b76362c-aa27-11ee-be29-b3b123281259 +has_part: + gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58: + meta_type: dlco:AnnexedFileSE + gitsha: b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 +qualified_part: + - at_location: README.txt + # key aspect is this reference-by-id of the inline part specification + # above + relation: gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.json b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.json new file mode 100644 index 0000000..f0a1ce5 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.json @@ -0,0 +1,14 @@ +{ + "is_version_of": { + "uuid": "0b76362c-aa27-11ee-be29-b3b123281259" + }, + "has_part": { + 
"gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58": { + "meta_id": "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "gitsha": "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "meta_type": "dlco:AnnexedFileSE" + } + }, + "meta_type": "dlco:DataladDatasetVersionSE", + "@type": "DataladDatasetVersionSE" +} diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.rdf b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.rdf new file mode 100644 index 0000000..73c4eb3 --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.rdf @@ -0,0 +1,15 @@ +@prefix DCAT: . +@prefix dct: . +@prefix dlco: . +@prefix gitsha: . +@prefix xsd: . + +gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 dlco:gitsha "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58"^^dlco:sha1 ; + dlco:meta_type "dlco:AnnexedFileSE"^^xsd:anyURI . + +[] dct:hasPart gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 ; + DCAT:isVersionOf [ a dlco:DataladDatasetSE ; + dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^ ] ; + dlco:meta_type "dlco:DataladDatasetVersionSE"^^xsd:anyURI . 
+ + diff --git a/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.yaml b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.yaml new file mode 100644 index 0000000..1ebffcf --- /dev/null +++ b/src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.yaml @@ -0,0 +1,8 @@ +is_version_of: + uuid: 0b76362c-aa27-11ee-be29-b3b123281259 +has_part: + gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58: + # this type-designator slot determines the object type used to + # interpret this inline record + meta_type: dlco:AnnexedFileSE + gitsha: b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 diff --git a/src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.json b/src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.json deleted file mode 100644 index b546959..0000000 --- a/src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "meta_type": "dlco:MonolithicDataladDatasetVersion", - "qualified_part": [ - { - "relation": { - "meta_type": "dlco:AnnexedFile", - "distribution": { - "byte_size": 3425, - "checksum": { - "algorithm": "md5", - "digest": "32a617360d10e3dcbfdd0885e8d64ab8" - }, - "qualified_access": { - "access_id": "MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt", - "relation": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf" - } - } - }, - "at_location": "README.txt" - } - ], - "is_version_of": { - "uuid": "0b76362c-aa27-11ee-be29-b3b123281259" - }, - "has_annex_remote": { - "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf": { - "meta_id": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf", - "uuid": "7e0bf3e7-7d46-4093-813e-b4009826c3bf" - } - }, - "@type": "MonolithicDataladDatasetVersion" -} diff --git a/src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.rdf b/src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.rdf deleted file 
mode 100644 index 00562a9..0000000 --- a/src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.rdf +++ /dev/null @@ -1,28 +0,0 @@ -@prefix DCAT: . -@prefix annex: . -@prefix dct: . -@prefix dlco: . -@prefix prov: . -@prefix spdx: . -@prefix xsd: . - -annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf a dlco:AnnexRemote ; - dlco:uuid "7e0bf3e7-7d46-4093-813e-b4009826c3bf"^^ . - -[] DCAT:isVersionOf [ a dlco:DataladDataset ; - dlco:uuid "0b76362c-aa27-11ee-be29-b3b123281259"^^ ] ; - dlco:has_annex_remote annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf ; - dlco:meta_type "dlco:MonolithicDataladDatasetVersion"^^xsd:anyURI ; - dlco:qualified_part [ a dlco:QualifiedPart ; - dct:relation [ dlco:distribution [ a dlco:AnnexDistribution ; - spdx:checksum [ a spdx:Checksum ; - spdx:algorithm "md5"^^xsd:anyURI ; - spdx:checksumValue "32a617360d10e3dcbfdd0885e8d64ab8"^^xsd:hexBinary ] ; - DCAT:byteSize "3425"^^xsd:nonNegativeInteger ; - dlco:qualified_access [ a dlco:QualifiedAnnexAccess ; - dct:relation annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf ; - dlco:access_id "MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt" ] ] ; - dlco:meta_type "dlco:AnnexedFile"^^xsd:anyURI ] ; - prov:atLocation "README.txt"^^dlco:PosixRelPath ] . - - diff --git a/src/linkml/datalad-dataset-version-schema.yaml b/src/linkml/datalad-dataset-version-schema.yaml index 0b8a584..a1ef22f 100644 --- a/src/linkml/datalad-dataset-version-schema.yaml +++ b/src/linkml/datalad-dataset-version-schema.yaml @@ -12,6 +12,7 @@ description: >- versions. 
prefixes: annex: https://concepts.datalad.org/namespace/annex-uuid/ + datalad: https://concepts.datalad.org/namespace/dataset-uuid/ DCAT: http://www.w3.org/ns/dcat# dct: http://purl.org/dc/terms/ dlco: https://concepts.datalad.org/ontology/ @@ -24,27 +25,34 @@ imports: default_prefix: dlco classes: - MonolithicDataladDatasetVersion: - is_a: DataladDatasetVersion + DataladDatasetVersionSE: + mixins: + - DataladDatasetVersion description: >- A variant of `dlco:DataladDatasetVersion` with additional properties for describing all dataset aspects in a structurally compact fashion with minimal duplication overhead. slots: + # TODO move to DataladDatasetVersion + # only overwrite `range` here to `AnnexRemoteSE` (or maybe even not?!) - has_annex_remote slot_usage: has_annex_remote: multivalued: true inlined: true + range: AnnexRemoteSE has_part: - range: FileInGit + range: AnnexedFileSE multivalued: true inlined: true - inlined_as_list: true is_version_of: + # a defining feature of a DataLad dataset is the association + # with a dataset concept (all versions) identifier + required: true inlined: true + range: DataladDatasetSE qualified_part: - range: QualifiedGitTrackedPart + range: QualifiedPartSE multivalued: true inlined: true inlined_as_list: true @@ -52,3 +60,71 @@ classes: - The property `has_annex_remote` is for defining all remotes inline with a meta_id index key, such that they can be referenced in a lean fashion in `QualifiedAnnexAccess` instances. + + DataladDatasetSE: + mixins: + - DataladDataset + description: >- + Data structure definition class for a `DataladDataset` + + # TODO "SE" is for "SchemaElement" or "StructureElement". The class adds nothing + # of semantic relevance to the "AnnexRemote" base class, which comes in as a + # mixin due to technical(?) limitations of linkml. It merely adds the notion of + # a required identifier for the metadata object describing an AnnexRemote in the + # context of this particular schema. 
We could also have a "DDVS" name suffix + # (derived from the name of this schema) + AnnexRemoteSE: + mixins: + - AnnexRemote + description: >- + Data structure definition class for an `AnnexRemote` + slots: + - meta_id + slot_usage: + meta_id: + equals_expression: "annex:{uuid}" + + AnnexedFileSE: + mixins: + - AnnexedFile + description: >- + Data structure definition class for an `AnnexedFile` + slots: + - meta_id + slot_usage: + meta_id: + # this is for the blob that is stored in git, which equals the pointer + # to an annex object, but NOT the relpath with the name under which + # it is registered in a (work)tree + equals_expression: "gitsha:{gitsha}" + distribution: + range: AnnexDistributionSE + + QualifiedPartSE: + mixins: + - QualifiedPart + # - QualifiedGitTrackedPart + description: >- + Data structure definition class for a `QualifiedPart` + slot_usage: + relation: + #range: FileInGit + range: AnnexedFileSE + + AnnexDistributionSE: + mixins: + - AnnexDistribution + description: >- + Data structure definition class for an `AnnexDistribution` + slot_usage: + qualified_access: + range: QualifiedAnnexAccessSE + + QualifiedAnnexAccessSE: + mixins: + - QualifiedAnnexAccess + description: >- + Data structure definition class for a `QualifiedAnnexAccess` + slot_usage: + relation: + range: AnnexRemoteSE diff --git a/src/linkml/datalad.yaml b/src/linkml/datalad.yaml index f2e43b0..ac7e79b 100644 --- a/src/linkml/datalad.yaml +++ b/src/linkml/datalad.yaml @@ -17,6 +17,7 @@ default_prefix: dlco classes: DataladDatasetVersion: + mixin: true is_a: Dataset description: >- A version (i.e., commit) of a Datalad dataset. @@ -25,6 +26,7 @@ classes: range: DataladDataset DataladDataset: + mixin: true description: >- Concept class for linking `DataladDatasetVersion` instances. 
slots: diff --git a/src/linkml/datasets.yaml b/src/linkml/datasets.yaml index ab61e75..15ea8f5 100644 --- a/src/linkml/datasets.yaml +++ b/src/linkml/datasets.yaml @@ -109,6 +109,7 @@ slots: classes: Resource: + mixin: true class_uri: DCAT:Resource slots: - has_part @@ -121,6 +122,9 @@ classes: multivalued: true qualified_part: multivalued: true + # TODO such structure-defining properties should be moved to a + # schema class, and should not narrow the applicability of these + # concept classes inlined: true inlined_as_list: true type: @@ -144,6 +148,7 @@ classes: range: DataService QualifiedPart: + mixin: true description: >- An association class for attaching additional information to a hasPart relationship between DCAT Resources @@ -165,6 +170,7 @@ classes: tree in multiple contexts, such as different versions of a dataset). Dataset: + mixin: true is_a: Resource description: >- A collection of data, published or curated, @@ -194,6 +200,7 @@ classes: - ncit:NCIT_C172256 File: + mixin: true is_a: Resource description: >- Resource for storing information, which is available to a computer @@ -214,6 +221,7 @@ classes: immediate context (e.g., a `QualifiedPart` of another `Resource`). Distribution: + mixin: true description: >- A specific representation of a dataset (part). Such a resource might be available in multiple serializations. @@ -231,6 +239,7 @@ classes: that cannot be inferred from a standard `DCAT:Distribution`. DataService: + mixin: true description: >- A collection of operations that provides access to one or more datasets or data processing functions. 
@@ -241,8 +250,3 @@ classes: - dctypes:Service slots: - endpoint_url - # although we do not expect any data service to have a unique identifier - # we must add this slot here, rather than in derived classes, due to - # a potential linkml limitation/bug - # https://github.com/psychoinformatics-de/datalad-concepts/issues/30 - - meta_id diff --git a/src/linkml/git-annex.yaml b/src/linkml/git-annex.yaml index eaa11df..689c034 100644 --- a/src/linkml/git-annex.yaml +++ b/src/linkml/git-annex.yaml @@ -22,15 +22,21 @@ slots: classes: AnnexRemote: + # The basic idea behind this mixin approach (apart from the fact that linkml + # more or less forced it on us) is that we have more or less separate class + # hierarchies within each ontology component, and the alignment between + # concepts is expressed via mixin relationships. We declare many classes + # mixins, because linkml only allows mixin classes to be derived from other + # mixins + mixin: true is_a: DataService description: >- A git-annex (special) remote. slots: - uuid slot_usage: - meta_id: + uuid: required: true - equals_expression: "annex:{uuid}" QualifiedAnnexAccess: is_a: QualifiedAccess @@ -43,7 +49,9 @@ classes: range: AnnexRemote AnnexedFile: + mixin: true is_a: FileInGit + #is_a: FileInGit description: >- A file in a Git repository that is managed by git-annex. slot_usage: @@ -51,6 +59,7 @@ classes: range: AnnexDistribution AnnexDistribution: + mixin: true is_a: Distribution description: >- A distribution that is accessible via git-annex. diff --git a/src/linkml/git.yaml b/src/linkml/git.yaml index 1cae778..161ddfe 100644 --- a/src/linkml/git.yaml +++ b/src/linkml/git.yaml @@ -28,33 +28,36 @@ classes: `gitsha` identifier. slots: - gitsha - - meta_id + # - meta_id slot_usage: gitsha: required: true - meta_id: - description: >- - SHA1 based identifier in the form of a CURIE with an explicit - `gitsha:` namespace prefix. 
- equals_expression: "gitsha:{gitsha}" - pattern: "^gitsha:[0-9a-f]{40}$" + #meta_id: + # description: >- + # SHA1 based identifier in the form of a CURIE with an explicit + # `gitsha:` namespace prefix. + # equals_expression: "gitsha:{gitsha}" + # pattern: "^gitsha:[0-9a-f]{40}$" comments: - Because there can only be a single main `identifier`, this mixin cannot be used on classes that already have or inherited another property declared as `identifier:true`. - QualifiedGitTrackedPart: - is_a: QualifiedPart - description: >- - TODO we may need this to make relation point to resources with a - (Git) identifier only, such that we could reference by key - slot_usage: - relation: - range: FileInGit - FileInGit: + mixin: true is_a: File mixins: - GitTracked description: >- A `File` that is tracked with Git. + + #QualifiedGitTrackedPart: + # mixin: true + # is_a: QualifiedPart + # description: >- + # TODO we may need this to make relation point to resources with a + # (Git) identifier only, such that we could reference by key + # slot_usage: + # relation: + # range: FileInGit + diff --git a/tests/datalad-dataset-version-schema/validation/basic.valid.cfg.yaml b/tests/datalad-dataset-version-schema/validation/AnnexRemoteSE.valid.cfg.yaml similarity index 50% rename from tests/datalad-dataset-version-schema/validation/basic.valid.cfg.yaml rename to tests/datalad-dataset-version-schema/validation/AnnexRemoteSE.valid.cfg.yaml index ca74769..55163b8 100644 --- a/tests/datalad-dataset-version-schema/validation/basic.valid.cfg.yaml +++ b/tests/datalad-dataset-version-schema/validation/AnnexRemoteSE.valid.cfg.yaml @@ -1,7 +1,8 @@ schema: src/linkml/datalad-dataset-version-schema.yaml -target_class: MonolithicDataladDatasetVersion +target_class: AnnexRemoteSE data_sources: - - src/examples/datalad-dataset-version-schema/MonolithicDataladDatasetVersion-draft.yaml + - src/examples/datalad-dataset-version-schema/AnnexRemoteSE-minimal.yaml + - 
src/examples/datalad-dataset-version-schema/AnnexRemoteSE-dataserviceprops.yaml plugins: JsonschemaValidationPlugin: closed: true diff --git a/tests/datalad-dataset-version-schema/validation/DataladDatasetVersionSE.valid.cfg.yaml b/tests/datalad-dataset-version-schema/validation/DataladDatasetVersionSE.valid.cfg.yaml new file mode 100644 index 0000000..5da76b9 --- /dev/null +++ b/tests/datalad-dataset-version-schema/validation/DataladDatasetVersionSE.valid.cfg.yaml @@ -0,0 +1,13 @@ +schema: src/linkml/datalad-dataset-version-schema.yaml +target_class: DataladDatasetVersionSE +data_sources: + - src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-minimal.yaml + - src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-types.yaml + - src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-location.yaml + - src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-part-availability.yaml + - src/examples/datalad-dataset-version-schema/DataladDatasetVersionSE-full.yaml +plugins: + JsonschemaValidationPlugin: + closed: true + include_range_class_descendants: false + RecommendedSlotsPlugin: