diff --git a/src/examples/datalad-dataset-components/ContainerSE-DatasetWFiles.json b/src/examples/datalad-dataset-components/ContainerSE-DatasetWFiles.json index 3cf479e..5538fd4 100644 --- a/src/examples/datalad-dataset-components/ContainerSE-DatasetWFiles.json +++ b/src/examples/datalad-dataset-components/ContainerSE-DatasetWFiles.json @@ -39,7 +39,7 @@ "gitsha": "13ffd94a94e32f9482528fc65d59562b011f6f87", "qualified_part": [ { - "relation": "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", + "relation": "annexkey:MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.bin", "name": "data.bin" }, { @@ -54,18 +54,15 @@ "gitsha": "56094a33cf330fef5b375aa813fc4dc07147729f" }, { - "meta_id": "gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", - "meta_type": "dlccs:AnnexedFileSE", - "gitsha": "b94ef1797f7bfc1ac979be122e1b538bbb0d1d58", - "distribution": { - "qualified_access": [ - { - "relation": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf", - "access_id": "MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt" - } - ], - "byte_size": 3425 - } + "meta_id": "annexkey:MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.bin", + "meta_type": "dlccs:StableAnnexKeySE", + "qualified_access": [ + { + "relation": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf", + "access_id": "MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.bin" + } + ], + "byte_size": 3425 }, { "meta_id": "annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf", diff --git a/src/examples/datalad-dataset-components/ContainerSE-DatasetWFiles.yaml b/src/examples/datalad-dataset-components/ContainerSE-DatasetWFiles.yaml index 525a395..1717cba 100644 --- a/src/examples/datalad-dataset-components/ContainerSE-DatasetWFiles.yaml +++ b/src/examples/datalad-dataset-components/ContainerSE-DatasetWFiles.yaml @@ -28,7 +28,7 @@ components: gitsha: 13ffd94a94e32f9482528fc65d59562b011f6f87 qualified_part: - name: data.bin - relation: gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 + relation: annexkey:MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.bin - name: 
README.txt relation: gitsha:56094a33cf330fef5b375aa813fc4dc07147729f # git blob @@ -36,14 +36,14 @@ components: meta_type: dlccs:GitBlobSE gitsha: 56094a33cf330fef5b375aa813fc4dc07147729f # annexed file - - meta_id: gitsha:b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 - meta_type: dlccs:AnnexedFileSE - gitsha: b94ef1797f7bfc1ac979be122e1b538bbb0d1d58 - distribution: - byte_size: 3425 - qualified_access: - - access_id: MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.txt - relation: annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf + - meta_id: annexkey:MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.bin + meta_type: dlccs:StableAnnexKeySE + byte_size: 3425 + qualified_access: + # TODO access_id is not needed in most cases, should likely + # become some kind of `STATE` report + - access_id: MD5E-s3425--32a617360d10e3dcbfdd0885e8d64ab8.bin + relation: annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf # annex remote - meta_id: annex:7e0bf3e7-7d46-4093-813e-b4009826c3bf meta_type: dlccs:AnnexRemoteSE diff --git a/src/linkml/ontology/git-annex.yaml b/src/linkml/ontology/git-annex.yaml index 4c01141..df3a5a8 100644 --- a/src/linkml/ontology/git-annex.yaml +++ b/src/linkml/ontology/git-annex.yaml @@ -5,10 +5,10 @@ description: > TODO prefixes: annex: https://concepts.datalad.org/namespace/annex-uuid/ + annexkey: https://concepts.datalad.org/namespace/annex-key/ dlco: https://concepts.datalad.org/ontology/ imports: - ../ontology/datasets - - ../ontology/git - ../ontology/properties default_prefix: dlco @@ -59,21 +59,19 @@ classes: https://git-annex.branchable.com/design/external_special_remote_protocol). A state is more or less an additional/arbitrary set of parameters. - AnnexedFile: - mixin: true - is_a: FileInGit - #is_a: FileInGit - description: >- - A file in a Git repository that is managed by git-annex. 
- slot_usage: - distribution: - range: AnnexDistribution - - AnnexDistribution: + StableAnnexKey: mixin: true is_a: Distribution description: >- - A distribution that is accessible via git-annex. + Git-annex file content blob. Keys are identified with a name. This + class represents keys with a stable, content-based key name, such + as MD5, SHA1, SHA256 and any cryptographically secure backend, + including the "E" variants. Importantly, not `URL` keys. + slots: + - name slot_usage: qualified_access: range: QualifiedAnnexAccess + see_also: + - https://git-annex.branchable.com/internals/key_format/ + - https://git-annex.branchable.com/backends/ diff --git a/src/linkml/ontology/git.yaml b/src/linkml/ontology/git.yaml index c477299..4360635 100644 --- a/src/linkml/ontology/git.yaml +++ b/src/linkml/ontology/git.yaml @@ -107,25 +107,14 @@ classes: links other trees and blobs under particular names. - FileInGit: + GitBlob: mixin: true - is_a: File + is_a: Distribution mixins: - GitTracked description: >- - A `File` that is tracked with Git. - todos: - - Rename to `GitBlob` - - #QualifiedGitTrackedPart: - # mixin: true - # is_a: QualifiedPart - # description: >- - # TODO we may need this to make relation point to resources with a - # (Git) identifier only, such that we could reference by key - # slot_usage: - # relation: - # range: FileInGit + Content (for example of a file) that is tracked with Git. 
+ GitUserAgent: mixin: true diff --git a/src/linkml/schemas/datalad-dataset-components.yaml b/src/linkml/schemas/datalad-dataset-components.yaml index 84007d5..f2493ef 100644 --- a/src/linkml/schemas/datalad-dataset-components.yaml +++ b/src/linkml/schemas/datalad-dataset-components.yaml @@ -34,6 +34,7 @@ license: MIT prefixes: annex: https://concepts.datalad.org/namespace/annex-uuid/ + annexkey: https://concepts.datalad.org/namespace/annex-key/ bibo: http://purl.org/ontology/bibo/ datalad: https://concepts.datalad.org/namespace/dataset-uuid/ DCAT: http://www.w3.org/ns/dcat# @@ -100,19 +101,6 @@ classes: relation: range: GitTrackedSE - AnnexDistributionSE: - class_uri: dlccs:AnnexDistributionSE - mixins: - - AnnexDistribution - description: >- - Schema element for a `AnnexDistribution`. - slot_usage: - qualified_access: - inlined: true - inlined_as_list: true - multivalued: true - range: QualifiedAnnexAccessSE - QualifiedAnnexAccessSE: class_uri: dlccs:QualifiedAnnexAccessSE mixins: @@ -198,11 +186,17 @@ classes: - FilesystemDirectoryItem description: >- Schema element for a `FilesystemDirectoryItem`. + slots: + - relation slot_usage: relation: - range: GitTrackedSE + # union range specification does not seem to work + #any_of: + # - range: GitTrackedSE + # - range: StableAnnexKeySE + range: ComponentSE todos: - - make relation range a class that can only be a tree or a blob + - figure out why a union range specification is not working CommittingSE: class_uri: dlccs:CommittingSE @@ -230,26 +224,22 @@ classes: class_uri: dlccs:GitBlobSE is_a: GitTrackedSE mixins: - - FileInGit + - GitBlob description: >- Schema element for a `FileInGit`. - AnnexedFileSE: - class_uri: dlccs:AnnexedFileSE - is_a: GitTrackedSE + StableAnnexKeySE: + class_uri: dlccs:StableAnnexKeySE + is_a: ComponentSE mixins: - - AnnexedFile + - StableAnnexKey description: >- - Schema element for a `AnnexedFile`. + Schema element for a `StableAnnexKey`. 
+ slots: + - meta_id slot_usage: - distribution: + qualified_access: inlined: true - multivalued: false - range: AnnexDistributionSE - notes: - - This is not multivalued, because the distribution of an annexed - file is an annex key, a bit identical blob. The only thing that - we can have multiple of are remote locations, where this key is - available. Even when have a URL key and the actual content may - be unknown (yet), the system is not made to switch between - distributions without filename changes. + inlined_as_list: true + multivalued: true + range: QualifiedAnnexAccessSE