From e1a87553116c430daca811f4abcbab9fcfb10939 Mon Sep 17 00:00:00 2001 From: Michael Bar-Sinai Date: Mon, 5 Dec 2022 00:02:20 +0200 Subject: [PATCH] Summary for dkNET-DRP done --- README.md | 19 +- .../dkNET-DRP/summary/614b619410622.xml.txt | 650 ++ .../dkNET-DRP/summary/614ba8756c8db.xml.txt | 617 ++ .../dkNET-DRP/summary/614df6a85b9b1.xml.txt | 650 ++ .../dkNET-DRP/summary/6155e4b25d5bb.xml.txt | 621 ++ .../dkNET-DRP/summary/61573f38ef525.xml.txt | 636 ++ .../dkNET-DRP/summary/615b72f7e220a.xml.txt | 639 ++ .../dkNET-DRP/summary/615b89fa770b6.xml.txt | 639 ++ .../dkNET-DRP/summary/615ca4418ee3f.xml.txt | 594 ++ .../dkNET-DRP/summary/615dc910773b8.xml.txt | 637 ++ .../dkNET-DRP/summary/616066f394fa6.xml.txt | 606 ++ .../dkNET-DRP/summary/61609db5051d2.xml.txt | 618 ++ .../dkNET-DRP/summary/6164baf6052a8.xml.txt | 637 ++ .../dkNET-DRP/summary/6165b40aacafb.xml.txt | 573 ++ .../dkNET-DRP/summary/616712744a595.xml.txt | 621 ++ .../dkNET-DRP/summary/61676b2d11524.xml.txt | 638 ++ .../dkNET-DRP/summary/616f4035b5cf4.xml.txt | 604 ++ .../dkNET-DRP/summary/6171d0459add1.xml.txt | 615 ++ .../dkNET-DRP/summary/61732f9ba6055.xml.txt | 645 ++ .../dkNET-DRP/summary/61735ece7e758.xml.txt | 653 ++ .../dkNET-DRP/summary/6176c84f1c023.xml.txt | 612 ++ .../dkNET-DRP/summary/6176e09b35d7f.xml.txt | 622 ++ .../dkNET-DRP/summary/6177452b8b6c7.xml.txt | 643 ++ .../dkNET-DRP/summary/61782a94716e3.xml.txt | 610 ++ .../dkNET-DRP/summary/61783f6c4c8e7.xml.txt | 592 ++ .../dkNET-DRP/summary/617850897d411.xml.txt | 625 ++ .../dkNET-DRP/summary/617864ac7b873.xml.txt | 606 ++ .../dkNET-DRP/summary/61799d37c3555.xml.txt | 671 ++ .../dkNET-DRP/summary/6179b9da80888.xml.txt | 654 ++ .../dkNET-DRP/summary/617accb79fb53.xml.txt | 599 ++ .../dkNET-DRP/summary/617ad2ab32afc.xml.txt | 597 ++ .../dkNET-DRP/summary/617af3b01bff2.xml.txt | 630 ++ .../dkNET-DRP/summary/617aff8495ced.xml.txt | 640 ++ .../dkNET-DRP/summary/617c779c5bc94.xml.txt | 619 ++ .../dkNET-DRP/summary/61800a6acbf2a.xml.txt | 610 ++ 
.../dkNET-DRP/summary/61805a39e24bb.xml.txt | 662 ++ .../dkNET-DRP/summary/618063fc1eba5.xml.txt | 604 ++ .../dkNET-DRP/summary/61816a627e26b.xml.txt | 666 ++ .../dkNET-DRP/summary/618180e41db0e.xml.txt | 609 ++ .../dkNET-DRP/summary/61818c0471e3e.xml.txt | 637 ++ .../dkNET-DRP/summary/618194d41e94a.xml.txt | 610 ++ .../dkNET-DRP/summary/6182df9c3d9e3.xml.txt | 612 ++ .../dkNET-DRP/summary/6182f66084e0b.xml.txt | 621 ++ .../dkNET-DRP/summary/61830248beb1c.xml.txt | 621 ++ .../dkNET-DRP/summary/61856c2f8d135.xml.txt | 623 ++ .../dkNET-DRP/summary/618772a4eda31.xml.txt | 619 ++ .../dkNET-DRP/summary/61895560bbab4.xml.txt | 617 ++ .../dkNET-DRP/summary/618ab2f1efc9f.xml.txt | 647 ++ .../dkNET-DRP/summary/618ac6bb76674.xml.txt | 621 ++ .../dkNET-DRP/summary/618af1fa72f85.xml.txt | 637 ++ .../dkNET-DRP/summary/618afa63748f2.xml.txt | 588 ++ .../dkNET-DRP/summary/618b05ddaf1c8.xml.txt | 624 ++ .../dkNET-DRP/summary/618b0ed289968.xml.txt | 619 ++ .../dkNET-DRP/summary/618e95d1e58c7.xml.txt | 597 ++ .../dkNET-DRP/summary/618eaa9fcd36f.xml.txt | 585 ++ .../dkNET-DRP/summary/61941c2f7748c.xml.txt | 589 ++ .../dkNET-DRP/summary/61942b69bdeab.xml.txt | 579 ++ .../dkNET-DRP/summary/6194378833fb3.xml.txt | 583 ++ .../dkNET-DRP/summary/619441c4acb77.xml.txt | 618 ++ .../dkNET-DRP/summary/6198116a6dfed.xml.txt | 624 ++ .../dkNET-DRP/summary/6198331eb3593.xml.txt | 639 ++ .../dkNET-DRP/summary/61985711a8424.xml.txt | 660 ++ .../dkNET-DRP/summary/6198648660f3d.xml.txt | 656 ++ .../dkNET-DRP/summary/619c1b5161bad.xml.txt | 641 ++ .../dkNET-DRP/summary/619c34b5360d2.xml.txt | 612 ++ .../dkNET-DRP/summary/61a50fa18488f.xml.txt | 603 ++ .../dkNET-DRP/summary/61a66d491f16a.xml.txt | 603 ++ .../dkNET-DRP/summary/61a67946c16f2.xml.txt | 619 ++ .../dkNET-DRP/summary/61a67f53dc9b3.xml.txt | 648 ++ .../dkNET-DRP/summary/61a7bc9e3a002.xml.txt | 585 ++ .../dkNET-DRP/summary/61a7d3c6aeb5d.xml.txt | 624 ++ .../dkNET-DRP/summary/61a7e89c785ae.xml.txt | 620 ++ 
.../dkNET-DRP/summary/61a909a92778f.xml.txt | 611 ++ .../dkNET-DRP/summary/61a9183722786.xml.txt | 636 ++ .../dkNET-DRP/summary/61a92a18640d8.xml.txt | 637 ++ .../dkNET-DRP/summary/61aa5882975cb.xml.txt | 621 ++ .../dkNET-DRP/summary/61aa8e7632db5.xml.txt | 639 ++ .../dkNET-DRP/summary/61ae6db443359.xml.txt | 637 ++ .../dkNET-DRP/summary/61ae7c9289651.xml.txt | 639 ++ .../dkNET-DRP/summary/61af958cc8abe.xml.txt | 622 ++ .../dkNET-DRP/summary/61afc04c3cc7a.xml.txt | 619 ++ .../dkNET-DRP/summary/61afe38742749.xml.txt | 617 ++ .../dkNET-DRP/summary/61b23d317644e.xml.txt | 647 ++ .../dkNET-DRP/summary/61b257c62b44a.xml.txt | 607 ++ .../dkNET-DRP/summary/61b2654669195.xml.txt | 620 ++ .../dkNET-DRP/summary/61b38f199319f.xml.txt | 615 ++ .../dkNET-DRP/summary/61b3991a21735.xml.txt | 611 ++ .../dkNET-DRP/summary/621d12cf667c7.xml.txt | 611 ++ .../dkNET-DRP/summary/621d4ff4808c5.xml.txt | 639 ++ .../dkNET-DRP/summary/621d682e591a5.xml.txt | 639 ++ .../dkNET-DRP/summary/answer-summary.ods | Bin 0 -> 25958 bytes .../dkNET-DRP/summary/summary-coordinate.tsv | 46 + .../dkNET-DRP/summary/summary-transcript.tsv | 32 + split-by-model/dkNET-DRP/summary/summary.json | 6853 +++++++++++++++++ 94 files changed, 62360 insertions(+), 2 deletions(-) create mode 100644 split-by-model/dkNET-DRP/summary/614b619410622.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/614ba8756c8db.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/614df6a85b9b1.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6155e4b25d5bb.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61573f38ef525.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/615b72f7e220a.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/615b89fa770b6.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/615ca4418ee3f.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/615dc910773b8.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/616066f394fa6.xml.txt create mode 
100644 split-by-model/dkNET-DRP/summary/61609db5051d2.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6164baf6052a8.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6165b40aacafb.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/616712744a595.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61676b2d11524.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/616f4035b5cf4.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6171d0459add1.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61732f9ba6055.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61735ece7e758.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6176c84f1c023.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6176e09b35d7f.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6177452b8b6c7.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61782a94716e3.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61783f6c4c8e7.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/617850897d411.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/617864ac7b873.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61799d37c3555.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6179b9da80888.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/617accb79fb53.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/617ad2ab32afc.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/617af3b01bff2.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/617aff8495ced.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/617c779c5bc94.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61800a6acbf2a.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61805a39e24bb.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618063fc1eba5.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61816a627e26b.xml.txt create mode 
100644 split-by-model/dkNET-DRP/summary/618180e41db0e.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61818c0471e3e.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618194d41e94a.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6182df9c3d9e3.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6182f66084e0b.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61830248beb1c.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61856c2f8d135.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618772a4eda31.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61895560bbab4.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618ab2f1efc9f.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618ac6bb76674.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618af1fa72f85.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618afa63748f2.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618b05ddaf1c8.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618b0ed289968.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618e95d1e58c7.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/618eaa9fcd36f.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61941c2f7748c.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61942b69bdeab.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6194378833fb3.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/619441c4acb77.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6198116a6dfed.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6198331eb3593.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61985711a8424.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/6198648660f3d.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/619c1b5161bad.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/619c34b5360d2.xml.txt create mode 
100644 split-by-model/dkNET-DRP/summary/61a50fa18488f.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a66d491f16a.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a67946c16f2.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a67f53dc9b3.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a7bc9e3a002.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a7d3c6aeb5d.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a7e89c785ae.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a909a92778f.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a9183722786.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61a92a18640d8.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61aa5882975cb.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61aa8e7632db5.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61ae6db443359.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61ae7c9289651.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61af958cc8abe.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61afc04c3cc7a.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61afe38742749.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61b23d317644e.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61b257c62b44a.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61b2654669195.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61b38f199319f.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/61b3991a21735.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/621d12cf667c7.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/621d4ff4808c5.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/621d682e591a5.xml.txt create mode 100644 split-by-model/dkNET-DRP/summary/answer-summary.ods create mode 100644 split-by-model/dkNET-DRP/summary/summary-coordinate.tsv create mode 
100644 split-by-model/dkNET-DRP/summary/summary-transcript.tsv create mode 100644 split-by-model/dkNET-DRP/summary/summary.json diff --git a/README.md b/README.md index d328dcb..7aedad9 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,25 @@ # dkNET-Repository-Finder-Data Repository for the data behind the repository finder +## Models: + +## Original Paper Model [dkNET-DRP](split-by-model/dkNET-DRP) + +Model: http://trees.scicrunch.io/models/dkNET-DRP/7/?localizationName=en-US + +Top Coordinate: `DataRepoCompliance` + +## Repository Tagging for the dkNET tool [nih-repotag](split-by-model/nih-repotag/) + +Model: http://trees.scicrunch.io/models/nih-repotag/4/?localizationName=en_US + +Top Coordinate: `NIHDSP` + ## Log ### Initial Work -* Splitting the .xml files in the repo according to questionnaire that created them. New files are placed in [split-by-modal](split-by-modal). +* Splitting the .xml files in the repo according to the questionnaire that created them. New files are placed in [split-by-model](split-by-model). * Problems: The following files are mentioned in the [index csv](scicrunch-raw-data/Policy_Interviews.csv), but the xml file itself is missing: ```` cp: scicrunch-raw-data/6165af2cb187d.xml: No such file or directory @@ -13,4 +27,5 @@ Repository for the data behind the repository finder cp: scicrunch-raw-data/618063ccbdc33.xml: No such file or directory cp: scicrunch-raw-data/61ae86c8d478a.xml: No such file or directory cp: scicrunch-raw-data/621d07ad2f91a.xml: No such file or directory - ```` \ No newline at end of file + ```` +* Creating the index files. diff --git a/split-by-model/dkNET-DRP/summary/614b619410622.xml.txt b/split-by-model/dkNET-DRP/summary/614b619410622.xml.txt new file mode 100644 index 0000000..4d5f9e5 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/614b619410622.xml.txt @@ -0,0 +1,650 @@ +Transcript: 614b619410622.xml + +Questions and Answers: +1. 
[sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data? Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign a data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. These restrictions are stricter than the possible harm from misuse of the data would justify. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. 
Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? 
+ + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible, as closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that is designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +12. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use a Gene Ontology term do we know it’s from the Gene Ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +15. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles: Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference to a reference manager (e.g., as JSON, XML, or BibTeX). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +18. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: best (0.0000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +22. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: datasetAndSubject +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/614ba8756c8db.xml.txt b/split-by-model/dkNET-DRP/summary/614ba8756c8db.xml.txt new file mode 100644 index 0000000..6455094 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/614ba8756c8db.xml.txt @@ -0,0 +1,617 @@ +Transcript: 614ba8756c8db.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict relative to the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: best (0.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: full (0.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: 
datasetAndSubject +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: full diff --git a/split-by-model/dkNET-DRP/summary/614df6a85b9b1.xml.txt b/split-by-model/dkNET-DRP/summary/614df6a85b9b1.xml.txt new file mode 100644 index 0000000..b43a462 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/614df6a85b9b1.xml.txt @@ -0,0 +1,650 @@ +Transcript: 614df6a85b9b1.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. 
Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +12. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +15. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +18. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +22. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +24. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +26. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/6155e4b25d5bb.xml.txt b/split-by-model/dkNET-DRP/summary/6155e4b25d5bb.xml.txt new file mode 100644 index 0000000..c50d3cf --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6155e4b25d5bb.xml.txt @@ -0,0 +1,621 @@ +Transcript: 6155e4b25d5bb.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
That is, not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent — that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences.
+ + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: no (1.0000) +Note: +Is Pub Chem CID one of PIDs according to FAIR principles for reagents? + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. 
[Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13.
[sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use a Gene Ontology term, do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s).
+ +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +They provide link to PubChem via PubChem CID; ChEBI Ontology is used; + +21. 
[sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: none (1.0000) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: none diff --git a/split-by-model/dkNET-DRP/summary/61573f38ef525.xml.txt b/split-by-model/dkNET-DRP/summary/61573f38ef525.xml.txt new file mode 100644 index 0000000..55aa673 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61573f38ef525.xml.txt @@ -0,0 +1,636 @@ +Transcript: 61573f38ef525.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible, as closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data?
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: notInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/615b72f7e220a.xml.txt b/split-by-model/dkNET-DRP/summary/615b72f7e220a.xml.txt new file mode 100644 index 0000000..ea55798 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/615b72f7e220a.xml.txt @@ -0,0 +1,639 @@ +Transcript: 615b72f7e220a.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) +Note: +no information on the website + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. 
Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. 
[sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? 
+ + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. 
+ + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? 
+ + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/615b89fa770b6.xml.txt b/split-by-model/dkNET-DRP/summary/615b89fa770b6.xml.txt new file mode 100644 index 0000000..7059d94 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/615b89fa770b6.xml.txt @@ -0,0 +1,639 @@ +Transcript: 615b89fa770b6.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: required (0.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: best (0.0000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: best (0.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: best (0.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) +Note: +In case of closure of the repository, best efforts will be made to integrate all content into suitable alternative institutional and/or subject based repositories. + +20. 
[sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. 
+ +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. 
Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: best (0.0000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). 
One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: required +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: formal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: datasetAndSubject +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: full +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: full +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/615ca4418ee3f.xml.txt b/split-by-model/dkNET-DRP/summary/615ca4418ee3f.xml.txt new file mode 100644 index 0000000..8c7d499 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/615ca4418ee3f.xml.txt @@ -0,0 +1,594 @@ +Transcript: 615ca4418ee3f.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: best (0.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: no (1.0000) +Note: +The FAIR Principles Explained - page not found + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. 
[Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. 
[sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). 
+ +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? 
+ + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) +Note: +FlowRepository implements an Application Programming Interface (API) that allows third party software tools to browse public + +datasets and download data and annotations associated with these datasets. 
The current API is a beta version that will likely be + +extended in the future, namely to support authentication, data submission and querying. + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? 
+ + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: full +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/615dc910773b8.xml.txt b/split-by-model/dkNET-DRP/summary/615dc910773b8.xml.txt new file mode 100644 index 0000000..754eae5 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/615dc910773b8.xml.txt @@ -0,0 +1,637 @@ +Transcript: 615dc910773b8.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRxiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check what the API services provide, if they are documented. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: best (0.0000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: full +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/616066f394fa6.xml.txt b/split-by-model/dkNET-DRP/summary/616066f394fa6.xml.txt new file mode 100644 index 0000000..ffec482 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/616066f394fa6.xml.txt @@ -0,0 +1,606 @@ +Transcript: 616066f394fa6.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict relative to the possible harm that misuse of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) +Note: +ICPSR + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61609db5051d2.xml.txt b/split-by-model/dkNET-DRP/summary/61609db5051d2.xml.txt new file mode 100644 index 0000000..3ee37b8 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61609db5051d2.xml.txt @@ -0,0 +1,618 @@ +Transcript: 61609db5051d2.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available.
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix.
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is yes if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality but unless this is specified on the website, the response is ‘No’. + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14.
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. 
+ + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. 
Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). 
One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: none (1.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: 
lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: none diff --git a/split-by-model/dkNET-DRP/summary/6164baf6052a8.xml.txt b/split-by-model/dkNET-DRP/summary/6164baf6052a8.xml.txt new file mode 100644 index 0000000..39e4634 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6164baf6052a8.xml.txt @@ -0,0 +1,637 @@ +Transcript: 6164baf6052a8.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software.
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6.
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. The documentation should explain not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use a Gene Ontology term, do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: by policy (0.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +21. 
[sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,persistentMetadata,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: byStatedPolicy +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: fullyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/6165b40aacafb.xml.txt b/split-by-model/dkNET-DRP/summary/6165b40aacafb.xml.txt new file mode 100644 index 0000000..8206791 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6165b40aacafb.xml.txt @@ -0,0 +1,573 @@ +Transcript: 6165b40aacafb.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
These restrictions are stricter than the potential harm from misuse of the data would justify. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and re-shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
The documentation should explain not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent — that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field (data set identifier or equivalent) that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the documented API services to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use a Gene Ontology term, do we know it’s from the Gene Ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +26. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +27. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially diff --git a/split-by-model/dkNET-DRP/summary/616712744a595.xml.txt b/split-by-model/dkNET-DRP/summary/616712744a595.xml.txt new file mode 100644 index 0000000..4c4ccec --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/616712744a595.xml.txt @@ -0,0 +1,621 @@ +Transcript: 616712744a595.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. 
[sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, to see what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? 
+ + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. 
Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +MeSH controlled vocabulary or SNOMED CT + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +XML schema + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: none (1.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
fairMetadataOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: none diff --git a/split-by-model/dkNET-DRP/summary/61676b2d11524.xml.txt b/split-by-model/dkNET-DRP/summary/61676b2d11524.xml.txt new file mode 100644 index 0000000..c186947 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61676b2d11524.xml.txt @@ -0,0 +1,638 @@ +Transcript: 61676b2d11524.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and re-shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available.
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix.
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRxiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique.
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14.
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) +Note: +VCF format is standard file format + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: yes (0.0000) + +22. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. 
+ + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/616f4035b5cf4.xml.txt b/split-by-model/dkNET-DRP/summary/616f4035b5cf4.xml.txt new file mode 100644 index 0000000..c03ed37 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/616f4035b5cf4.xml.txt @@ -0,0 +1,604 @@ +Transcript: 616f4035b5cf4.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available.
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix.
The repository should also ensure that the identifier is persistent—that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRxiv Dec. 28, 2016.
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars.
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: best (0.0000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID.
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: best (0.0000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: by policy (0.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. 
[sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,persistentMetadata,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: byStatedPolicy +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: datasetAndSubject +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: full +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/6171d0459add1.xml.txt b/split-by-model/dkNET-DRP/summary/6171d0459add1.xml.txt new file mode 100644 index 0000000..884ff0f --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6171d0459add1.xml.txt @@ -0,0 +1,615 @@ +Transcript: 6171d0459add1.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) +Note: +mzspec + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) +Note: +doi:10.25345/C5GP0R - includes such DOI but it doesn't resolve; internal ID: MassIVE MSV000088235 ; PXD019909 + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) +Note: +species, instrument, post translational modification, PI name, email, institute, description + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) +Note: +dataset metadata standardized controlled vocabulary maintained by HUPO - proteomics standards initiative + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. 
+ + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. 
+ +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. 
Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. 
Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? 
+ + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: notInteroperable +FAIR/DataRepoCompliance/Reusable: notReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID 
+InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/61732f9ba6055.xml.txt b/split-by-model/dkNET-DRP/summary/61732f9ba6055.xml.txt new file mode 100644 index 0000000..19d0547 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61732f9ba6055.xml.txt @@ -0,0 +1,645 @@ +Transcript: 61732f9ba6055.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. 
+ +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. 
+ +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? 
+ + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. 
Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: by policy (0.0000) +Note: +ISO27001 certified for sustainability - business model for sustainable research data repository + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) +Note: +copy/paste citation + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +Figshare API + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. 
+ +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: full (0.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) +Note: +written "endorsment of TRUST principles for Digital Repository" + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,persistentMetadata,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: byStatedPolicy +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: fullyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: 
ccLicenseOK,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: full diff --git a/split-by-model/dkNET-DRP/summary/61735ece7e758.xml.txt b/split-by-model/dkNET-DRP/summary/61735ece7e758.xml.txt new file mode 100644 index 0000000..ddf814c --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61735ece7e758.xml.txt @@ -0,0 +1,653 @@ +Transcript: 61735ece7e758.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. 
+ +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) +Note: +All metadata is licensed under Creative Commons Zero, while the data files may be either open access and subject to a license described in the metadata or closed access and not available for download. + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). 
+ + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) +Note: +All metadata is licensed under Creative Commons Zero, while the data files may be either open access and subject to a license described in the metadata or closed access and not available for download. + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. 
+ +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? 
+ + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. 
+ + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? 
+ +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text.
+ + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: best (0.0000) + +17. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) +Note: +Metadata formats + +Metadata for each record is available in several formats. The available formats include: + +oai_datacite + +OAI DataCite (latest schema version) — This metadata format has been specifically established for the dissemination of DataCite records using OAI-PMH. In addition to the original DataCite metadata, this format contains several other elements describing the version of the metadata, whether it is of reference quality, and the registering datacentre. For more information about this format and its schema please see the DataCite OAI schema website.Dublin Core — only minimal metadata is included in this format. The format is exported according to the OpenAIRE Guidelines. + +the contents of the .zenodo.json file are based on our deposit metadata documentation and can be structurally validated by our deposit JSON Schema. + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. 
Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: by policy (0.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +REST API currently in testing; OAI-PMH + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: full (0.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible,persistentMetadata,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: byStatedPolicy +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID 
+InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: machineReadableMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: full diff --git a/split-by-model/dkNET-DRP/summary/6176c84f1c023.xml.txt b/split-by-model/dkNET-DRP/summary/6176c84f1c023.xml.txt new file mode 100644 index 0000000..8332b52 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6176c84f1c023.xml.txt @@ -0,0 +1,612 @@ +Transcript: 6176c84f1c023.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. 
Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics.
In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. 
+ +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix.
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) +Note: +no ORCID; no Lab, PI, Institution, no contributors role; no publication, no clear versioning policy + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation?
+ + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) +Note: +not for all data + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: best (0.0000) +Note: +no ORCID; no publication for all + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: best (0.0000) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: best (0.0000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +22. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +24. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) +Note: +The PHP file at application/controllers/CILServiceUtil2.php contains all utility functions for accessing the data through the REST API. The CIL utilizes Elasticsearch as the NoSQL JSON search engine. The CIL website communicates with the internal REST service API, for querying the datastore and tracking the statistics. The CIL is implemented in PHP. This program relies on CodeIgniter to maintain the Model View Controller (MVC) programming structure. + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: best (0.0000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository? + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. 
+ + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: full (0.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? 
+ + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: noConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: formal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: machineReadableMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none 
+OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: datasetAndSubject +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: full +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: full diff --git a/split-by-model/dkNET-DRP/summary/6176e09b35d7f.xml.txt b/split-by-model/dkNET-DRP/summary/6176e09b35d7f.xml.txt new file mode 100644 index 0000000..581e6c2 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6176e09b35d7f.xml.txt @@ -0,0 +1,622 @@ +Transcript: 6176e09b35d7f.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. 
+ +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict relative to the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and re-shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) +Note: +FCS file format + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? 
+ + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) +Note: +MIFlow data standards;ISAC + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. 
+ + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. 
Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). 
One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: notAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/6177452b8b6c7.xml.txt b/split-by-model/dkNET-DRP/summary/6177452b8b6c7.xml.txt new file mode 100644 index 0000000..89f3b52 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6177452b8b6c7.xml.txt @@ -0,0 +1,643 @@ +Transcript: 6177452b8b6c7.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent on the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent—that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) +Note: +https://www.uniprot.org/uniprot/P31749 + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRxiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) +Note: +https://www.uniprot.org/uniprot/P31749 + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: required (0.0000) +Note: +data submission and update using ORCID + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). 
These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? 
+ + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. 
[sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? 
+ + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. 
+ + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? 
+ + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: best (0.0000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: full (0.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: required +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: full +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: full diff --git a/split-by-model/dkNET-DRP/summary/61782a94716e3.xml.txt b/split-by-model/dkNET-DRP/summary/61782a94716e3.xml.txt new file mode 100644 index 0000000..8717e25 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61782a94716e3.xml.txt @@ -0,0 +1,610 @@ +Transcript: 61782a94716e3.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict relative to the possible harm that misuse of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) +Note: +ZFIN + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) +Note: +no information about APIs; but it can be accessed + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) +Note: +use 14 ontologies;for some OBO file format; + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? 
Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? 
+ + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID 
+InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61783f6c4c8e7.xml.txt b/split-by-model/dkNET-DRP/summary/61783f6c4c8e7.xml.txt new file mode 100644 index 0000000..e2695fe --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61783f6c4c8e7.xml.txt @@ -0,0 +1,592 @@ +Transcript: 61783f6c4c8e7.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on propriety software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) +Note: +phs000101.v5.p1 copy/paste - it will resolve to the landing page + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do the have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) +Note: +NIH Data Sharing Policy + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) +Note: +Authorized Access System + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. 
[sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: notAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal 
+InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/617850897d411.xml.txt b/split-by-model/dkNET-DRP/summary/617850897d411.xml.txt new file mode 100644 index 0000000..b8c69a5 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/617850897d411.xml.txt @@ -0,0 +1,625 @@ +Transcript: 617850897d411.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. 
+ +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on propriety software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? 
+ + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) +Note: +GenBank: U46667.1; https://www.ncbi.nlm.nih.gov/nuccore/U46667 + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. 
Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. 
+ + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +accessible through NCBI Entrez retrival system; + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. 
Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) +Note: +daily data exchange with European Nucleotide Archive and DDBJ; LinkOut system + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. 
+ + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? 
+ + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none 
+OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/617864ac7b873.xml.txt b/split-by-model/dkNET-DRP/summary/617864ac7b873.xml.txt new file mode 100644 index 0000000..0d1f0a4 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/617864ac7b873.xml.txt @@ -0,0 +1,606 @@ +Transcript: 617864ac7b873.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. 
+ +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict relative to the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? 
+ + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? 
+ + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +accessible through NCBI Entrez retrival system; documentation not easy to find + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: 
dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61799d37c3555.xml.txt b/split-by-model/dkNET-DRP/summary/61799d37c3555.xml.txt new file mode 100644 index 0000000..95b6d46 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61799d37c3555.xml.txt @@ -0,0 +1,671 @@ +Transcript: 61799d37c3555.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. 
+ +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: best (0.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. 
FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +12. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +15. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +18. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) +Note: +data collected to MIAME and MINSEQE standards + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: best (0.0000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +22. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: yes (0.0000) + +23. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +24. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +25. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +26. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. 
+ + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +27. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +28. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +29. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: full (0.0000) + +30. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +31. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: datasetAndSubject +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: full +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: full diff --git a/split-by-model/dkNET-DRP/summary/6179b9da80888.xml.txt b/split-by-model/dkNET-DRP/summary/6179b9da80888.xml.txt new file mode 100644 index 0000000..fcb53ee --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6179b9da80888.xml.txt @@ -0,0 +1,654 @@ +Transcript: 6179b9da80888.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). 
+ + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? 
+ + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. 
+ + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +12. 
[sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? 
Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +15. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? 
+ + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +18. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +22. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. 
+ +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) +Note: +NBDC of Japan Science and Tech Board Members + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/617accb79fb53.xml.txt b/split-by-model/dkNET-DRP/summary/617accb79fb53.xml.txt new file mode 100644 index 0000000..ed0fa87 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/617accb79fb53.xml.txt @@ -0,0 +1,599 @@ +Transcript: 617accb79fb53.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) +Note: +They say: Public Domain - on repository level;no specific licence + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. 
We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent — that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) +Note: +EMBL format used for sequences data + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and databases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available? Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +Gene Ontology + +21. 
[sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) +Note: +no information provided + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. 
That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) +Note: +probably yes; but no information can be found on the website + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/617ad2ab32afc.xml.txt b/split-by-model/dkNET-DRP/summary/617ad2ab32afc.xml.txt new file mode 100644 index 0000000..5c9318f --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/617ad2ab32afc.xml.txt @@ -0,0 +1,597 @@ +Transcript: 617ad2ab32afc.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on propriety software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). 
+ +Answer: yes (0.0000) +Note: +MIMAT0000062 to https://www.mirbase.org/cgi-bin/mature.pl?mature_acc=MIMAT0000062; + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. 
[sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). 
+ +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) +Note: +EMBL format + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +Gene Ontology + +21. 
[sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) +Note: +no documentation but they use API + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) +Note: +yes but no documents about it + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/617af3b01bff2.xml.txt b/split-by-model/dkNET-DRP/summary/617af3b01bff2.xml.txt new file mode 100644 index 0000000..0d48c8c --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/617af3b01bff2.xml.txt @@ -0,0 +1,630 @@ +Transcript: 617af3b01bff2.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on propriety software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) +Note: +all records under Creative Commons Attribution License + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data.
+ +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +12. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) +Note: +the dataset is linked to other databases for more information + +15.
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +18. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) +Note: +HUPO Proteomics Standard Initiative + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +provide EBI's ontology look up service to use as controlled vocabulary + +22. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search.
Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. 
Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? 
+ + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID 
+InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/617aff8495ced.xml.txt b/split-by-model/dkNET-DRP/summary/617aff8495ced.xml.txt new file mode 100644 index 0000000..d5cb421 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/617aff8495ced.xml.txt @@ -0,0 +1,640 @@ +Transcript: 617aff8495ced.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available.
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: required (0.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: yes (0.0000) + +22. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +EMBD schema; EMBD data model + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. 
+ + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) +Note: +XML files; FTP; README; EMBD schema; REST API + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: required +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/617c779c5bc94.xml.txt b/split-by-model/dkNET-DRP/summary/617c779c5bc94.xml.txt new file mode 100644 index 0000000..5ce4795 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/617c779c5bc94.xml.txt @@ -0,0 +1,619 @@ +Transcript: 617c779c5bc94.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign a data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict given the possible harm that misuse of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) +Note: +nstd + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/61800a6acbf2a.xml.txt b/split-by-model/dkNET-DRP/summary/61800a6acbf2a.xml.txt new file mode 100644 index 0000000..3fbceac --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61800a6acbf2a.xml.txt @@ -0,0 +1,610 @@ +Transcript: 61800a6acbf2a.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +BTO, DOID, OLS Ontology look up service + +20. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +HUPO + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61805a39e24bb.xml.txt b/split-by-model/dkNET-DRP/summary/61805a39e24bb.xml.txt new file mode 100644 index 0000000..b602ce9 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61805a39e24bb.xml.txt @@ -0,0 +1,662 @@ +Transcript: 61805a39e24bb.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. 
+ +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRxiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) +Note: +PI ORCID during submission + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use a Gene Ontology term, do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: yes (0.0000) + +22. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) +Note: +under API Documentation - not found info + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. 
+ + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? How are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) +Note: +consortium + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/618063fc1eba5.xml.txt b/split-by-model/dkNET-DRP/summary/618063fc1eba5.xml.txt new file mode 100644 index 0000000..a30cf21 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618063fc1eba5.xml.txt @@ -0,0 +1,604 @@ +Transcript: 618063fc1eba5.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) +Note: +DOI for networks if requested + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: no (1.0000) + +9. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +10. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +11. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +12. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +13. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +15. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +16. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +18. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +19. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: yes (0.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. 
+ + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) +Note: +CX Data Model + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: none (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: none +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: 
minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: none diff --git a/split-by-model/dkNET-DRP/summary/61816a627e26b.xml.txt b/split-by-model/dkNET-DRP/summary/61816a627e26b.xml.txt new file mode 100644 index 0000000..3539bca --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61816a627e26b.xml.txt @@ -0,0 +1,666 @@ +Transcript: 61816a627e26b.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. 
+ +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) +Note: +public data CC0 licence + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). 
+ + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? 
+ + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) +Note: +PX identifiers for citation in literature, DOI provided for datasets for complete submission + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? 
+ + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID exists. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +12. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +18. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +PSI -MS controlled vocabulary ( OBO file) + +22. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +XML Schema + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) +Note: +mzIdentML open standard format;PX XML format, XML schema, PSI-MS controlled vocabulary (OBO file), mzML + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) +Note: +no documentation + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: best (0.0000) +Note: +consortium + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +29. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: full +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/618180e41db0e.xml.txt b/split-by-model/dkNET-DRP/summary/618180e41db0e.xml.txt new file mode 100644 index 0000000..7c2a40d --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618180e41db0e.xml.txt @@ -0,0 +1,609 @@ +Transcript: 618180e41db0e.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +Chemical Compound Dictionary, Macromolecular Dictionary, Small Molecule Dictionary, BIRD, ChEBI + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. 
Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +PDBML schema + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) +Note: +PDBML/XML; ChEBI; CCD; BIRD + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? 
Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? 
+ + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID 
+InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61818c0471e3e.xml.txt b/split-by-model/dkNET-DRP/summary/61818c0471e3e.xml.txt new file mode 100644 index 0000000..1c48efc --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61818c0471e3e.xml.txt @@ -0,0 +1,637 @@ +Transcript: 61818c0471e3e.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on propriety software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible, as closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). 
+ + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? 
+ + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? 
+ + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. 
+ + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? 
+ +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text.
+ + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +17. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. 
+ +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. 
+ + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. 
Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). 
One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/618194d41e94a.xml.txt b/split-by-model/dkNET-DRP/summary/618194d41e94a.xml.txt new file mode 100644 index 0000000..9f930bb --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618194d41e94a.xml.txt @@ -0,0 +1,610 @@ +Transcript: 618194d41e94a.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) +Note: +https://www.uniprot.org/uniprot/P62258 + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) +Note: +ORCID can be associated with publication + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? 
+ + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data.
Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +SPARQL RDF + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/6182df9c3d9e3.xml.txt b/split-by-model/dkNET-DRP/summary/6182df9c3d9e3.xml.txt new file mode 100644 index 0000000..e4fc7fc --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6182df9c3d9e3.xml.txt @@ -0,0 +1,612 @@ +Transcript: 6182df9c3d9e3.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. These restrictions are stricter than is warranted by the harm that misuse of the data might cause. + +Answer: significant restrictions (0.6667) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) +Note: +cloud computation capability for data reuse + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent—that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRxiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use a Gene Ontology term, do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +Data Dictionary + +20. 
[sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +NDA Omics Standards + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +" direct web service calls to Amazon Web Service's S3 API and data can be streamed or downloaded" + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. 
That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: significant 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/6182f66084e0b.xml.txt b/split-by-model/dkNET-DRP/summary/6182f66084e0b.xml.txt new file mode 100644 index 0000000..6e9c807 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6182f66084e0b.xml.txt @@ -0,0 +1,621 @@ +Transcript: 6182f66084e0b.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) +Note: +SID for all + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars.
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID.
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? 
+ + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/61830248beb1c.xml.txt b/split-by-model/dkNET-DRP/summary/61830248beb1c.xml.txt new file mode 100644 index 0000000..22cf2c6 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61830248beb1c.xml.txt @@ -0,0 +1,621 @@ +Transcript: 61830248beb1c.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and re-shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) +Note: +SRA data on Google Cloud Platform and Amazon Web Services clouds; user pays for accessing SRA data on the cloud + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. 
Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: best (0.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent—that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. 
For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles: Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? 
+ + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: full +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61856c2f8d135.xml.txt b/split-by-model/dkNET-DRP/summary/61856c2f8d135.xml.txt new file mode 100644 index 0000000..1a03f4b --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61856c2f8d135.xml.txt @@ -0,0 +1,623 @@ +Transcript: 61856c2f8d135.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and re-shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars.
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID.
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? 
+ + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) +Note: +since 2019 all data must be submitted to European Variant Archive (EVA); EVA validator + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. 
That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) +Note: +search FTP or API; EVA API + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/618772a4eda31.xml.txt b/split-by-model/dkNET-DRP/summary/618772a4eda31.xml.txt new file mode 100644 index 0000000..ffe7cb1 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618772a4eda31.xml.txt @@ -0,0 +1,619 @@ +Transcript: 618772a4eda31.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: significant restrictions (0.6667) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) +Note: +https://ega-archive.org/studies/EGAS00000000001 + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? 
+ + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) +Note: +metadata REST API - using API allows to obtain publicly available info from EGA study, sample,experiment, run analysis, dataset + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: closed +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: notInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: significant +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61895560bbab4.xml.txt b/split-by-model/dkNET-DRP/summary/61895560bbab4.xml.txt new file mode 100644 index 0000000..2d0bd5b --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61895560bbab4.xml.txt @@ -0,0 +1,617 @@ +Transcript: 61895560bbab4.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. 
So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. 
[sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. 
[sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, to see what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? 
+ + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. 
Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/618ab2f1efc9f.xml.txt b/split-by-model/dkNET-DRP/summary/618ab2f1efc9f.xml.txt new file mode 100644 index 0000000..ddc9896 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618ab2f1efc9f.xml.txt @@ -0,0 +1,647 @@ +Transcript: 618ab2f1efc9f.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict relative to the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) +Note: +no restrictions for public data + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). 
+ + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) +Note: +a lot of different options to download data including AWS CLI, DataLad, Node.js + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? 
+ + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? 
+ + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. 
+ + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? 
+ +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. 
+ + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. 
+ +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. 
+ + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +BIDS validator + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) +Note: +no information provided + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. 
Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) +Note: +no info + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. 
+ + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? 
+ + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: 
ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/618ac6bb76674.xml.txt b/split-by-model/dkNET-DRP/summary/618ac6bb76674.xml.txt new file mode 100644 index 0000000..d52e398 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618ac6bb76674.xml.txt @@ -0,0 +1,621 @@ +Transcript: 618ac6bb76674.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. 
+ +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. These restrictions are stricter than warranted by the harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and re-shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? 
+ + + +Vignettes or help that are designed not just for use of the repository, but that also help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent—that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then the answer is “No”. + +Answer: yes (0.0000) +Note: +CIF standard by IUCr (International Union of Crystallography) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and databases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available? Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. 
+ + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. 
Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). 
One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/618af1fa72f85.xml.txt b/split-by-model/dkNET-DRP/summary/618af1fa72f85.xml.txt new file mode 100644 index 0000000..ba0348d --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618af1fa72f85.xml.txt @@ -0,0 +1,637 @@ +Transcript: 618af1fa72f85.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: required (0.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: best (0.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: by policy (0.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: best (0.0000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: full (0.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,persistentMetadata +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: byStatedPolicy +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: required +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: formal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: full +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: full diff --git a/split-by-model/dkNET-DRP/summary/618afa63748f2.xml.txt b/split-by-model/dkNET-DRP/summary/618afa63748f2.xml.txt new file mode 100644 index 0000000..46fd307 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618afa63748f2.xml.txt @@ -0,0 +1,588 @@ +Transcript: 618afa63748f2.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) +Note: +MGI ID + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars.
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID.
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., IDs for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object?
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and databases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation?
+ + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: notAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: 
unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/618b05ddaf1c8.xml.txt b/split-by-model/dkNET-DRP/summary/618b05ddaf1c8.xml.txt new file mode 100644 index 0000000..b64b3bd --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618b05ddaf1c8.xml.txt @@ -0,0 +1,624 @@ +Transcript: 618b05ddaf1c8.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. 
So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3.
[sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions.
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, to see what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology?
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article.
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: yes (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? 
+ + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +data access with REST API + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25.
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/618b0ed289968.xml.txt b/split-by-model/dkNET-DRP/summary/618b0ed289968.xml.txt new file mode 100644 index 0000000..8cb465e --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618b0ed289968.xml.txt @@ -0,0 +1,619 @@ +Transcript: 618b0ed289968.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software.
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. 
[sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? 
+ + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. 
Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/618e95d1e58c7.xml.txt b/split-by-model/dkNET-DRP/summary/618e95d1e58c7.xml.txt new file mode 100644 index 0000000..303ff1e --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618e95d1e58c7.xml.txt @@ -0,0 +1,597 @@ +Transcript: 618e95d1e58c7.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: significant restrictions (0.6667) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) +Note: +I have no access + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) +Note: +I have no access to see the dataset landing page; only studies page + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: no (1.0000) +Note: +I have no access to see + +9. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +10. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +11. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) +Note: +studies data + +12. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. 
Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) +Note: +studies data + +13. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +14. 
[sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +15. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) +Note: +no statement + +16. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? 
+ + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +17. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +18. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. 
+ + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) +Note: +I can not see the datasets landing pages; no statement + +19. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) +Note: +I have no access + +20. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. 
+ +Answer: no (1.0000) + +21. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +22. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. 
+ +Answer: no (1.0000) + +23. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +24. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +25. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +26. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +27. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: closed +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: notAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: notInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: none +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: significant +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial 
+TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/618eaa9fcd36f.xml.txt b/split-by-model/dkNET-DRP/summary/618eaa9fcd36f.xml.txt new file mode 100644 index 0000000..11e9b79 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/618eaa9fcd36f.xml.txt @@ -0,0 +1,585 @@ +Transcript: 618eaa9fcd36f.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. 
In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released.
The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata?
+ + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. 
+ + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: no (1.0000) + +9. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +10. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +11. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. 
+ +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +12. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +13. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data?
+ + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +15. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +16. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +17. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +18. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +19. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the markup meta tags (they should be in the HTML head section)? + + + +For more on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +20. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator? + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +Chado schema + +21. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels.
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +23. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +24. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: best (0.0000) + +25. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +26. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +27. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: none +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: full +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61941c2f7748c.xml.txt b/split-by-model/dkNET-DRP/summary/61941c2f7748c.xml.txt new file mode 100644 index 0000000..c56485f --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61941c2f7748c.xml.txt @@ -0,0 +1,589 @@ +Transcript: 61941c2f7748c.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: significant but not justified restrictions (1.0000) +Note: +no search button + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on propriety software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: no (1.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) +Note: +no; data linked to PDBj; on PDBj dataset DOI is provided + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: no (1.0000) + +9. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: required (0.0000) +Note: +starting on September 24, 2021 ORCID for submission is required; log in with ORCID + +10. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +11. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do the have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +12. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +13. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +15. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +16. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +18. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +19. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? 
+ + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +20. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +21. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +22. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +23. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +24. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +25. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +26. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +27. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: required +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: closed +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: notFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: none +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: restrictionsNotJustified +OpenProps/Properties/DataRepoCompliance/Restrictions: significant +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: 
communityStandard +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/61942b69bdeab.xml.txt b/split-by-model/dkNET-DRP/summary/61942b69bdeab.xml.txt new file mode 100644 index 0000000..5185757 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61942b69bdeab.xml.txt @@ -0,0 +1,579 @@ +Transcript: 61942b69bdeab.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: significant restrictions (0.6667) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. 
So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on propriety software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. 
[sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: no (1.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: no (1.0000) + +9. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +10. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +11. 
[sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +12. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). 
+ +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +13. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +15. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +16. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +18. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +19. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? 
+ + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +20. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +21. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +22. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +23. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +24. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +25. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +26. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +27. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: closed +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: notAccessible +FAIR/DataRepoCompliance/Findable: notFindable +FAIR/DataRepoCompliance/Interoperable: notInteroperable +FAIR/DataRepoCompliance/Reusable: notReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: none +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: significant +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/6194378833fb3.xml.txt 
b/split-by-model/dkNET-DRP/summary/6194378833fb3.xml.txt new file mode 100644 index 0000000..9a068c1 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6194378833fb3.xml.txt @@ -0,0 +1,583 @@ +Transcript: 6194378833fb3.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. 
As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. 
+ + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. 
+ +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: no (1.0000) + +9. 
[sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +10. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +11. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? 
Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +12. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +13. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? 
+ + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +15. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +16. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +18. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +19. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +20. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +21. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +22. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +23. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +24. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent?
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +25. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +26. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +27. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: none +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: 
communityStandard,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/619441c4acb77.xml.txt b/split-by-model/dkNET-DRP/summary/619441c4acb77.xml.txt new file mode 100644 index 0000000..d70335e --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/619441c4acb77.xml.txt @@ -0,0 +1,618 @@ +Transcript: 619441c4acb77.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. 
So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3.
[sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions.
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. 
[sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance?
+ + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data.
Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/6198116a6dfed.xml.txt b/split-by-model/dkNET-DRP/summary/6198116a6dfed.xml.txt new file mode 100644 index 0000000..80511e6 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6198116a6dfed.xml.txt @@ -0,0 +1,624 @@ +Transcript: 6198116a6dfed.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. 
[sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if they are documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) +Note: +different accession numbers redirect to different databases like to UniProt where metadata are provided + +13. 
[sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). 
+ +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +obo - the ontology look up service is part of ELIXIR infrastructure + +21. 
[sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. 
That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) +Note: +intAct is part of ELIXIR infrastructure + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/6198331eb3593.xml.txt b/split-by-model/dkNET-DRP/summary/6198331eb3593.xml.txt new file mode 100644 index 0000000..30fa463 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6198331eb3593.xml.txt @@ -0,0 +1,639 @@ +Transcript: 6198331eb3593.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. 
+ +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data?
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else.
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique.
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14.
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +use of standarized signal name + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/61985711a8424.xml.txt b/split-by-model/dkNET-DRP/summary/61985711a8424.xml.txt new file mode 100644 index 0000000..dd93e20 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61985711a8424.xml.txt @@ -0,0 +1,660 @@ +Transcript: 61985711a8424.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) +Note: +MIT license for all data + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: somewhat open (0.6667) +Note: +MIT license is open source friendly + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. 
download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. 
FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +12. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) +Note: +information provided in BioGRID is linked to another database to get more descriptive info about the data set + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? 
+ +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. 
+ + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +18. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. 
+ +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +22. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. 
Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +REST webservices - fill up the form to get access key + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. 
Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. 
+ + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? 
+ + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: adequate 
+OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/6198648660f3d.xml.txt b/split-by-model/dkNET-DRP/summary/6198648660f3d.xml.txt new file mode 100644 index 0000000..117b6c5 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/6198648660f3d.xml.txt @@ -0,0 +1,656 @@ +Transcript: 6198648660f3d.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. 
+ +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. 
+ +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? 
+ + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? 
+ + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRxiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) +Note: +uses DOI: 10.2210/pdb7mdn/pdb from wwPDB; + +12. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. 
+ + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? 
+ +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles: Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, BibTeX). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., IDs for publications rather than free text. 
+ + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +18. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. 
+ +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +22. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. 
Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +wwPDB validator; + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. 
Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. 
+ + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? 
+ + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full 
+OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/619c1b5161bad.xml.txt b/split-by-model/dkNET-DRP/summary/619c1b5161bad.xml.txt new file mode 100644 index 0000000..7040a95 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/619c1b5161bad.xml.txt @@ -0,0 +1,641 @@ +Transcript: 619c1b5161bad.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. 
+ +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are stricter than is warranted by the possible harm that misuse of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and re-shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible, as closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. 
+ +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? 
+ + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. 
Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) +Note: +for the Cancer Imaging Archive under Data Citation this info is provided for a dataset: https://doi.org/10.7937/TCIA.JHQD-FR46 + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) +Note: +most APIs can return results as CSV/JSON/XML/HTML + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/619c34b5360d2.xml.txt b/split-by-model/dkNET-DRP/summary/619c34b5360d2.xml.txt new file mode 100644 index 0000000..f9d349d --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/619c34b5360d2.xml.txt @@ -0,0 +1,612 @@ +Transcript: 619c34b5360d2.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are stricter than is warranted by the harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent—that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) +Note: +PubChem Bioassay is part of PubChem; a lot of IDs for dataset ( SID, CID) are provided for bioassay but not DOIs specific for dataset + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). 
These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? 
+ + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) +Note: +yes, according to PubChem Citation Guidelines. + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: yes (0.0000) + +22. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +PubChem schema + +24. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. 
+ + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +26. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/61a50fa18488f.xml.txt b/split-by-model/dkNET-DRP/summary/61a50fa18488f.xml.txt new file mode 100644 index 0000000..fe30e73 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a50fa18488f.xml.txt @@ -0,0 +1,603 @@ +Transcript: 61a50fa18488f.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? 
Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? 
+ + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. 
The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: yes (0.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: noConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: 
fairMetadataOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61a66d491f16a.xml.txt b/split-by-model/dkNET-DRP/summary/61a66d491f16a.xml.txt new file mode 100644 index 0000000..18169cb --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a66d491f16a.xml.txt @@ -0,0 +1,603 @@ +Transcript: 61a66d491f16a.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. 
+ + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. 
If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, for what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? 
+ + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: fullyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61a67946c16f2.xml.txt b/split-by-model/dkNET-DRP/summary/61a67946c16f2.xml.txt new file mode 100644 index 0000000..c309a62 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a67946c16f2.xml.txt @@ -0,0 +1,619 @@ +Transcript: 61a67946c16f2.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. 
+ +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID.
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? 
+ + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61a67f53dc9b3.xml.txt b/split-by-model/dkNET-DRP/summary/61a67f53dc9b3.xml.txt new file mode 100644 index 0000000..dfe6dd8 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a67f53dc9b3.xml.txt @@ -0,0 +1,648 @@ +Transcript: 61a67f53dc9b3.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: somewhat open (0.6667) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +10. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. 
FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +11. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +12. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +13. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +14. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +16. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +18. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +19. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +20. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +21. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +22. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +23. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +24. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +25. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +26. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +27. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +28. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +29. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +30. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: notInteroperable +FAIR/DataRepoCompliance/Reusable: notReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: adequate +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: 
unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/61a7bc9e3a002.xml.txt b/split-by-model/dkNET-DRP/summary/61a7bc9e3a002.xml.txt new file mode 100644 index 0000000..57e6e84 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a7bc9e3a002.xml.txt @@ -0,0 +1,585 @@ +Transcript: 61a7bc9e3a002.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. 
So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. 
[sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: no (1.0000) + +9. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +10. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +11. 
[sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +12. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). 
+ +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +13. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +15. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +16. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +17. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +18. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +Nanoparticles Ontology, NCI thesaurus + +19. 
[sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +20. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +21. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +22. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. 
That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +23. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +24. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +25. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +26. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +27. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: none +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61a7d3c6aeb5d.xml.txt b/split-by-model/dkNET-DRP/summary/61a7d3c6aeb5d.xml.txt new file mode 100644 index 0000000..093a80b --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a7d3c6aeb5d.xml.txt @@ -0,0 +1,624 @@ +Transcript: 61a7d3c6aeb5d.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. 
So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. 
[sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. 
[sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? 
+ + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. 
Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +USP Dictionary of USAN and International Drug Names + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) +Note: +EBI RDF platform - RDF platform has no funding currently; the best way to access large amount of data is to install a database instance on your own computer using Oracle or MySQL; + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) +Note: +query across resources using W3C SPARQL query language + +26.
[sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. 
+ +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: 
unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61a7e89c785ae.xml.txt b/split-by-model/dkNET-DRP/summary/61a7e89c785ae.xml.txt new file mode 100644 index 0000000..fd92b9a --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a7e89c785ae.xml.txt @@ -0,0 +1,620 @@ +Transcript: 61a7e89c785ae.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software.
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions.
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. 
[sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: required (0.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? 
+ + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data.
Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +CHEBI ontology, IUPAC names + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? How are advisory members chosen, what are their terms, and how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository? + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: required +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61a909a92778f.xml.txt b/split-by-model/dkNET-DRP/summary/61a909a92778f.xml.txt new file mode 100644 index 0000000..93f4f9f --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a909a92778f.xml.txt @@ -0,0 +1,611 @@ +Transcript: 61a909a92778f.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) +Note: +most datasets have CCDC DOI + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +IUPAC + +20. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. 
Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) +Note: +CCDC programmatic access is not permitted; CCDS structure redistribution is not permitted; possible only through CSD system or other CCDS services ( Web CSD) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) +Note: +advanced search with CSD Python API + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? How are advisory members chosen, what are their terms, and how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: best (0.0000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository? + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: yes (0.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: noConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: full +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61a9183722786.xml.txt b/split-by-model/dkNET-DRP/summary/61a9183722786.xml.txt new file mode 100644 index 0000000..cf340c0 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a9183722786.xml.txt @@ -0,0 +1,636 @@ +Transcript: 61a9183722786.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software.
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6.
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix.
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars.
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID.
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? 
+ + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GO FAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON-LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent?
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/61a92a18640d8.xml.txt b/split-by-model/dkNET-DRP/summary/61a92a18640d8.xml.txt new file mode 100644 index 0000000..f111d6a --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61a92a18640d8.xml.txt @@ -0,0 +1,637 @@ +Transcript: 61a92a18640d8.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data?
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else.
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14.
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: by policy (0.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible,persistentMetadata,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: byStatedPolicy +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/61aa5882975cb.xml.txt b/split-by-model/dkNET-DRP/summary/61aa5882975cb.xml.txt new file mode 100644 index 0000000..c3fe1b5 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61aa5882975cb.xml.txt @@ -0,0 +1,621 @@ +Transcript: 61aa5882975cb.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? 
+ + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +MGnify API provides programmatic access to the data + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. 
That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: 
adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61aa8e7632db5.xml.txt b/split-by-model/dkNET-DRP/summary/61aa8e7632db5.xml.txt new file mode 100644 index 0000000..921fb67 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61aa8e7632db5.xml.txt @@ -0,0 +1,639 @@ +Transcript: 61aa8e7632db5.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: best (0.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) +Note: +SICAS reserve the right to discontinue SMIR services at any time without stating the reasons + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. 
+ + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? 
+ + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: 
datasetAndSubject +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61ae6db443359.xml.txt b/split-by-model/dkNET-DRP/summary/61ae6db443359.xml.txt new file mode 100644 index 0000000..cbf905b --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61ae6db443359.xml.txt @@ -0,0 +1,637 @@ +Transcript: 61ae6db443359.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61ae7c9289651.xml.txt b/split-by-model/dkNET-DRP/summary/61ae7c9289651.xml.txt new file mode 100644 index 0000000..3945b8c --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61ae7c9289651.xml.txt @@ -0,0 +1,639 @@ +Transcript: 61ae7c9289651.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. 
+ +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) +Note: +no title + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: best (0.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: formal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61af958cc8abe.xml.txt b/split-by-model/dkNET-DRP/summary/61af958cc8abe.xml.txt new file mode 100644 index 0000000..ab4ec93 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61af958cc8abe.xml.txt @@ -0,0 +1,622 @@ +Transcript: 61af958cc8abe.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set.
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) +Note: +answers based on info provided on the repository KiMoSys website + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: somewhat open (0.6667) +Note: +answers based on info provided on the repository KiMoSys website + +5.
[sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data.
If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) +Note: +they say so on the website ( KiMoSys ); available public data info is a link to other databases; + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. 
FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRxiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)). + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) +Note: +might; they claim so - on the general repository level info; + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12.
[sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use a Gene Ontology term do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs?
+ +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. 
+ + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) +Note: +they claim so on their website ( KiMoSys ) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. 
+ + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) +Note: +answer based on info on the repository level info + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) +Note: +answer based on repository level info + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? 
+ + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) +Note: +answer based on repository level info + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). 
But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? 
+ + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: adequate +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially diff --git a/split-by-model/dkNET-DRP/summary/61afc04c3cc7a.xml.txt b/split-by-model/dkNET-DRP/summary/61afc04c3cc7a.xml.txt new file mode 100644 index 0000000..dfe751c --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61afc04c3cc7a.xml.txt @@ -0,0 +1,619 @@ +Transcript: 61afc04c3cc7a.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software.
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that help users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR: not just how to perform certain functions, but why you can use the resource to answer certain types of questions.
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. 
[sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance?
+ + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, BibTeX). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data.
Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the markup meta tags (should be in the HTML head part). + + + +For more on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator? + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels.
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: fullyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61afe38742749.xml.txt b/split-by-model/dkNET-DRP/summary/61afe38742749.xml.txt new file mode 100644 index 0000000..5caeefb --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61afe38742749.xml.txt @@ -0,0 +1,617 @@ +Transcript: 61afe38742749.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? 
+ + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +28. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: notAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: internalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/61b23d317644e.xml.txt b/split-by-model/dkNET-DRP/summary/61b23d317644e.xml.txt new file mode 100644 index 0000000..47b8982 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61b23d317644e.xml.txt @@ -0,0 +1,647 @@ +Transcript: 61b23d317644e.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. 
+ +Answer: significant restrictions (0.6667) +Note: +prices for licenses; no programmatic access is not permitted + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) +Note: +ICSD license; prices for licenses + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: somewhat open (0.6667) +Note: +ICSD license; A license is usually concluded for one year. + +5. 
[sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. That is, the documentation should cover not just how to perform certain functions but also why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. 
If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. 
Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? 
+ + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use a Gene Ontology term, do we know it’s from the Gene Ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? 
+ + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. 
+ + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) +Note: +API services/ RESTful API/end users of the service can use swagger to easily create code in various common programming/script languages to access the API - for larger number of structures + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. 
Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. 
+ + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? 
+ + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: qualifiedMetadataReferencesOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: adequate +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK 
+OpenProps/Properties/DataRepoCompliance/Restrictions: significant +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61b257c62b44a.xml.txt b/split-by-model/dkNET-DRP/summary/61b257c62b44a.xml.txt new file mode 100644 index 0000000..4d64653 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61b257c62b44a.xml.txt @@ -0,0 +1,607 @@ +Transcript: 61b257c62b44a.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. 
Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: significant restrictions (0.6667) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: no (1.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. 
That is, the documentation should cover not just how to perform certain functions but also why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent—that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. 
+ + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) +Note: +they say so on the repository level - sequencing data are indexed with global unique IDs + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? 
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. 
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +20. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +27. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: notAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/OpenFlags: platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: significant +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/61b2654669195.xml.txt b/split-by-model/dkNET-DRP/summary/61b2654669195.xml.txt new file mode 100644 index 0000000..ae59548 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61b2654669195.xml.txt @@ -0,0 +1,620 @@ +Transcript: 61b2654669195.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: significant but not justified restrictions (1.0000) +Note: +no information how to access Figshare Plus data + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) +Note: +no information provided + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: somewhat open (0.6667) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) +Note: +I have no access to the datasets + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) +Note: +statement on the repository level - we provide DOI for each dataset + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). 
These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: minimal (1.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) +Note: +I have no access to the data + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? 
+ + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. 
[sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? 
+ + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) +Note: +I have no access to the data + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: by policy (0.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. 
+ + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? 
+ + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) +Note: +info about figshare team not figshare plus + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. 
[sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible,persistentMetadata +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: byStatedPolicy +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: minimal +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText 
+OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: adequate +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,platformSupportsDataWork,restrictionsNotJustified +OpenProps/Properties/DataRepoCompliance/Restrictions: significant +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no diff --git a/split-by-model/dkNET-DRP/summary/61b38f199319f.xml.txt b/split-by-model/dkNET-DRP/summary/61b38f199319f.xml.txt new file mode 100644 index 0000000..f739515 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61b38f199319f.xml.txt @@ -0,0 +1,615 @@ +Transcript: 61b38f199319f.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. 
+ +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for the possible harm that misuse of the data might cause. + +Answer: significant restrictions (0.6667) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is silent about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. 
+ +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: no (1.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? 
+ + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: no (1.0000) + +9. [sc-drc.dg]pid-l: +### Does the repository assign, or the contributor provide, a locally unique identifier to the data set or the data contribution? + + + +Examples include an accession number, a UUID, or some other convention. + + + +*Note:* The use of a title or free text as the unique string is not considered compliant. + +Answer: no (1.0000) + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? 
+ + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? 
+ +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. 
Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +16. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: no (1.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. 
+ +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +20. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. 
Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. 
Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. 
+ + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: none (1.0000) + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: no (1.0000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? 
+ + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: notCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: notInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: none +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK +OpenProps/Properties/DataRepoCompliance/Restrictions: significant +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: dkNetMetadataOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: no +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: none diff --git a/split-by-model/dkNET-DRP/summary/61b3991a21735.xml.txt b/split-by-model/dkNET-DRP/summary/61b3991a21735.xml.txt new file mode 100644 index 0000000..a4d660a --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/61b3991a21735.xml.txt @@ -0,0 +1,611 @@ +Transcript: 61b3991a21735.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. 
So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: somewhat (0.5000) +Note: +no license; no format info + +3.
[sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: no license (1.0000) + +4. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +5. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. 
+ + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +6. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +7. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. 
These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +8. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: no (1.0000) +Note: +Registration DOI is provided; it resolves to different landing page with info about dataset + +9. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? 
+ + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID. Sometimes it is useful to check the API services, if documented, about what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) +Note: +some studies have DOI + +10. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +11. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +12. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology?
In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +13. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article.
+ +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +14. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +15. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. 
+ + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: worst (1.0000) + +16. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +17. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). 
+ +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +18. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +19. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: allowed (0.5000) + +20. [sc-drc.dg]land-api: +### Does the repository provide a machine-readable landing page? + + + +Ideally, the citation metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. 
If you look at the page source, do you see recognizable elements from Dublin Core or Schema.org in the mark up metatags (Should be in the html head part). + + + +More on implementations of machine-readable metadata on dataset landing pages, see M. Fenner et al. _A data citation roadmap for scholarly data repositories_, Scientific Data, 2019. [doi.org/10.1038/s41597-019-0031-8](https://doi.org/10.1038/s41597-019-0031-8). + +Answer: no (1.0000) + +21. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +22. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +23. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: no (1.0000) + +24. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +25. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +26. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: weak (0.6667) + +27. 
[sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +28. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: none +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: allowed +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: none +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: none +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: none +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: weak diff --git a/split-by-model/dkNET-DRP/summary/621d12cf667c7.xml.txt b/split-by-model/dkNET-DRP/summary/621d12cf667c7.xml.txt new file mode 100644 index 0000000..978917d --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/621d12cf667c7.xml.txt @@ -0,0 +1,611 @@ +Transcript: 621d12cf667c7.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? 
+ + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. 
Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) + +6. 
[sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: good (0.3333) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. 
The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. 
[https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. 
+ + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: rich (0.0000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: best (0.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. 
It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: full (0.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: best (0.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? 
+ + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. 
In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: worst (1.0000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: by policy (0.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) +Note: +no evidence + +21. 
[sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. 
https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? 
+ + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: worst (1.0000) +Note: +OSF is maintained and developed by the Center for Open Science (COS), a 501(c)3 non-profit organization. COS is supported through grants from a variety of supporters, including federal agencies, private foundations, and commercial entities. + +27. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: best (0.0000) + +28. 
[sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. + + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,persistentMetadata,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: byStatedPolicy +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: full +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: fullyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: fullyAccessible +FAIR/DataRepoCompliance/Findable: fullyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: rich +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal 
+InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: formal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: none +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: good +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: full +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: opaque +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: yes diff --git a/split-by-model/dkNET-DRP/summary/621d4ff4808c5.xml.txt b/split-by-model/dkNET-DRP/summary/621d4ff4808c5.xml.txt new file mode 100644 index 0000000..9a9d3d8 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/621d4ff4808c5.xml.txt @@ -0,0 +1,639 @@ +Transcript: 621d4ff4808c5.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. 
+ +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: minimal restrictions (0.3333) + +2. [sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. 
+ + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: repository level (0.5000) +Note: +on the repository level there is a statement that all public data are released under CCA license + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). 
+ + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: good (0.3333) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? + + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: no (1.0000) + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: worst (1.0000) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? 
+ + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? 
+ + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URI’s don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID or other identifier if no PID Sometimes it is useful to check the API services if documented about what they provide + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: some (0.5000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. 
+ + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: supported (0.5000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? 
+ +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: worst (1.0000) + +14. [sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: no support (1.0000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. 
+ + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. + + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: worst (1.0000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: unclear (0.6667) + +17. 
[sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. 
+ +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? + + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: enforced (0.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: yes (0.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. 
+ + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: yes (0.0000) + +23. [sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: no (1.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. 
Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. [sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: no (1.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: best (0.0000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). 
One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: none +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: supportsDataCitation +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: supported +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: partiallyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: partial +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: fairMetadataOK,formalMetadataVocabularyOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: enforced +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: freeText +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: freeText +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: good +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat +OpenProps/Properties/DataRepoCompliance/Restrictions: minimal +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: 
dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: lacking +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: unclear +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,generalMetadata +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: repositoryLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: full +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/621d682e591a5.xml.txt b/split-by-model/dkNET-DRP/summary/621d682e591a5.xml.txt new file mode 100644 index 0000000..75123a0 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/621d682e591a5.xml.txt @@ -0,0 +1,639 @@ +Transcript: 621d682e591a5.xml + +Questions and Answers: +1. [sc-drc.dg]acc: +### Does the repository provide access to the data with minimal or no restrictions? + + + +How easy is it for users to gain access to the data? Are any impediments in place reasonable given the nature of the data, e.g., authorization for sensitive data. Repositories that make metadata available during the embargo period should not be penalized on this question. + + + +#### Options + +* **No restriction:** Accessible without a log in + +* **Minimal restriction:** requiring an account and/or the user to sign data policy agreement would be considered a minimal restriction. + +* **Significant restriction, Authorization required:** Requiring that someone obtain authorization ahead of downloading data, as would be the case for sensitive data, for example, even if it is understandable. + +* **Significant but not justified:** The repository imposes significant restrictions for accessing datasets. Said restrictions are too strict for possible harm mis-use of the data might cause. + +Answer: no restrictions (0.0000) + +2. 
[sc-drc.dg]reuse: +### Are you free to reuse the data with no or minimal restrictions? + + + +Many repositories that claim to be open are only open for humans to read, *not for machine-based access* or *for re-use*. So it is important to check before depositing the data that it is free to re-use according to the definition of the Commons. + + + +Data should be stored in a non-proprietary format, that is, a format that is published and free for re-use by anyone, such as CSV. In contrast, proprietary formats can only be read by certain commercial software. As the goal of publishing data in a repository is for openness and re-use, data reliant on proprietary software is by definition non-commons compliant. Adapted from [Wikipedia](https://en.wikipedia.org/wiki/Proprietary_format) + + + +Consider the type of data and whether the access mechanism places undue restraints on the ability to re-use the data. Also, consider the license that specifies 3rd party rights: does it allow the data to be re-used and -shared as part of a new product? + + + +Ideally, the repository should have a clear statement about acceptable file format characteristics. In the absence of such a policy, a check can be made to ascertain which formats are available. If there are both open and closed examples they are coded as “Yes with proprietary formats”. If individual data sets are offered under multiple licenses, this can complicate the re-use process further. + + + +#### Options + +* **Yes:** Permissive license or data use terms including the right to re-distribute products arising from the data; open and well supported format + +* **Somewhat:** Yes with proprietary formats, or with multiple licenses that require users to navigate the terms separately for each data set. 
A proprietary format that is still well used and has multiple tools that can read it, such as `xls`, is better than a custom format that is not well supported + +* **No:** A proprietary format that is difficult to read without the required software. Inability to distribute data products so that others may build on them. Terms of use are unclear. + +Answer: yes (0.0000) + +3. [sc-drc.dg]lic-clr: +### Does the repository provide a clear license for reuse of the data? + + + +Ideally, a metadata field `License=` or an easy to find statement on the web page stating the license under which data are released. The license should also ideally be one in common use where the usage rights are clearly stated and uncomplicated. + + + +#### Options + +* **Dataset Level:** Clear license and assigned at the level of individual data sets as part of the metadata + +* **Repository Level:** Clear license provided at the level of the repository, e.g., all data are released under a CC-BY license + +* **No license** + +Answer: dataset level (0.0000) + +4. [sc-drc.dg]lic-cc: +### Are the data covered by a commons-compliant license? + + + +FAIR requires a clear license but it is mute about the level of openness; the Commons requires that the data be as open as possible; closed as necessary. Is the license used consistent with that? + + + +In this question, we use the definition for "Open" from [the Open Definition](https://opendefinition.org/licenses/). + + + + + +These licenses conform to the Open Definition but not to Re-Use + + + + + +#### Options + +* **best:** all content covered by an open license + +* **good:** Some content covered by an open license. + +* **somewhat open:** All content covered by a somewhat open license + +* **closed:** All content covered by closed license + +Answer: best (0.0000) + +5. [sc-drc.dg]plat: +### Does the repository platform make it easy to work with (e.g. download/re-use) the data? 
+ + + +Most repositories provide data for download, but with very large data sets, download can be a significant impediment for reuse. In such a case, a cloud platform may make it easier for researchers to actually reuse the data. + +Answer: yes (0.0000) +Note: +OpenStack cloud at EMBL-EBI + +6. [sc-drc.dg]ru-doc: +### Does the repository require or support documentation that aids in proper (re)-use of the data? + + + +Vignettes or help that are designed not just for use of the repository, but that helps users understand the types of questions that can be answered by using the data and tools. May be at the repository level for homogeneous data or at the data set level for heterogeneous data. Repositories are expected to have basic help materials and tutorials. We are asking for a level above that to fully achieve FAIR. not just how to perform certain functions but why you can use the resource to answer certain types of questions. + + + +#### Options + +* **best:** Basic tutorials/help + accompanied by use cases or user stories at the repository level and data set level if appropriate + +* **good:** Basic tutorials + encouragement and ability to add use cases even if not enforced + +* **adequate:** Tutorials but no use cases + +* **worst:** Inadequate tutorials + no use cases and no mention of them. + +Answer: adequate (0.6667) + +7. [sc-drc.dg]sch-ui: +### Does the repository provide a search facility for the data and metadata? + + + +Human focused: On-line repositories should provide a means to search available data either through keyword or structured search. + +Answer: yes (0.0000) + +8. [sc-drc.dg]pid-g: +### Does the repository assign globally unique and persistent identifiers (PIDs)? + + + +A globally unique and persistent identifier is one of the key pillars of FAIR data. If a data set can't be found reliably, i.e., it does not have a stable address that is machine-readable, then it can't be accessible, interoperable or reusable to anyone else. 
A repository should assign a globally unique and resolvable identifier, e.g., a DOI, to a data set. Many data repositories assign locally unique identifiers, e.g., accession numbers like 5639. These can be turned into globally unique identifiers, e.g., by adding a URL prefix. The repository should also ensure that the identifier is persistent-that is, it is never re-assigned to another entity, even if the underlying data are removed, and the repository must stand behind its resolution, ensuring that the identifier reliably resolves to the data, even if the data move location. For a more detailed description of identifiers, see [The FAIR Principles Explained](https://www.dtls.nl/fair-data/fair-principles-explained/) by the Dutch Techcenter for Life Sciences. + + + +Answer is "Yes" if this is the default option, i.e., an externally linked and registered PID, e.g., a DOI, Handle, ARK. Sometimes Accession numbers are offered as standard. These can be easily upgraded to PIDs through the Compact Identifiers functionality (e.g. by registering them at [identifiers.org](https://registry.identifiers.org)) but unless this is specified on the website, the response is "No". + +Answer: yes (0.0000) + +9. [sc-drc.dg]land-pg: +### Does the PID or other dataset identifier resolve to a landing page that describes the data? + + + +Both the [FAIR principles](https://www.dtls.nl/fair-data/fair-principles-explained/) and the [Data citation principles](https://www.force11.org/group/joint-declaration-data-citation-principles-final) require that metadata persist, even if the data they describe are no longer available. FAIR also requires that the access rights to the data be both machine-readable and human understandable. Having the persistent identifier resolve to this page rather than to the data themselves ensures that a stable reference is provided even if the data are removed. 
The descriptive metadata should also include the necessary information for citing the data set (see Fenner M, Crosas M, Grethe J, Kennedy D, Hermjakob H, Rocca-Serra P, Berjon R, Karcher S, Martone M, Clark T (2016) A Data Citation Roadmap for Scholarly Data Repositories. bioRXiv Dec. 28, 2016. [https://doi.org/10.1101/097196](https://doi.org/10.1101/097196)) + + + +--- + + + +We are interpreting this as a stable landing page that contains metadata about the data set that uses the identifier for the data set in the URL. [Cool URIs don’t change](https://www.w3.org/Provider/Style/URI.html). + +Answer: yes (0.0000) + +10. [sc-drc.dg]md-pid: +### Does the metadata clearly and explicitly include identifiers of the data it describes? + + + +Should have a metadata field = data set identifier or equivalent that points to the PID, or to another identifier if there is no PID. Sometimes it is useful to check the API services, if documented, to see what they provide. + + + +* *all* All study IDs are included in the metadata + +* *some* Some study IDs are included, e.g., accession number but not DOI + +* *none* No IDs + +Answer: all (0.0000) + +11. [sc-drc.dg]orcid: +### Does the repository allow you to associate your [ORCID](https://orcid.org) ID with a dataset? + + + +Data sets are scholarly works and should be credited as such. The use of ORCID streamlines this process. + + + +#### Options: + + + +* **Required:** Required and exports relationship to ORCID + +* **Supported:** Recommended but not required + +* **None:** No use of ORCID + +Answer: none (1.0000) + +12. [sc-drc.dg]md-level: +### Does the repository support the addition of rich metadata to promote search and reuse of data? + + + +We are interpreting rich metadata to include the basic descriptive information about the data set, i.e., those fields recommended by the DCIP, with the addition of critical biomedical metadata, e.g., organism studied, disease condition, technique. 
dkNET has a recommended set of [rich metadata](https://docs.google.com/document/d/1E1fA2AJDvvmxlS8g8yvpnt6BIayvZVOR7dMYe-hWIiU/view). These data provide an overall context for understanding what the data set is about, but don’t necessarily delve into particulars. + + + +#### Options + + + +* **Rich:** the majority of DCIP fields + biomedical extensions according to dkNET or Bio Schema + +* **Limited:** Has some structured metadata but room for improvement + +* **Minimal:** Minimal descriptive information + +Answer: limited (0.5000) + +13. [sc-drc.dg]md-prv: +### Are the (meta)data associated with detailed provenance? + + + +Is the appropriate provenance provided, e.g., if they use the Gene Ontology term do we know it’s from gene ontology? In biomedicine, making sure that the relationship between subjects and specimens and data is explicit is extremely important. RRIDs should be used to make sure that all data sets that use the same strain or sample can be found and combined. + +Some aspects to look at: + + + +* Does the repository provide originating information for the data set? Lab, PI, Institution. + +* Do they provide a contact person? Does the contact person provide an ORCID? + +* Do they use contributor roles so that we know who performed various actions? + +* Do they provide an originating publication if applicable? + +* Do they provide clear dates for submission and modification? + +* Do they have a clear versioning policy? + +* If they use external identifiers, are they accessible by their PIDs? + +* Do they make provenance of any externally imported or referenced data explicit in the (meta)data? + + + +#### Options + +* **best:** Clear provenance where required + machine readable tag; clear versioning policy and old versions can be accessed + +* **good:** Some good things, e.g., clear provenance provided in free text + +* **worst:** No clear provenance + +Answer: good (0.5000) + +14. 
[sc-drc.dg]md-daci: +### Does the repository provide the required metadata for supporting data citation? + + + +The repository should provide the necessary metadata for a full data citation according to the Joint Declaration of Data Citation Principles. Authors, Title of data set, Version, Repository, Date published, PID. It should also be set up to enable exporting the citation reference via a reference manager (e.g. JSON, XML, Bibtex). + + + + + + + +#### Options + +* **Full support:** The repository contains a metadata field with the full citation(s). + +* **Partial Support:** The repository has the required metadata elements but does not provide an easy way to cite the data. Required metadata should include all contributors just like with an article. + +* **No Support:** Insufficient metadata for a full citation, e.g., no title or authors. + +Answer: partial (0.5000) + +15. [sc-drc.dg]md-ref: +### Do the metadata include qualified references to other (meta)data? + + + +How well specified are the relationships included in the metadata, e.g., applied in the context of publications, does the resource use the DataCite or some other schema/standard that specifies the relationship of an identifier to the data set, e.g., a PubMed ID for a publication that first reported the data set. Should be machine friendly, e.g., ID’s for publications rather than free text. + + + +#### Options + + + +* **best:** The relationship between the data set or element and an identifier that references an external entity is clearly specified, e.g., the people listed and the related publication are clearly specified. 
+ + * Data publication: DOI or PMID + + * Author: ORCID + metadata + + * Contact person: ORCID + appropriate metadata + +* **good:** Identifiers provided but no explicit relationships given + + * Publication: Tagged but doesn’t specify the relationship of the publication to the data set clearly + + * Creators: Tagged but doesn’t specify key roles clearly + +* **worst:** Authors and publication are provided in free text + +Answer: good (0.5000) + +16. [sc-drc.dg]md-lnk: +### Does the repository support bidirectional linkages between related objects such that a user accessing one object would know that there is a relationship to another object? + + + +E.g., does the repository provide a linkage between the publication that first described the data and the data set; does the repository maintain bidirectional linkages between versions, if a dataset has multiple parts, each deposited in a different specialist repository, are the linkages clearly specified across all repositories. + + + +#### Options + +* **best:** Repository not only records article provenance, but links that provenance to the PID such that a consumer of this metadata, e.g., DataCite, Crossref, Zenodo (OpenAIRE) or Scholix, can make use of this information + +* **good:** originating article is clearly indicated with an appropriate metadata tag (check landing page metadata) + +* **unclear:** publication is there but not indicated by a metadata tag, so the relationship between the data set and the publication is not clear (check landing page) + +* **worst:** No record of a publication (and no clear statement that there is no publication) (check landing page) + +Answer: good (0.3333) + +17. [sc-drc.dg]fmt-com: +### Does the repository enforce or allow the use of community standards for data format or metadata? + + + +A statement by the repository on the standards they follow and their enforcement policy, including curation and/or software validation. 
The standards should be recognized as a community standard, e.g., in FAIRsharing or through associated publications. If no such statement can be found on the site, then “No” + +Answer: yes (0.0000) + +18. [sc-drc.dg]md-dkn: +### Does the repository accept metadata that is applicable to the dkNET community disciplines? + + + +Biomedical repositories, in addition to the basic [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) metadata, require certain fields to maximize utility as specified in dkNET’s rich metadata specification. In addition, since dkNET is fostering an information network among the centers and data bases funded by NIDDK, we are expecting that they will include relevant connections to other dkNET listed resources. + + + +#### Options + +* **Best:** plurality. Subject level metadata (ages, weights and sex of each subject rather than pooled data). + +* **Good:** some basic biomedically relevant metadata + +* **Worst:** only generic metadata is supplied + +Answer: good (0.5000) + +19. [sc-drc.dg]md-psst: +### Does the repository have a policy that ensures the metadata (landing page) will persist even if the data are no longer available? + + + +Is there evidence that metadata persists even when the data are no longer available. Ideally, repositories clearly state their accessioning and de-accessioning policies as per the data citation principles. + + + +#### Options + + + +* **by policy:** a clear persistence policy + +* **by evidence:** evidence that dataset metadata is persisted when its dataset becomes unavailable (e.g., landing page makes it clear that a data set is no longer available) + +* **no:** No policy stated and no evidence. + +Answer: no (1.0000) + +20. [sc-drc.dg]md-FAIR: +### Do the metadata use vocabularies that follow FAIR principles? 
+ + + +Use of a community ontology, e.g., OBO, or a controlled vocabulary that follows FAIR principles in order to facilitate combining data from one repository with another. + + + +#### Options + + + +* **enforced:** Required mapping to appropriate FAIR community ontologies widely used in biomedicine and vocabularies where possible and clear documentation + +* **allowed:** Allowed use of identifiers in the metadata scheme although not necessarily enforced; use of some identifiers but lack of mapping in some areas where it would be possible + +* **minimal:** Minimal or no mapping to appropriate ontologies + +Answer: minimal (1.0000) + +21. [sc-drc.dg]land-ctsp: +### Does the machine-readable landing page support data citation? + + + +Ideally, the above metadata (both descriptive and data citation relevant) should be able to be harvested automatically, e.g., by a citation manager. We check this by: + + + +1. Can you export landing page metadata in JSON or XML + +1. Can you import the landing page metadata into a reference manager tool like Mendeley or Paperpile + +1. If you look at the page source, do you see recognizable elements from [Dublin Core](https://dublincore.org/specifications/dublin-core/) or [Schema.org](http://schema.org) in the markup metatags (Should be in the html head part). + +Answer: no (1.0000) + +22. [sc-drc.dg]md-cs: +### Does the repository use a recognized community standard for representing basic metadata? + + + +There are good schemas now available for general purpose data set metadata, e.g., DataCite schema, Dublin Core, schema.org. When a recognized schema is used, it promotes interoperability among data repositories and helps with data set search. Does the repository have supporting software and tools to enforce and take advantage of this standard, e.g., a validator. + + + +#### Options + +* **Yes:** When a recognized schema is mentioned. + +* **No:** Otherwise. + +Answer: no (1.0000) + +23. 
[sc-drc.dg]acc-api: +### Can the (meta)data be accessed via a standards compliant API? + + + +The repository provides documentation on how to programmatically access their content and that this method uses a well recognized and used method for access, e.g., RESTful services. + +Answer: yes (0.0000) + +24. [sc-drc.dg]md-vcb: +### Do the metadata use a formal accessible shared and broadly applicable language for knowledge representation? + + + +The key concept here is “shared”. That is, two resources that use the same tags to mean the same thing, can be combined more easily than if they assign custom labels. https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/ In assessing the repository, consider the hurdles that have to be cleared in order to use the data and metadata, in other words, what does the user have to struggle with before using the data? Check formats, services provided and evaluate whether they conform with the principle. + + + +Resources include [GOFAIR](https://www.go-fair.org/fair-principles/i1-metadata-use-formal-accessible-shared-broadly-applicable-language-knowledge-representation/): + + + +> Humans should be able to exchange and interpret each other’s data (so preferably do not use dead languages). But this also applies to computers, meaning that data that should be readable for machines without the need for specialised or ad hoc algorithms, translators, or mappings. + + + +The RDF extensible knowledge representation model is a way to describe and structure datasets. The Dublin Core Schema is an example. Also includes: OWL, JSON LD, OPM (Open Provenance Model) and OntoDM (Ontology for Data Mining), EBI RDF Platform ontologies + + + +#### Options + + + +* **Yes:** if a formal, accessible language is explicitly listed. Some common formats (including Schema.org/microformats) + +* **No:** if no evidence of such a language can be found. + +Answer: yes (0.0000) + +25. 
[sc-drc.dg]sch-api: +### Does the repository provide an API-based search of the data and metadata? + + + +Application focused: A remote system can send a query according to a structured API, and the repository will return a list of datasets or research artifacts that match the query criteria. + +Answer: yes (0.0000) + +26. [sc-drc.dg]gov-tsp: +### Is the governance of the repository transparent? + + + +In general, the operations of a repository, including the selection of an advisory board, should be [transparent](https://hyp.is/Ionpxh-5EeeELfNgkgoC8w/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). + + + +Evidence of how decisions are made that affect the repository’s scope or direction, e.g., Is there an Advisory Board? how are advisory members chosen, what are their terms, how are decisions made on behalf of the repository? Is it one person? Is there a voting system? Do we know who runs the repository? + + + +#### Options: + + + +* **Best:** Clear and up to date information + +* **Good:** Some information but perhaps difficult to find, not exactly clear or up to date + +* **Worst:** No information at all + +Answer: good (0.5000) + +27. [sc-drc.dg]gov-stk: +### Is the repository stakeholder governed? + + + +Does the repository make it clear how the community participates in the decision making process for the repository. + +Should have a listing of the board and evidence that they meet regularly, e.g. minutes, reports, etc. + + + +We adapt here some of the principles for open infrastructures laid out by Bilder G, Lin J, Neylon C (2015) [Principles for Open Scholarly Infrastructure](http://dx.doi.org/10.6084/m9.figshare.1314859). One of the most important is that the repository is [stakeholder governed](https://hyp.is/_X3W4h-4EeeCpm8NXWVZGg/cameronneylon.net/blog/principles-for-open-scholarly-infrastructures/). 
+ + + +#### Options: + + + +* **Full:** Repository is governed by the research community through a clear governance process + +* **Good:** Repository is run by an individual or company but has a strong scientific advisory board that has power to influence decisions. + +* **Weak:** Clearly run by NIH/researchers for researchers but not really governed as a community resource + +* **None:** Unclear or no accountability to the scientific community, and no means of input + +Answer: good (0.3333) + +28. [sc-drc.dg]oss: +### Is the code that runs the data infrastructure covered under an open source license? + + + +From the principles of open infrastructures. If the repository violates the community principles, could the repository be recreated by the community? Some of them are and say so. Some things to look for: + + + +1. Is Code maintained in an open repository? + +1. Is the license for the code made clear? + +1. Is it an open license? + + + + + +#### Options: + +* **Best:** Code maintained in an open code repository where it can be forked. The license allows for reuse by 3rd parties. + +* **Good:** Code covered under an open license but not maintained in an open repository + +* **No:** No evidence of the above + +Answer: good (0.5000) + +29. [sc-drc.dg]tr-seal: +### Has the repository been certified by [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) or the [Core Trust Seal](https://www.coretrustseal.org/) or equivalent? + + + +These two review processes have merged but either is acceptable and indicates that the repository has undergone an external review for trustworthiness. 
+ + + +#### Links + +* [Data Seal of Approval](https://www.datasealofapproval.org/en/information/requirements/) + +* [Core Trust Seal](https://www.coretrustseal.org/) + +Answer: no (1.0000) + + +Results: +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/AccessibleFlags: humanAccessible,licenseOK,machineAccessible,stdApi +AccessibleProps/FAIRProps/Properties/DataRepoCompliance/MetadataPersistence: no +CitableProps/Properties/DataRepoCompliance/CitationMetadataLevel: partial +CitableProps/Properties/DataRepoCompliance/MachineReadableLandingPage: exists +CitableProps/Properties/DataRepoCompliance/OrcidAssociation: none +DataRepoCompliance/Citable: partiallyCitable +DataRepoCompliance/Open: fullyOpen +DataRepoCompliance/Trustworthy: minorConcerns +FAIR/DataRepoCompliance/Accessible: partiallyAccessible +FAIR/DataRepoCompliance/Findable: partiallyFindable +FAIR/DataRepoCompliance/Interoperable: partiallyInteroperable +FAIR/DataRepoCompliance/Reusable: partiallyReusable +FindableProps/FAIRProps/Properties/DataRepoCompliance/FindableFlags: internalSearchOK +FindableProps/FAIRProps/Properties/DataRepoCompliance/IdInMetadata: all +FindableProps/FAIRProps/Properties/DataRepoCompliance/MetadataGrade: limited +FindableProps/FAIRProps/Properties/DataRepoCompliance/PersistentIdentifier: externalPID +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/InteroperableFlags: formalMetadataVocabularyOK,qualifiedMetadataReferencesOK,studyLinkageOK +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataFAIRness: minimal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/MetadataReferenceQuality: informal +InteroperableProps/FAIRProps/Properties/DataRepoCompliance/StudyLinkage: textualMetadata +OpenProps/Properties/DataRepoCompliance/CCLicenseCompliance: full +OpenProps/Properties/DataRepoCompliance/OpenFlags: ccLicenseOK,openFormat,platformSupportsDataWork +OpenProps/Properties/DataRepoCompliance/Restrictions: none 
+ReusableProps/FAIRProps/Properties/DataRepoCompliance/DkNetMetadataLevel: dataset +ReusableProps/FAIRProps/Properties/DataRepoCompliance/DocumentationLevel: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/MetadataProvenance: adequate +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReusableFlags: communityStandard,dkNetMetadataOK,documentationOK,metadataProvenanceOK +ReusableProps/FAIRProps/Properties/DataRepoCompliance/ReuseLicense: datasetLevel +TrustworthinessProps/Properties/DataRepoCompliance/GovernanceTransparency: partial +TrustworthinessProps/Properties/DataRepoCompliance/SourceOpen: partially +TrustworthinessProps/Properties/DataRepoCompliance/StakeholderGovernance: good diff --git a/split-by-model/dkNET-DRP/summary/answer-summary.ods b/split-by-model/dkNET-DRP/summary/answer-summary.ods new file mode 100644 index 0000000000000000000000000000000000000000..971c43d7629ffefce8e56e2672f00c1e41f1ddf7 GIT binary patch literal 25958 zcmb5Ub8x3Y(>5C0wvCOQjcr@M*tTt(8{4*R+qRu-vM0}5-#LH1->G`;s;TMfs=21S z>+bHE=~j>i1%m+sf&v1f=?PX1vF3{W1q1~2pYg*2vazr+arUq`0odDHTNnYHE$nO= zTy2ei*#Vp^oPOEao7fuL8M)Y)*gF4mvUfBA7(1Dpm^dr^V4?v*K|%cw<`2yO_u>3d z!gjW%7G^Gv|AXel%;4k;)z?xdy0h~?#JMsQEMeXhEUF?5s%Ciub=rWbbHa=4j&d zUmNXz)9yb4{kO*d-*BAm?5zJ=9K!!aHwznpnTZpFkcG1iz~1S>J%IpCH7TM)U%3L2Qj{8KX-WAHA7(nWaank)*N zz1Nm{ee{muHK*uN2fC58fp?t62090!c`02L8dmqo92nU>H5VlJrLAWJ4woLN+CS6_ z8xFP{nUEM_%tpa@cS?4)P9y(3r**#fjGt_m5PM`X#Ei@VzP^1;c~~Y^17X)pO}Cb* z$phv!zst_Lu5f&m=V1QMWjPd*NQhxgQcsTW4^Z(ZXD5ir>`hswR70u;fo1u3V^Dj+ zV|X`kRU+-m^c8VpsO(WX1A;6LocDD8onZuSnaf}I>afUsEK(-iVtz=CjnZ^aU)5>M z_N9SaFS~9%xg0V4$8%EvnA>c?KtOIk?)!iD+@ELzsI&LtceWAy_ z4hU>m+ySjSGwYI7HWB7YFq_jz<4A%%ymotHXMl}FS$7cEJMX?`8`ToZb(ig=*XxYS zcBfBYMoJb?rFF`FoxYNG)DpH>ExTmbrP4T_WjK#@xb<6u_bjv+$Hintu{22Y+NEy}P!pa?2W306r?0 z^!eISxCQgE%Tu~pa(@^(Z#Hq2dF=gN9Wm>I)2tM^#Y(9WZ%v2zMa!bRS+KTWx6;5Z z4Y!))6}3|QPtyL~jq1&vcP7>?zjfHM*>b?9CKbm!TZeWHL|{_px9cZu#7&l28yx?v zJeO{%3_n{=E4@=cM&1&xG!M>A`gr+(O6kc*9f(aj_yq95Z^6=pswZmLRy%f 
zm_^T%yCtff7P>w_-nGi9fsUs#IIXl1Lme2eCMyv|cz1uEH383W7E!`zsJg-;K%fV7 z;i>WiOSsrR z2PHgWp`0I+%ifh4TJE_fP?oXThq%Xc7fx z2t9m^)MeGSdvU7Dt1U!;7HymqxW`+Z);K3TIVb@t?|nbDf}Y$0X8-fEBsswPF!l>6 z3|xokbb^s>hl%X2le-O>n;Fp&I7GKy!QZBh%ke@Va}wO3bt24b#LdfnonF0>@S7I{ z_A{gw0aSP3RFUrq1d)Mgi56xG6QX_9yBHp_j&^XipgIM(%)!7yFl+eAOETNX-wn^m zmL$go_!p+llTLw7otXhPHxqNfed8Fl@!6eKV+?62N~BFT`7Nm{N}1;y0^kHh@qA8v zA_}@93o{oXN+#lK-C*m)6(6f=QNOZ2IvashvIe+O!JhG%;@!_47v2pI7kb6SS;>~I zkL_}VsKP-;UsW_V^$h*qu`yD`mudOY@YhLhc`pCaqv1F3f4tPB;S1On6bOh1?*GqA z{}W4%>};L?PY%7#xpdqdYi_YC<}alo=OQ7$kg#CRNF?7Z97PQ;;bDZVQr;}t8XEP3H&(C?l&n57%Oj;mnI6RC-B6ucKQHC*La5I4X0Wr^$a zVPg(InL9bLXSta1Y_sCp*xK1Vv-k4*x!^}Hi?|k$F&LwiQ7-Q-q7~M@6d-3Eu6W(p zX6|ZhL-uxf5FO_2yx9=2Z~EE`Jda3nVV7BJ-U@4ub*1mZ8xR=JH)*^enO=56Yq;(@ zhkvzP=J&=F9-Z*XdxeFCWqs5>mv~N1YI)4|Ta-8^<4c7_y0j-+-7dPa%TT_;YpTOu z)EC+UsB1^@qfJ_zjh{zswC%zy*vI(|Y`qr$wzH#~@=#?RXBXORhQd6%o+~c2T>A{l z%%rx`nrRSkbjlku^~?${3HUwY`7Qm+?ycA^il?)RZL!%L$R1YkGg+8vC{xh1q3Ipg zYS9s>KZjH+ji5Pd5YM;SJftT}?J#j$!EJp-z@(jNnlkKW4e2k5M6VuxH?7^=^d_lz zu4_)V>p8em$sbx-8tF}hP2>B$5XoQHMc$Q^oo?oX?PyFxnC96#Z94~IN=P*<{Z<R)y~8efw+`KISz!q>*0NiX43`{#r8kzz(N8`U>AdW;?f#M95$zu@1x4VF{xd`Bvku!{D( z%BS!y8{r=7m4@}dtsjd1RXduZMAKyNqZG5GyD_uL=BoHD307GOsK>1PWr{+Ba3?#| z?AcxBrEj1Gd%3OxShO=y9hxoVAX4biQjd04UTkf+S;<$SRq(PReM>QrQ(FMW9|V;u z{BEs3BTVeiO~G1xaAOAMe#tJBn$w})~0;)!!>>VkOYWr9RDWoN^B zo2u64W^%n`Nr9PhjqD=H0uVZ;^y@Uu4^rQj-@%7K$V;#$%qXWc(ScONmu(6X3RQ>Z zG&@9shT7R;j(ISDuk9qcqf|MS4w$E5;JmKi{x&?CemZY|7$6sM)y!}VwJPdOaDK$; z?QsOl2sxzAL8oGz>U1Rr$?*+mfEY>l{Pr^OGuEUyo!wn9(X!=%nJr+I6GyI@AC?Xw z+wWD^kbfwXXyj7cfy?h}7rm%JDRqvij?}3F^roqsy)-W2?6%{P!(jIVIjL~kYCO3Q zv1Du+2%M_VR+*iM$U$g|EPEzSJ1nA+x@Kg@GEu?AaP}{YhjMdqas38h!3p{Ql{tc{ z%;B5~bZ~|}w3C~OZI9L;BcoSglq-uoVO3Sqp;SDt0#E!7m%oY&nL$E>Ah*v?m2B`G zV3e2V@;wkvhn*zr60 z1TWYHVU;;#fRwi0)zfvH8KxsZw@55Q?4(Zg0azFAC zqfE$(8sE$LvT6Nf!Kwuk7~Dm}>-Uf^7O%tJKd&Ehs86XP=`I$FI!?#Q#b`+to$$+Kh5bfyf%A})5K2vEBIkj>`}jh`GpuCAmJ z_>8EkV~!@ca2z`OFGljEEgZQ1a(IkS$E9&wx 
z<-~5@BT~O=)_)mi`hAiNXU3-1>y&v`ly4a``%MZ)9wt*8mY4gTeFBCT8f`Xd6AH8F zNf)Jtr+#w1b+)?HC?yQS5*X2S+NAv%Z9i*3@FTJZq`~*vvBqZDIV&sb*4~W+=$uJx zM)f}D>+@@O-BU|Vnq=nR_35_NDZ}-yiEcJ859+8#4MmXqPSwD!7|gEadJUt^r`CQH zk!PrP2*YM3aFwq9IzPPm=RS|T)|qwkagr zf7^Cyb04ZgI51~-HT0htW_mE5r!$|e8#lQBZSW42|H0RAlk@WkJK#dNZ!Yw;6x==Z z@p-w<0nFhe_S9#DH%A@T%Ew*v^H*I;?(XyY!6MM~ucBFDmd@^OF#HNJlRULx$1mMp zeaSsMA6;G*EK*;_glZ@_Ep9>m_o>;Pcemjx&;NO~gxD=he|bw*Ad*h$0!R9$xWxhD zqiwR?k-0!RcSw@DQVYQ_x(BQ@rEw{jQOh_anG9}1$HIGpIHHMjoTL`G`b%C8o1kwg z&cBC48a3+j#PgGG=t~tWQj1}`?YWoveG_obr(U?sJ@ z@T7i05@$4u;V9y`jPPlX7y7jj<4DP;a;2uXGU?LnX*=rqap`$|{JwcVgpu*lAodp5 z|B^#}mO(Bq;B9^*P!v zNkqlo&&zbv3#-7+PEpMzU{yZDMtfv2m=g>GrmkA+`k6d$AKH{9*5f(u6th-df73O2 zJQ8G$CHmq1KvDG5_L`7k=2w-v4QE?VTND0_z5lJQ4Q_^rI^OPj^OSW9mv&ySTmNR9 zK>+_4ncXJBj;~$aBH;mfY|hSpT@aYj~9mCA9&Y-WIeOo&#zbxva z2cj+LmtveNCiLlVLAy_&%Z(l4MJZB5udke->+7IzKKc14lRADZysKM6MtU2c?97(1 zue#Pjv;|PEEo|#&h(TiIl$%31-mTq@%~YQnJOnS{i1FQ1*z>wxuQp7o5|1@P>+6vG zAufahd3dCc^wGDXJz&-zSYlphp76eEhuYKS}@)12E zH#aYzV%K!I7cZZor@T^EH2HbBme*I;*H=U@ozl{t#0PtPke{8MGPKix-qp>t&;9tzXg_;SPB;_4%3U)a zLV}y;pu9M@nDz>1H_ok)Fks~rsrJ=dJ3@jha2eEpd6>4>H~15b#vS1s@L`_~y&Rl) zjMzh@sOy8&mb6C|3!uyVa0nk>iKS9rUS5`%h;IYkz{>Mih;FT}FA!e+@#kP$X~ONj zPIa579sEPs|LooP&#s-YuWkiA3;7j-^!gf_o)vPnzXpTsTSr+D5kGhazYI9eDnSVl zKQHVVuBfw_(&29QVd`65?CV$zv@Bi_Ttj($*tRxJxqstdStBAsq9Drs7`JtBA5e#S zP|hene`R-li|B^nR=;zH^&c88=nsy)F4V0ZerQm4b+_KwQmA_5MUgG- zrQY>xDR?LF{H2D;Q1CWZ|-d?sXfM=$A_^1#t0WBOt zI>xQ=7Oo-20Yth*ae{F3BLCX38zyh-RyW@^zU|>;sZYt|(DzE}mN-nVqLBs7WlWy8e z>&wwh{>IL6hgYJPw(wTM%}aaF=QUJFCq?gPq2IQBGM>vDj%X&>0;tS*^ptf}s{BG~zZjW&d z)N$Ym?NA_e)3L5GruNx=WdRg>59Go=+{w`9F3HKuCS)OQ{w< zzU*Z0$06YkfnWZ=cO2$2sS_vUV-lLC1CpJ|>8E`$6ub8>zo%NT9rciPFdo^G`1PfN zQIJicDNMU2A-+^F6yiEMvA$g5n@9&rC0JeoCx72UL0hP0e3yJTO^id<$|HtnpgbxB zUtq-S1aS+h z2M4VYViLu0V3icKwZ}7l8niz5D5mUM7tPM7HKIE|e;AzZ>9kAv+{jXShMVArvvXw; zVM_m@N0+_D1|%i_I&q22?U_pBrUijdQ+IgI1>w*GRH2+y_ftaazZ8jL0R=h=rjO>y zXXfD+IO6>}V~QnB8%^mJy?dxyKu2X^aB|}m@AO_!#w^)Zc=ElUAWqe?>_hiC*6I7@ 
z@u8l}aLsveM{_ok#OsL&FuD|OFG)t8mDuA5i<&51DH;l?mgo>&e|g3MY+i_E&7OI~M!- z)w9wYkiDoAW%1;8u`hD?Uk@(&aq{b7L0l2GSypLhUL1OEw#nA||2px_TDz^(kxz?z zHx*4@ZdfD_ZXHyg+t-Y1G)$d(#W6B;pH8B;hA7_~#vvc?As%1TFTcT}38p%X`QCpY zo+i)i4yw1NaTCsqe-q85mC76B#X6JjnrMA}K>r)NJZ6#DyWKyy1tfV~fB)98)_?m9 zyL+1%%kTWr$~!~1YjkU3a3lkS+WZrjHBc|&Hm9^03xt{cuL`d1m~Idzsec~(t@fFg z`#qQ90KCKcUZ)^kAv8YL8cjzIh)+mAs~|1G7w&%Ep(Dw>i8^lDeyFf(>TUcT~a-2?~xP7bGog*w(z_Pz`5(3hV z1qQGY`}7TE!&+ay;Hnr`pSdF@;c{-c1y?w+pp2gIZ*4@o{RS)`7`YPxM-CCaf+#eB z&7?~^;IBXA!4Q}kQw~qB^d~?HsHoo2NO!IhVM11J)6Wq0k|w~OfS@;g3>Gq&FkW`X z4=VC5Zwl#;ibb2gDX$}#}NYLUM$5nD2|X= zh)|6|Hl%?j7I@m^MUZ`L({qu(ce>7~$bWwQYiIuzY2n+{UY>YU^_}bqYqwyRl@>+! zcQV-92RI;CMWtD;{bpTUgQYknkr9_RIS|k+OC2axss^znBY1e1srHMyP9R87sxYBh zYzBA^v4E52q-4k?;=F=+?9(B;6c>J>Iwt570xRyl?B0qsfwYPXlSh-Pgn2N4dAsQf zYL1Oh41SUmVq2fEDbtqXrX-lVLz>oi<{6nPrA(TwkSy4I{4E7|#D#J07w~A7vd<2n ztnz?BdhbIi&y4Xy5@=eCLC{hk^2@JapGmUxBOx10EbpkYLoAya8*T3~z#Ou#qFm+t z&JhR;&kn_aw@!v;WNKL?p5H zhv6=cN-u-s9UA-K!!l=9-#XHh9gu--F;(@L4xgi-jxwn&olEkpmD^lA`pSG4jWkT( zSgq(;UVF;@`!7~ef2l&e@s_?)U!V*aFA0+p;opmIj#5D%EL7PVD+HZ;US9`~$%Rrh zVM(iE9LXjxTCbwUh)IZl3egpCDKlY2J4#72$Vzm_)9>*0b%L^}Fu~uh3Uvrhc6g*6 zZyON*(!j2KMSZeD`Orxi+cZ}Rn|4p7okH*qPJ*Quf+>xoV9<5gqT`HG$uu%r7StSo zzlJ3-Ihn#rDwf0#_hVgH63%ZrA7&Y#(PrrT=Zky#Z)XlRorV!_BYEW;h=JT7qT(cA zhw$VK&W*!fcIcN<&V)X`4Ho?TG*|#LEzF7o4E{8~vhY(0bhZZFp_#j6sb-*-#Y*w~ z5qD`LzG2L|&@H&^kq+DhRP92#iS+`yoijL~L5XWVnOads1CkNnTEA*en@UBp<$5Z` z1?YGE&QwG{bd7hL>1h5EW7|O}wEkdQ-yrrSu>4ZH(1}=p2gBUCZw7LE(i7>L%#25D7)YD zL<`+v$2?n)!?bZJ6Q@Dh_>b z1A!i8p&ZC?ONotHM+M&&>l{?UW;p$bxDKWzHn4oVS$Ifaw=YJ71GnQ9z>d==a4sju zJ^QUE-i19Rs^$0eRDR7|Qm+{7vSy77>Y`sfgD)MG{5TZ9`6r6&Q8aff;^Sdl{1wRn zo~V*Z|K^{GMH`mD4O?3Cfo4p*2L4XzzY~=M!zYbWwAX_p!+|$%M-J^O(OJtWbx5+Q zf0L6!Hyv)<1p2ZKa%?wE_1vRrEOz7ii3?G;<7I{5yFK0qv)O$#d8i%xQZg38zv2w|KNBj~~=C&ChQVKp( z2_TQL)<()Kxl|1|&#Zy3Zq+J*67|f6fli7{EmY{NIx0ihdN}zyK`=^>H;KAm5zr*1 zX2oiqg;0FN>=UBd(Ddm$p?&y1{uZIkSjUzvZ)ffcsv^UD6un>!THux$xMZYJ 
z=8bamKAJnx^mf1&EIcir(qvYBDmkcg7HIinCA#+${?#-0rb?h9W$NadR(nZ!*dA=! z@RemAVnXPMhlIeL-(JFRHFGpRZ;vS#-bu7R(Zl?&7mNsd6ai^{{O51n%-1CTZ#3`? zIKn}Uu?wt4+cm@vtwX*e^`!^ri#X{kVa23)etnEZKrx;>U!aVU>AjBrQp}AwZPyVZ zfF_&5$1`|^x%D;n6Xk~Jz?8TJX2pYayf}VT-+bMYRL+8<$e2-n$}UJ{Z$Us0 zqD}JLjqmS|H{l_s?Z3^7L16mTm?_%tdu0X)xO~@$ndEq@%~hx&gT$9?Bl}=s@RSr? z&(>a`x&7z|hw{v$#wT_C^>@F&t-TO^ofHqK$YP-ZznA4W`x9&~GwcVTFBLuiLU5X9 z)3OuzwdABPB67s04b|{YM3MkTL>zH&r;Af-6l+rtswuBdEBkov;+FhwtdMV zC(X*C=guYmIX|)p^`$cX%|-Swhl#Z<32d5Q$f$KI#B^)B_)ue`Fx-PektS+IQM@7Y`ye0YI}k; zN0)uR@%87PQ}-4Q2vfWl07njYN>dBQ_sQE*{NEP_ZNh@=#iDXG)hj2ajff=%bk2I#1 zKO-y^mYo-=1$0DF_()$=Ov7Ne%T^2I&4obEk>X?4hQ*D z&P(42)a$>!FmL1P%a2L_UZcOffil~47?YOOVfh{0pR|xELox&fV$BK>IPDBjd3-|p~R^_X+GO% z4kK};^F7S?4u-B&OxtMsipgUJF;htn?Sl^&;wr;669)`~X$|qvPkcPyrpeW(G$J#G z!=+QEQhZjg-f%RwQ$|)d2Au{Q>_3Khd6{&W4mIX9BByu?VpG3O11dY(uIf}YjEs*X zhIp~uCdSw;9|cpI=(=Ns=!HsL28I+MAX-S4sz>12&^Z)C$l5B&qh4INfZsX52GT3< zB)a6d9g>lnS5TxTM=h|0%c_`I6^YivkC`Wog?1*WQiIx1B$bh?63^-A#f=;aPsU8j z62?hr^Yv4K{*)FYC^kF`v1=;>`o-D?w&Uw{yuTJq{@?rMO zLpE=l`u#d9{gdnId+}cGc2Nt#@l$*g|E90+ZG3fmiF=yyJ?1K~KywvqaY#o5h%i$Y zGciRj2G}xaP|VAdLk9x?4T#l9#+;Kx0gA~>Wfid-(5ce>lXG%OpL*v0-Yp6yOJgsV zLWF}v%oJnDsXW}c-WChP|@(NL~IFv49&wx zTl)3z^o%VAgc#*k0`37*?yfwC0wc|`E4IQwzhPh zd<0!&5z92USF~0U&HHV(*+Eg}v2c2N-FgKccI=4!{zO4cGfd3DH*ib4*$A@3%XD(J@1y!em~Nu-LG+N?QHNDCif0Nja$P(Psq zFG1oNtxDr+B&z1`oOAUktQBDn(L#!%m^WeCn4bzh{ep4J%*nP}i-tJg1PFM{(}IEw znNu0r0uKBU^E@D@9JpgA^Esx%9;RQ2b*XyMAtBRRBdw>=@7{bDWN;DgQ*N!b4~c?9 z=1qmpP1uBA?w5u^I_j_B@g79D2m{7_k?bp;eR{gWS~GqrIF`WvU{r^O+hAA>tcr;K z;4c}@pR@~bBthMYm_7Y78KJYO&)o&?2Q9tJ3yhdGT9Gsct#|S=6nbrEq?_&vM_a{F zPbPGBX;J>N&L_hs^KE@9c^7wqI$+P-(98EfnfJ5Yeso|0HPxH}JEW!doL@p?gefyz z$xk8h?SU7*ESJJ3K7vqBKPNlqmp|ce1w;Rp;q~%#L51ppZ%5Up@JZ!4t-Wr6LgXPZ z{0=lIaw_t(>g{x~GITd6EoLAJa44uFG^|o)tROm4iRceN;K0&LKVLvdc07oVD5~IN z%-Sf%B>3$&k)9PFE&oCeN7hT0`q>J0f28fI=Dz|#dLZMoWa%lLs`3_Zy!>$+%PRIy z{jAWAY2dj@v}rEr7Lu_D_nC^%n*!l8aCd2&n*rySs^9fFhhVPVF($br9~$H2x4RX8 
z7A8JOj$z$B-qi?c32Aw8uE8!Adq&QrbnEH0p3iq*GSVgywD&k^rm4+1K~8Mm79;tq z_rTLjbO*N~6McQ(-tRR|!}zfg!iwDMoulB?DR@?k3u*5ke7$|0f=N&1==*ju-bgpZ zoRYD_;9IU5u@AyCTleMZ;+}8%S8ptre{BwonT8h*tYzv-l0@(EQI8ZyK%*aB5FwR>fC*;(Pk~S%5z$~HXWw6+6y!RLM^Gcr9)NSC z+k{InTmehSe(wcMSG5Mm;Wg_uczFF&65iR7+ZkE?QuOoXk&XN^GjsI)jI-0%#}7XB zQ_s0~Gn?ngYE8(jUMdn7zVwClLfA{k*rv^DO7Bd9oN4m6>2^fclrxw}20jJ;Kry`(>8Q_|jlK9Ca z#B+(Bqe)geeU!R-N)XeJ=VOH_f&+h4Tbk52MRE_V{}H8bS|70_!QH!2WhEwwzw(w< zA##+3GT1I?o@skwkY0>HkY>$qqm~kM%6p`WpkOWmRps|B)3cd9T5CHB&xnYqw6QZk zS#Rl2#zIg}Zx}#hqOkKe%wu}bs~t*58B{Ul`O8>!)yVM8HT$(+ehG&H8P;%O!SB?E_31oZz`{YZQBgXL}Y;(Iw}I6 z&|wPCg&B%ygXk}6`;yPhH8%&o5}mdHhhK$;*LBjX-JaFMt1u~_`DLXA5vmGgI_6=y zFi6(vop4<9wQ*QN;eeNX(e$yf%B%Km3xomhkEu;e0j8JH%mcFo+>RdAVlKN>rfp^B=7RYmI zN?(pKn8x}Osl2_5P9IZQC^e0)WTtRom_?xvh_ZwO!~6AJw?@je;a(DpAzrjM_e&E@ zk~f-Ky1bq?^YmhF*@Ouflv4#;ups<{-e9PSj4u9e`R}$cdr%Q0LsxDpXwe#D36)eb z!&fASC0~;t5^aW}KO+$g=7c#)z74ZyywV>Ei|{{1C;o1Tif$I{kU-&*p9&x))G{-d zG4s$k*o-WNwvd}Q;xQ}TrWFN`_KS7xmg=}BOGNDdq{3k|t5}LA>)G%HHI&4UKi*^& zNu$whM0E|By664V3^s>Z8GY0+t%9+e*jF%Qto_FWCSa?z-3tm=aQfkNqsm!hk_+CH zw~KKIGkz@>#1}FIzX>!sS1n>`ayU#~VjCG2U@SxkSjH$7{$Y8-J&3B~=+2J`S(&Sl z%BPsqaGWzqS6Ck*H3h=f7Pr-j)5VIm0~g>3dPv;VV7gJu%fV~)qH zW+|1{cg{(Ol7Oq71yPrpJ~i5Ysg@SnVHh!Wz;QI3R(U8Vx{Vo`>#xCh2TSam`R}85 z2=NWw`hD6<;*{eFuCzi=O`1tT(zt+pI1{9G->y4io8d@`1r6OG9=1fMP@*gp1qrJ3 zk!m13OM+@}_3rkld(|$$=Ug+vu6O>b#AvFa#>y4IXRC9-Blz=+Ekve}Qx(|x6Anfk z#XHzHN5u#^4Z8`4G%tdyZ1fPNAPO6*fp8Z5=TIGOH65!POT4z zszD@Hg<=JY1>LXP*QcJihu^OmShEk9vF16IYM=1%AnM=hxp`gG!RMfE*lhS9*)uNjrqosWoZqn)tGTLJ<}}!) 
z7%>EOrENe9yQ(u;FzY&o75DfCdtz@7@d*)ezR!A106ZQb;YG>Hu;ok?KL}9jhYq zq08s$CF)Q-iPy?XpMA#43l&hXpwp~Z;Y*+|o9Uf928tK&%s&?Z`D4)(pC!=uE32Cb zjOLTMGI8zx)>4IQCb6nU(L!!(f2*H@MHcVNl+Ci;t08-aZeDP(()qGm*Y%47=64lsRl%0 zA3Tn?lQD4}rDFGZI?X3)H}^THJtKRSsiZ_bhR#x!cpf^1&!UVv?nT0vFdsr;Cehkv z>`TCf!M8RXtZ1F9*`{~x(*pBJ`x3jKu{8-JAJVd%no^0Q+ppb`-?jab;fMHx_BY?A zZD6k`^RK0UZRYexc`}m-1TYXg(a+n8Ourkjk2M;vHqiO1Kd@P(1iQIdrVlN)pwKrQ z8)s|@7wfIFxGlY}c$nAe9n~ z90Y8!I(@p%X}i%wnWG>TJK_o0y-Hl0)IKT!#TjM@-Y`{C@LQl&sZy~;LZy(EX4r&_ zb2NNbu34|p8gWSjTP*%*QAS@(#YA=-SqGZ?-ZMKD2?y*CcLH!S=)4s#=mN&bo>ojm zqcW*HOxT7Xl7R&^@m&!)*&2z$?jbyGVr-vJAn-00S>rgO7+^?x)2K*5jV|wmb{86U zxL%cV2)rQ7H5G`bXf1!2Jz1_fp4gb^(w{@9+St58X@Ly9t1@4?OlP6A!xpI~by6$y zp<5U`E(B@rm8_(#8T4#nan9lApNY$ncxl_$!%SgS`iH_rKFXQMINc100RzcNjomN44N?$|sO zARly&jPZIwR!3I&<0Zc=+9ez_%)K@Ka*OpuF8mujz-r*J^2C;i%(6+jvg#}-{_#T= z>|rN(!@jk8>P$tA?p&8#W|Nk1k_Y8GCNZo_dAe6iR&OCm_qMX!7n$bp8ZsgWr3`$Bp0q2JaJ|0(BZR zQHM-0QKUaeHgx~IGf35D&@iG%>BO=nc}54ukm(R`=~9kkk=3Q5$Vq3G7{W-KV@3tY zM#f`oew_+~q@{S{Y}kh(J42PfGtV_}O4;TOK{<3pYE(MZH(hk(r1huIwSf$Hn7_ge zb1Y6{(WFzmysczKWSHJMquO`VYfvXbkTrMnV4?5vLqGEjTu=R>f4S!>BDw{NXchp- zdhHu5+h@)9j2h1iz*AyX@cPr};i46d%r-$fSct$E*f~V`Sx?E%9d_pN?e-#9bd=tQ zA}u9q5vCR~Ny6BJ#6TzRqk&~BBN{PAkg#S(VWA+zUKi%C3|75K_7$M-XiPe+oS7vR zy{7**CY5#y#vU^#b;T0=s|;%Vk^6ckIxu0*wn7uC9s^gTMlOTe7gD5@5yDx9jIgvH zHb;pgYiF(#8!B8xDI|N8@va;*D9DT^w{8^MUTuF&xS~`CCFhA;W51_S8(0qPysS+p z{AGVh1~!B?9plBu1CY-^7HIx4rn2I4ki(>wwGy$_+h!}TFC#8Nb?BF#s+DdgWv%q7 zB}{g33p&DcKOW$2JOjS5D32=ch-@I?zh`dZ8m6$L&HT!8T^R)dx@ji{Trlg^Lv=FJ ztwJYJSNRM`EXTQ|Ouwcv)4UzZhSGKJP5**J5a6%%G#+Ur`%wFdP=7A>`li_y+cI#T zM9F^zDbF?y)ftBt4y7Lq8;|6NJG2b|f~X&DGwvs&d_BG!dG=XU>vLaeKa8VIb{etv zshG_0_MtGs;7g7t=~?h1?sr>j&H`ukCuk8x5hz6|B`8~R-F(9%RU&M~WxTPmxGjg_ z`05TO7^|SQNQFIW)p%I(v)K5|GYJ-b*$7vJu;VP^BI7~UL`SX9y13dTzs3*p-ahdP z!Zv(|sl?`BScMCB`VC6mK6MV=X=%^|&_SOKXg=32VCQ<_=^?775sIJCJ7idh= zij{r^39A+9Z0*nNUg-^yrpoax_XBPk*Ky>%-6}}Ip*Dfi&Acj zvl9VlYVFZ>wYz_ph{A;^5(u2b!K*)F06)`7QARdZ-&`aj_mdf8i49RMi{p*)!VDpn 
zE@`tMQ(BaK|G!-k*cXWT;w2px>>E`de}t-FKTntS`{PV3m92umfD#zl5}K;HAP@lD zD+}V6F@CrLdA>CHL6jgV7S$Y2B{BIP5CI(#-ng=TVi67Dh6TO;5^P^;7Js=xI#R4L z1kb8YdM{$8_WA_rc|%e$$LOJE&)0~n2GM?Nu_=n`8bCi~nx~0?VE3?gB0DC6w367< z0sSjxHU`;CrkF+gb@mJotOVQ*qU-A5>iN+M-vqzXEiezAGW7K{BatsQX^JcobWt-D zkE+~?B7>d0j#3C(w#{DQZ5pf!yWT;0XE!nayYlF$gsF@*Qd3R+U@*Z%SRkKusVYgq93a;-4A-%0V<~jS5B> zVNZivjdmW(;~dX}zc2L+Sd@vV!+$agmvxMl=9pffQYwDZz!3)jSAdj- zR;6jokXEPY5jT!BNPlMFVF5v|K!Jb{{4)a1Imz}fbJJmip0dG<>S#lu8!g^#&228F zZLxfkR2fvynD&dgcCMFb)@CSz&*s9QVs2P;mDxPnm9nIKx3cqlD1WI+w|M}On3{`^ zEGo$)Yh($x8{Z;~NfU=T7n~{JcQ{dqSQFuf1%(LAx2M|z!~)2=smg2_8#&oTS-vU0 zAU@FJ>dT9ZwRzl!87gHYXtSx!0r*HI4E7u|ehn0-R`)v4hdJt-5{2xqU=8ICK^vZ0 z18}=KO_kU6B`&5IgYsTF;N;gLmw8VXNsBT}3tc(xgE?cCZgwlojuxwJzUnJ_x<`bf zxWqUVZXe&okv~1D-t_NUQeybqs(X`^)5pQrNz_@y{DWU`uPlGv()WE2-Xl&acMK2S z-}g>~auK86_iBL%?_3%XS3m=g)wOyTd@E8`+3yjbUzn@x)Zo2g1La-QK`gMw8%6AUxaEV5(!)Ud73o*P$jJ0{7_ZPBqae}nK^ z$|%N3l(}FFx7)U_HHZ`#Szw^3rbOn$Nua*<%P_i_RA{qAV`?P>z9XkfmzHex=F(!wBlsg zE>S+e$5$Se(J1?HLBUcNL2?Yz!ih%tw?d{C)wjRrz_hYWi;Qeu4NfzgU>4ov`Wf|M zpru;nGbE@Pt^vW3e8x>sSI`RD3wZ^Pg^dRc$1XjIwO|cfn%OTOUlS2c-FA8+UIEQ$ z_>7#e=NgPj-25B2B4M3f*<`jQgF9zuW^l$)Ms;tIl>dSxnATRtq|Z2%)>6mSdri&r zfidSGy~2MSm*34>*6%&IyreyHdh|Ze;e1#T*pib1`6fY9iHi2V#O~`PUSvZO#01ocpi!m3LDyZl zB#Mu}Sj8A=-b|qhyKhxA2Zfsyghpcr&)dQd#7`-d0;vgR8Vj~}VVeD%ke6rLAzKrL z_tOH_d(l!()IY-9DVikHXW2lfWcd=YQdZPudM41t1?pa>CY3b?X2Z+^)#L*?W>*E2 zgKlIV0e>P6t5Lg(PH9fSEiWDLC--Or-!H2_Nb&RX`4aYE>gNJq{AzcNBl}cy^c3bw zNd|ud58Rq?qr=9T7j8j!a3&Vv6rxsCpP3XP4XxR1fmXROx2}N5B&W>Ew)`bAj5ZUH zMcXJ~6VABoH;WF466BUD_;*&f??3dNKToX~LpKeuGF33;I94(1jW7JE!*M`Yw_sr= zX+$fU+LP0+R9`l_SkIDv+BdoZ_=ZJ=^%NF&L-{y*S5QP>8Vzd7$gJfqzR+vbmWQ*L~BSndWs)2+D$w<_sz%3rGjlYUx=@R?PRikgv#aExAqcT#-XgkH`Zo}hYF1Oi8Yu)C z9XAEW@fj;EewBmwv&t9?5Y8#UJ~KY?$p~eHAPG*E5t4ITCT3%9$RZdMfWyZu8@BA2 z_tRVck*wAWS$PXYP*0q}_|5vfFGi!7Ap$zX95kb`jM2D_*l4bR5FJ1pkHlP3JeryI37DMr{6|F)o^A5Rc-` zh}jZRaj9(O&Cfkgb49%Haos3f`l9 z-ld#bOF&qzr;DK-^;Z>(bN_x{<*Lbl(^!Hx9;%ST#`h_&gVP=H^IX?>ok24D9z)8V 
zd>1n2eL>?Af5M%yC+0Hz2|d_QE5Vxgq$7frz9cxn zjG+a~S^W~53S|-2sKQc%6@%nvw&Rx9Ityu3sR^yw^x1z#yLoNKq9+Izf?z!6z*ZH6 zxxLLLiD2n3rN`?Z`CQ z#?skzd~xP}($7Ss^bTCZKME$B%Q&Xg^T1vokq|A>7LMKj(fzFj z*sxW#TvJs7pkmb%$=aRp5(OYZG_nj8!DnX9bhgm^Iv&zogEc*@4qlOKXH%(w0@!(& z_nCWQ4YnTBh22)>8_IMMLdz(KI0@T)vy>XsYB(G?EL7@3fNfSXJZADfJg`i-=&mEG zu}wlDPYkUKAF>wVOy#8lBP%xkM>oX4BK^?IO2}&&$#%Zi;i7HQimf#ABdvDu@(d+p z()v58(1$tF>y534y(qyxJo&3I7HifxE5l*wy$FdS7XrAH5p@3?3CFG^rm~*A&&9SR zLJN5C5{MMYNcI8efw^f?d&(@fYTd|`Z20)4^yXYVo>@ge7aZGGkY@{`)2M`fZj2>Fsko{#ZlDiwIcph zr<>D1_xNgMGkQUEDxVy+B6tTi;17FfXyN6no^}C+-ZKdgR)QbQjjd@Fl_Cfg7DERt@2Y<)?D}b+Q$8>T_Fz@gOd%k6UeDdd{T;$dxG+u$&2&Qe zMo`se4?Qnr&jDIS>Gc#ApYfR2r6ra2LZGqH1qoV z)v=L>1xParZclzaz)jP`9)r$o5!?M1Q7Tj%7OiZ*si*|R+M61l`><|c!2OzMK&|b* z1gx1(IZc3JJ%}+%nD$!05DFc}_{Z)!V#}b2| zsANoCFg_(U=$&khCz&zvd2ID97{T&Vp4v$y)Z_C9dFIc8N*A*`m<`<=gk_n!`PS*} z^?hcp`8tf13^dq`n_h;EXAD9FkOC3xD1v_FY-w7YuMU6m1Y3qqP!k0@PQ98*D*8x6>9+3l?F9(dukO2f8yQ+CKD&?wQ&Pa(x zoEj3D@bc)xlIh^)TifNjhl|km`e74PWzLe-7F;Llc&(kvb9ehueRFU zIktValpYq*Bdi3d{RpU+TIS4;d;IINoO}G!U`1|9uc;g8bPXR1(KYs2y-$c!P zL+mG9398B(3;`hT@zxTG_u?@*SqvRDE%QeUH-&O9-F6EAZ53Yfjf3&tcNX1S3Nf}o)=w?{J0x!f@B1tAL#5BW3sW$JDg z^~Y*!n^cHRMs7lk7o0B|6DpFIrRX6D#;8ESK+E^pNkyWTvwZcca82H$FM`?dp0&Y$ZWvrg?U5<1m@f20YG5&#gUioeI_@EvaZMGJP#fuwG}rz5Ec8D$e&&*D z=fF!YpuD9p1TD{P08^*z=@Xt$VkN6L9KOXRD#K=;R4<~ccPIWN1o zc3|J|y8TF|f8io9*DT99h*oW-FYzU+Yz}y|O6&z6$VpUQVk03|lzv|UU0?e>L=>U; z7tZJW#;hWBN|h*L5E5BE1Vp1tBzuP*u2g(!z)p_8OK-%7$C?0%NkxgMv8u8lvPFs% zK%yaBQ4nsdq%7dU%w-l(8zDV$Pq?#xZ({p4c5B)(*(tkl==7ka{=&F)t2&G%KSRT3 z3i=9V0y;Fspz%vcwJnlO05!pW!NGmNmB4+^tU(!-LyvrfBik#_GHodA<*;CG?JPl+ z&kBJ|9!fgxOpWW5BSL!tO%`$>&nV~68eUunKmvH2g36`zRf{y&hUv=0tDlewQ3!|j zxd%hG2E)gxWsxB{KyG9@R?Z#J_eibX?;%G)A1UPSKuF&m=NmwU zK(AR%HTv#S8@8u$*%F~ely(bt$rYLQtxprPNYaRfa|p@RlQ7 z=e3zK8bETK{G1p_tTt9Yl9jx@eF8kD5|;}5wuS`p06F3}>E6#VJ3+}k>G){dB|}0@ z;_QmYqMq(rMgJ9hlz)lv*!_v7`U26`6V(0;GR-02=F{9@)yY>RwlhfNqa4 zXVFu`CgfMn)&zAz>o(UXDH}Gq@kBpdBwM+v*gZB#I2vz}%+cSmr)=CEhsqECh#Ihwy^`2i6n(w52LW(^>=XB 
zoyZxdZ#7LK+YjkYI>U?HljW7KrMn~ce@G_1-iJ>Qz9lwX&Fx@31_lj>>^NeKmcyyy z=tKwkKyoQqxiv8fW2HDcrkClpny2b7xY`%dY7BE11Jj?pwIU$;^<}cemnG26OBgoF z@5;1R*anlaF&aia=3+l^F_lq5*OA^ln=v<`6OrM64V>Yo=nWXl3 z%Ft#gxwrH^^y2u$eSx>ruuiY#5uIQuBi-SQvev$)xm5gRz~pJ|mScDJb0xk)V*e&# z{`trKUc{LllbA&EcR{a%`^&y_+;-$1MmZVEqM*GSa}_@UYv39oc`{=na}6O>K2hb> zBJUa@*PF&~>$Kbz!s}829Yb#mK%qV$q9|z z*cT@1;8W?dICqAa^{Od~7G^lLek>l-u9z9DK?Cv=J>0Vcb^g3kmYMRjOMD%)!a1`Oob zU$8A2r8-UIhg3}<8ch~P)c3i$XEa~@qBc*8W^VyAq7Cya2V&!m>O1LTQxEJJ*gYOTzNK7YdUZRzr6VPVH?UeJlm3X8ifg9mUr zM>7;CUOC+`)nk9a)BVJ-RoM$rAQ2%)j=W>38EFL()525bk3@=PyMDRMh+n57IP<+G zFN0jCtbYyL)&13imi?)+OfTD97Jh8NJ3I5z$YKSEa&uTnuCNrVPJk_WO50As4je0! zhNjHyOW9xQ27%Pt_zkN?M{shXu7+rppp;_3DTHsq3`oVjo_`1+0My3FK!QF8NzB$O zg`aGM!Lc*mx6yLC$AewyzBD!a`0lNTk6>kqH+ucgv$g=3b<+C*0}kp1z_T%l9j^9y5N}E#>4l49M?@Rg1b>ErXzF zX}(a*-cOQCddQgXDtD2utB<@sE>d}UPGX)~itL#BJuEjrcnjCTR^Pm@Rfye_C;Ma= zzXwvC7GoY5rm|A~ik4LE1cAM`~jyOrAepWCxzN|7581;>8cEN@mYFw28caqsVJykl}MLUt+&$18T zt}t&?NpxbjP{Q4G+Bd~NM)Sg%+g|Dhw+h{mJLI{%OMj=S-1xS7V~j?XB~UL3|Yr*^r+`dpgbp5 z5D%!R@N6>zec+WZBUK*CtHo8g{O3ROzsvAgr>$~cc57%?^+ml%bmNGiymIaj14I*j zV%SQfTp8H9NNfZECeWFMtfB11*f^3xC*9_~pX?iTJv#M)rG49SImYkbPztJ1mgfTG z`|(-gdOYaJC~WcHKZ1M1&O)D4=)bn$Qn(n5%T#E34KIUoL=UH|0F*&h zM$ddoQ&@v@oTt8*Fgd}u%B*{BaaWBT-!{LrJ)jg$^uJ?3%AbJ zP;Npr|32;d+33Lgs9B&;lLv%C2v1zFQCUjq5}$IZa=7%ah$Gq^0TCYm84BFe#L_$4 zIDQEFKq`Ctv|?)P-P>q;v>(Lv+Qw}k zLru2dT%>@?%waQ^rwx>SIXI23`7eoVBjI#2 zVxpC?ur$*JGWLK!XqU42_Hp~uqjJ* z!7qd;WM9tx{9y3RN(iDg+PHzuSjwJnt^oV@n^GJ%1sj{BODI4g;YW{dG3kpSlB86`S5Ci;{lbm}kWYuQo~@%-G_Z zaaFp7#w#hap>!3#E)h6EghRJXGH;)t(fg&;@D^4kOQvDg$9i|Xez}i9=hCRTfG{~p zB^oVs#PpwXZ5(rdLx&X&r(!%S73~P!EV;y;7X(BNQeDJG-^7KG8W3h-CB=>SguO)> zl0`WmMAU17^D-5(8%EWzlRStOcd_VCr;Qb{-0Ri9y9CR?Uk%e*qS74@CptkUQ}frR zk2h5^?AgPDNY`1NtOx;n$oi^^@&m&B=(EIzQI0d1eB<)Ou>;5w0zJmp+h!m5 zOfH@j$V13dX=cHu>JeRxdg`xHEw=$o-7! 
z@+S-$gmlVLF6dL>7@A`~O|UHIOD~T>*Hzn>NF}r03s+%6LsOJxc9%Ph3vKJpco#Pm z-7nXSGaVzUbp)<7RtgY*o`to5f~7pGblxeuw9;mm?knoI_24jo!ttIvMYs(zXfo@H z@UiVFv`-QQ-t4XGZ)=K|au1agBQ~pJ%q;RDc$lrWS>P4weN${+*G7I`yF7=sB!Slg ztE_j^#5Yz#JNGf_7_ICLURk@(yFYrkxo*=9oqli^$?AVvkRB0Z<_Tvl2L_`-xs8T!4ccj>81K46Fo&9s5M}8qpgW{~$4u{L&@IrC&wT16_h2*Z!C+XZ z@G*1-jnsMVXe~K+4s6j-#WH~v(+eJuYMS4r2Xvz{4-cU;e@*N-Sz2Y-2j?dcQ8?&R zFRKz{v7k%PgK@JgFmDar3^At^F{Aklp3TUh8uhI%OVq|f%nR1l{aU*siNhJaA*-!M zWPqt6;e1wne{4RlaA$`DGz(n)CwFJ$*cQ>Y2tw$+2uDb2=paZ)`>dfWh|)A$dY$7# zY*!}!K1q?ZvgRuW^n%z=wjDxn~b)h)v!4$a9V`|bmc3(i>9n34g z==xS%^gGV9otHlOhU!^yc8tZTbZexkhC(y7tB_k0afg|CZfe*TK%B|! zviz?BDkg2$(ShG2k_@44uw2F+k44&Kyd-w~H^eYL-n5!*)ITy5TtE*4J8|$z*U>kU ztDjq4_xxSo;~_AguluSP-2M#ED0wvk6`WqfTM$}c|CckvF4-6s(N%UeXkNnTv8&lx9cH7mC_UL9Ze_Vz-ESBYLC_@9f@&FR!0HCNKTc-n zfsMt+GgoDQci0T!(7&%TXS3@z7W~zu*>`^Et#75pR-CoZiZ1TP8R}eUz!g+L2ajU8 zA?nknz2FnNYj%vi$}fBqYaj2#pJ`8#1?R%?^!Kro%rZ>|B(pnBbR1F!wPRd9*(@V= z6=hP+I6=#%Wn=tYeOpk?9&f_CnEzbK!Q`EXOaTIZL48D3gG9pHrar4dl9kmZRX{hv z+*?d?gEU?_ln`G;MonkgCnEt#mQk&@I+t!e1>fOOqpmEdG*!4w@xPaP@?mOmYP8%e z>-&RJ)Bt!6>tsA+0SCU!>bF!F*Ho=^9B9Zq0`%*qq%^LPBnSRo4d(B+C6R6!-1A!35XeE{5Tcm!!x-O1|$q> z>SPhMEt8}8(_psD4qYq;!zFzHhQ1m#rjJlfvy!khCvovSWcyf|9ksMtk!@bGjZq{w zodj$pXk$4>=_Zg8w%!n>YYZ>qhOS=YC>5{+_L3_PmjMmA#kB|T>k?Kt?rV<8vdG5j z(YUn6Rj^vd--nDeethNIWQVDA%;I}^f^%z zcb8|^Q4Sj+XE{dw3$N+kfEQjO5)6YkSK4^8yVdWBf2hrA=if#**UJtxsO$}EcYK1iONnx}gAT#E3Z!7bb)o*{jE1yy)a+n@y{@ay_wsND0P;mSP%@dSOQauBwwW!VES zo3Fj4NB(-YLFMmTi=^+7nYAvizsqHdlvB)OHaBtQ6N%@cq%6o@lR+jr+Y2)zVh=as z2~j^`1i#UWvpU=#9iKs{Ls$+?cX)`5CmjaamqNS;P7u)~l9P|alf&6YF|x|AG`{Eb za8REa56Cc5sLE?g1C`Yjp1v}7w6nHy`=e=oV~VO{4;Nl&HNk6Wft8Ly2&i%fFoB3Q z#-M)!6?=iyTrC}%E{_i!8jIUM>Ucf&b!4uCe-?+{?nvb4l2{~0)=7uw3ucKq>a%F? z=Rly4I8Ch>Zdv>2q}4(sVb-ZsdI4fIgMl*j#F+_Wblk?7&uapaUd!B0ywHUl>@Tmq z#I#czWqFElx)#+M*0<{WO=L&I1F{0}VsyJ&3p}^3H-D6~9`F&;$viWX9-he$@e8Li?I@%m{S4q^2}P*~P`|C`CNe1`{k2A^!otIWyN+Mj?QND> zi1x}{E}cHx=_JD%)0{YT32cX0-$&r5v^EH(e~))Z3bAE6>g%%O6!LXqI_X$p`rwi? 
z${?xuk^j;YmFKWkTwt5JiZB{WF#TFmNoMmlZ(>GYg0!7v!6ZB3m)RNe<6Xc{jVVYG z))xI9<5!7as5fE@NX!OBnrRf#eYHh2c^QlfjzeU{w;Mdu)h#0 z{8RC-{r>-;?f8G12QWn#bI2cb3jfslYya^_E`Z;n4|7+)nF0Q(_t(MH-+$&k>fd^m z|5Nd=p73$3^;_(W{yy^hr{-U;+(%FOTQ*Gphu>6JL55xCaB#S=*BY$Lx|8|i+W!G8 CDrR>8 literal 0 HcmV?d00001 diff --git a/split-by-model/dkNET-DRP/summary/summary-coordinate.tsv b/split-by-model/dkNET-DRP/summary/summary-coordinate.tsv new file mode 100644 index 0000000..315cbb1 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/summary-coordinate.tsv @@ -0,0 +1,46 @@ +row # slot 614b619410622.xml Text 614b619410622.xml Ordinal 614b619410622.xml Scaled 614ba8756c8db.xml Text 614ba8756c8db.xml Ordinal 614ba8756c8db.xml Scaled 614df6a85b9b1.xml Text 614df6a85b9b1.xml Ordinal 614df6a85b9b1.xml Scaled 6155e4b25d5bb.xml Text 6155e4b25d5bb.xml Ordinal 6155e4b25d5bb.xml Scaled 61573f38ef525.xml Text 61573f38ef525.xml Ordinal 61573f38ef525.xml Scaled 615b72f7e220a.xml Text 615b72f7e220a.xml Ordinal 615b72f7e220a.xml Scaled 615b89fa770b6.xml Text 615b89fa770b6.xml Ordinal 615b89fa770b6.xml Scaled 615ca4418ee3f.xml Text 615ca4418ee3f.xml Ordinal 615ca4418ee3f.xml Scaled 615dc910773b8.xml Text 615dc910773b8.xml Ordinal 615dc910773b8.xml Scaled 616066f394fa6.xml Text 616066f394fa6.xml Ordinal 616066f394fa6.xml Scaled 61609db5051d2.xml Text 61609db5051d2.xml Ordinal 61609db5051d2.xml Scaled 6164baf6052a8.xml Text 6164baf6052a8.xml Ordinal 6164baf6052a8.xml Scaled 6165b40aacafb.xml Text 6165b40aacafb.xml Ordinal 6165b40aacafb.xml Scaled 616712744a595.xml Text 616712744a595.xml Ordinal 616712744a595.xml Scaled 61676b2d11524.xml Text 61676b2d11524.xml Ordinal 61676b2d11524.xml Scaled 616f4035b5cf4.xml Text 616f4035b5cf4.xml Ordinal 616f4035b5cf4.xml Scaled 6171d0459add1.xml Text 6171d0459add1.xml Ordinal 6171d0459add1.xml Scaled 61732f9ba6055.xml Text 61732f9ba6055.xml Ordinal 61732f9ba6055.xml Scaled 61735ece7e758.xml Text 61735ece7e758.xml Ordinal 61735ece7e758.xml Scaled 6176c84f1c023.xml Text 
6176c84f1c023.xml Ordinal 6176c84f1c023.xml Scaled 6176e09b35d7f.xml Text 6176e09b35d7f.xml Ordinal 6176e09b35d7f.xml Scaled 6177452b8b6c7.xml Text 6177452b8b6c7.xml Ordinal 6177452b8b6c7.xml Scaled 61782a94716e3.xml Text 61782a94716e3.xml Ordinal 61782a94716e3.xml Scaled 61783f6c4c8e7.xml Text 61783f6c4c8e7.xml Ordinal 61783f6c4c8e7.xml Scaled 617850897d411.xml Text 617850897d411.xml Ordinal 617850897d411.xml Scaled 617864ac7b873.xml Text 617864ac7b873.xml Ordinal 617864ac7b873.xml Scaled 61799d37c3555.xml Text 61799d37c3555.xml Ordinal 61799d37c3555.xml Scaled 6179b9da80888.xml Text 6179b9da80888.xml Ordinal 6179b9da80888.xml Scaled 617accb79fb53.xml Text 617accb79fb53.xml Ordinal 617accb79fb53.xml Scaled 617ad2ab32afc.xml Text 617ad2ab32afc.xml Ordinal 617ad2ab32afc.xml Scaled 617af3b01bff2.xml Text 617af3b01bff2.xml Ordinal 617af3b01bff2.xml Scaled 617aff8495ced.xml Text 617aff8495ced.xml Ordinal 617aff8495ced.xml Scaled 617c779c5bc94.xml Text 617c779c5bc94.xml Ordinal 617c779c5bc94.xml Scaled 61800a6acbf2a.xml Text 61800a6acbf2a.xml Ordinal 61800a6acbf2a.xml Scaled 61805a39e24bb.xml Text 61805a39e24bb.xml Ordinal 61805a39e24bb.xml Scaled 618063fc1eba5.xml Text 618063fc1eba5.xml Ordinal 618063fc1eba5.xml Scaled 61816a627e26b.xml Text 61816a627e26b.xml Ordinal 61816a627e26b.xml Scaled 618180e41db0e.xml Text 618180e41db0e.xml Ordinal 618180e41db0e.xml Scaled 61818c0471e3e.xml Text 61818c0471e3e.xml Ordinal 61818c0471e3e.xml Scaled 618194d41e94a.xml Text 618194d41e94a.xml Ordinal 618194d41e94a.xml Scaled 6182df9c3d9e3.xml Text 6182df9c3d9e3.xml Ordinal 6182df9c3d9e3.xml Scaled 6182f66084e0b.xml Text 6182f66084e0b.xml Ordinal 6182f66084e0b.xml Scaled 61830248beb1c.xml Text 61830248beb1c.xml Ordinal 61830248beb1c.xml Scaled 61856c2f8d135.xml Text 61856c2f8d135.xml Ordinal 61856c2f8d135.xml Scaled 618772a4eda31.xml Text 618772a4eda31.xml Ordinal 618772a4eda31.xml Scaled 61895560bbab4.xml Text 61895560bbab4.xml Ordinal 61895560bbab4.xml Scaled 618ab2f1efc9f.xml Text 
618ab2f1efc9f.xml Ordinal 618ab2f1efc9f.xml Scaled 618ac6bb76674.xml Text 618ac6bb76674.xml Ordinal 618ac6bb76674.xml Scaled 618af1fa72f85.xml Text 618af1fa72f85.xml Ordinal 618af1fa72f85.xml Scaled 618afa63748f2.xml Text 618afa63748f2.xml Ordinal 618afa63748f2.xml Scaled 618b05ddaf1c8.xml Text 618b05ddaf1c8.xml Ordinal 618b05ddaf1c8.xml Scaled 618b0ed289968.xml Text 618b0ed289968.xml Ordinal 618b0ed289968.xml Scaled 618e95d1e58c7.xml Text 618e95d1e58c7.xml Ordinal 618e95d1e58c7.xml Scaled 618eaa9fcd36f.xml Text 618eaa9fcd36f.xml Ordinal 618eaa9fcd36f.xml Scaled 61941c2f7748c.xml Text 61941c2f7748c.xml Ordinal 61941c2f7748c.xml Scaled 61942b69bdeab.xml Text 61942b69bdeab.xml Ordinal 61942b69bdeab.xml Scaled 6194378833fb3.xml Text 6194378833fb3.xml Ordinal 6194378833fb3.xml Scaled 619441c4acb77.xml Text 619441c4acb77.xml Ordinal 619441c4acb77.xml Scaled 6198116a6dfed.xml Text 6198116a6dfed.xml Ordinal 6198116a6dfed.xml Scaled 6198331eb3593.xml Text 6198331eb3593.xml Ordinal 6198331eb3593.xml Scaled 61985711a8424.xml Text 61985711a8424.xml Ordinal 61985711a8424.xml Scaled 6198648660f3d.xml Text 6198648660f3d.xml Ordinal 6198648660f3d.xml Scaled 619c1b5161bad.xml Text 619c1b5161bad.xml Ordinal 619c1b5161bad.xml Scaled 619c34b5360d2.xml Text 619c34b5360d2.xml Ordinal 619c34b5360d2.xml Scaled 61a50fa18488f.xml Text 61a50fa18488f.xml Ordinal 61a50fa18488f.xml Scaled 61a66d491f16a.xml Text 61a66d491f16a.xml Ordinal 61a66d491f16a.xml Scaled 61a67946c16f2.xml Text 61a67946c16f2.xml Ordinal 61a67946c16f2.xml Scaled 61a67f53dc9b3.xml Text 61a67f53dc9b3.xml Ordinal 61a67f53dc9b3.xml Scaled 61a7bc9e3a002.xml Text 61a7bc9e3a002.xml Ordinal 61a7bc9e3a002.xml Scaled 61a7d3c6aeb5d.xml Text 61a7d3c6aeb5d.xml Ordinal 61a7d3c6aeb5d.xml Scaled 61a7e89c785ae.xml Text 61a7e89c785ae.xml Ordinal 61a7e89c785ae.xml Scaled 61a909a92778f.xml Text 61a909a92778f.xml Ordinal 61a909a92778f.xml Scaled 61a9183722786.xml Text 61a9183722786.xml Ordinal 61a9183722786.xml Scaled 61a92a18640d8.xml Text 
61a92a18640d8.xml Ordinal 61a92a18640d8.xml Scaled 61aa5882975cb.xml Text 61aa5882975cb.xml Ordinal 61aa5882975cb.xml Scaled 61aa8e7632db5.xml Text 61aa8e7632db5.xml Ordinal 61aa8e7632db5.xml Scaled 61ae6db443359.xml Text 61ae6db443359.xml Ordinal 61ae6db443359.xml Scaled 61ae7c9289651.xml Text 61ae7c9289651.xml Ordinal 61ae7c9289651.xml Scaled 61af958cc8abe.xml Text 61af958cc8abe.xml Ordinal 61af958cc8abe.xml Scaled 61afc04c3cc7a.xml Text 61afc04c3cc7a.xml Ordinal 61afc04c3cc7a.xml Scaled 61afe38742749.xml Text 61afe38742749.xml Ordinal 61afe38742749.xml Scaled 61b23d317644e.xml Text 61b23d317644e.xml Ordinal 61b23d317644e.xml Scaled 61b257c62b44a.xml Text 61b257c62b44a.xml Ordinal 61b257c62b44a.xml Scaled 61b2654669195.xml Text 61b2654669195.xml Ordinal 61b2654669195.xml Scaled 61b38f199319f.xml Text 61b38f199319f.xml Ordinal 61b38f199319f.xml Scaled 61b3991a21735.xml Text 61b3991a21735.xml Ordinal 61b3991a21735.xml Scaled 621d12cf667c7.xml Text 621d12cf667c7.xml Ordinal 621d12cf667c7.xml Scaled 621d4ff4808c5.xml Text 621d4ff4808c5.xml Ordinal 621d4ff4808c5.xml Scaled 621d682e591a5.xml Text 621d682e591a5.xml Ordinal 621d682e591a5.xml Scaled Min Max Counts +1 Properties/FAIRProps/AccessibleProps/AccessibleFlags/humanAccessible yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no yes no:43 yes:46 +2 
Properties/FAIRProps/AccessibleProps/AccessibleFlags/machineAccessible yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no yes no:8 yes:81 +3 Properties/FAIRProps/AccessibleProps/AccessibleFlags/persistentMetadata no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no yes no:81 yes:8 +4 Properties/FAIRProps/AccessibleProps/AccessibleFlags/licenseOK yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 
1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no yes no:53 yes:36 +5 Properties/FAIRProps/AccessibleProps/AccessibleFlags/stdApi yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no yes no:45 yes:44 +6 Properties/FAIRProps/AccessibleProps/MetadataPersistence no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 byStatedPolicy 2 10.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 byStatedPolicy 2 10.0000 no 0 1.0000 byStatedPolicy 2 10.0000 byStatedPolicy 2 10.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 byStatedPolicy 2 10.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 
1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 byStatedPolicy 2 10.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 byStatedPolicy 2 10.0000 no 0 1.0000 no 0 1.0000 byStatedPolicy 2 10.0000 no 0 1.0000 no 0 1.0000 no byStatedPolicy no:81 byEvidence:0 byStatedPolicy:8 +7 Properties/FAIRProps/FindableProps/PersistentIdentifier internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 internalPID 1 5.5000 externalPID 2 10.0000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 none 0 1.0000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 internalPID 1 5.5000 externalPID 2 10.0000 internalPID 1 5.5000 externalPID 2 10.0000 internalPID 1 5.5000 externalPID 2 10.0000 internalPID 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 internalPID 1 5.5000 internalPID 1 5.5000 externalPID 2 10.0000 internalPID 1 5.5000 internalPID 1 5.5000 externalPID 2 10.0000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 internalPID 1 5.5000 none 0 1.0000 internalPID 1 5.5000 internalPID 1 
5.5000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 internalPID 1 5.5000 internalPID 1 5.5000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 none 0 1.0000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 externalPID 2 10.0000 none externalPID none:8 internalPID:39 externalPID:42 +8 Properties/FAIRProps/FindableProps/IdInMetadata partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 all 2 10.0000 all 2 10.0000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 -1 1 partial 1 5.5000 all 2 10.0000 all 2 10.0000 all 2 10.0000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 all 2 10.0000 all 2 10.0000 -1 1 -1 1 -1 1 -1 1 -1 1 partial 1 5.5000 partial 1 5.5000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 partial 1 5.5000 -1 1 partial 1 5.5000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 all 2 10.0000 all 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 -1 1 partial 1 5.5000 all 2 10.0000 partial 1 5.5000 all 2 10.0000 all none:0 partial:36 all:45 +9 Properties/FAIRProps/FindableProps/MetadataGrade limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 0 1.0000 limited 1 5.5000 limited 1 5.5000 
rich 2 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 rich 2 10.0000 limited 1 5.5000 limited 1 5.5000 rich 2 10.0000 minimal 0 1.0000 rich 2 10.0000 limited 1 5.5000 rich 2 10.0000 limited 1 5.5000 rich 2 10.0000 rich 2 10.0000 minimal 0 1.0000 limited 1 5.5000 limited 1 5.5000 rich 2 10.0000 limited 1 5.5000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 rich 2 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 limited 1 5.5000 minimal 0 1.0000 minimal 0 1.0000 limited 1 5.5000 limited 1 5.5000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 limited 1 5.5000 rich 2 10.0000 limited 1 5.5000 limited 1 5.5000 rich 2 10.0000 limited 1 5.5000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 limited 1 5.5000 minimal 0 1.0000 rich 2 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 0 1.0000 rich 2 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 0 1.0000 minimal 0 1.0000 limited 1 5.5000 minimal 0 1.0000 rich 2 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 limited 1 5.5000 limited 1 5.5000 rich 2 10.0000 limited 1 5.5000 limited 1 5.5000 minimal rich minimal:32 limited:42 rich:15 +10 Properties/FAIRProps/FindableProps/FindableFlags/internalSearchOK yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 
1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no yes no:6 yes:83 +11 Properties/FAIRProps/InteroperableProps/MetadataFAIRness minimal 0 1.0000 allowed 1 5.5000 allowed 1 5.5000 enforced 2 10.0000 minimal 0 1.0000 minimal 0 1.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 allowed 1 5.5000 allowed 1 5.5000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 allowed 1 5.5000 minimal 0 1.0000 enforced 2 10.0000 allowed 1 5.5000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 allowed 1 5.5000 allowed 1 5.5000 enforced 2 10.0000 minimal 0 1.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 minimal 0 1.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 enforced 2 10.0000 enforced 2 10.0000 minimal 0 1.0000 allowed 1 5.5000 minimal 0 1.0000 enforced 2 10.0000 minimal 0 1.0000 minimal enforced minimal:24 allowed:9 
enforced:56 +12 Properties/FAIRProps/InteroperableProps/StudyLinkage freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 none 0 1.0000 freeText 1 4.0000 freeText 1 4.0000 textualMetadata 2 7.0000 freeText 1 4.0000 textualMetadata 2 7.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 textualMetadata 2 7.0000 machineReadableMetadata 3 10.0000 machineReadableMetadata 3 10.0000 none 0 1.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 textualMetadata 2 7.0000 textualMetadata 2 7.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 none 0 1.0000 textualMetadata 2 7.0000 freeText 1 4.0000 freeText 1 4.0000 textualMetadata 2 7.0000 freeText 1 4.0000 freeText 1 4.0000 textualMetadata 2 7.0000 none 0 1.0000 freeText 1 4.0000 textualMetadata 2 7.0000 none 0 1.0000 none 0 1.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 freeText 1 4.0000 textualMetadata 2 7.0000 textualMetadata 2 7.0000 textualMetadata 2 7.0000 textualMetadata 2 7.0000 textualMetadata 2 7.0000 none 0 1.0000 textualMetadata 2 7.0000 textualMetadata 2 7.0000 freeText 1 4.0000 freeText 1 4.0000 none 0 1.0000 freeText 1 4.0000 textualMetadata 2 7.0000 freeText 1 4.0000 textualMetadata 2 7.0000 textualMetadata 2 7.0000 none 0 1.0000 none 0 1.0000 freeText 1 4.0000 textualMetadata 2 7.0000 freeText 1 4.0000 textualMetadata 2 7.0000 freeText 1 4.0000 freeText 1 4.0000 none 0 1.0000 freeText 1 4.0000 none 0 1.0000 none 0 1.0000 textualMetadata 2 7.0000 freeText 1 4.0000 textualMetadata 2 7.0000 none machineReadableMetadata none:13 freeText:51 textualMetadata:23 machineReadableMetadata:2 +13 Properties/FAIRProps/InteroperableProps/InteroperableFlags/formalMetadataVocabularyOK yes 
1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no yes no:36 yes:53 +14 Properties/FAIRProps/InteroperableProps/InteroperableFlags/fairMetadataOK no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no yes no:24 yes:65 +15 Properties/FAIRProps/InteroperableProps/InteroperableFlags/qualifiedMetadataReferencesOK yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 
yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no yes no:41 yes:48 +16 Properties/FAIRProps/InteroperableProps/InteroperableFlags/studyLinkageOK no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no yes no:64 yes:25 +17 Properties/FAIRProps/InteroperableProps/MetadataReferenceQuality informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 freeText 0 1.0000 informal 1 5.5000 formal 2 10.0000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 freeText 0 1.0000 informal 1 5.5000 informal 1 5.5000 formal 2 10.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 informal 1 5.5000 freeText 0 1.0000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 freeText 0 1.0000 informal 1 5.5000 freeText 0 1.0000 freeText 0 1.0000 informal 1 
5.5000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 formal 2 10.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 freeText 0 1.0000 informal 1 5.5000 formal 2 10.0000 freeText 0 1.0000 informal 1 5.5000 informal 1 5.5000 informal 1 5.5000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 freeText 0 1.0000 formal 2 10.0000 freeText 0 1.0000 informal 1 5.5000 freeText formal freeText:41 informal:43 formal:5 +18 Properties/FAIRProps/ReusableProps/DocumentationLevel good 2 7.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 good 2 7.0000 lacking 0 1.0000 good 2 7.0000 full 3 10.0000 adequate 1 4.0000 adequate 1 4.0000 lacking 0 1.0000 good 2 7.0000 lacking 0 1.0000 adequate 1 4.0000 adequate 1 4.0000 good 2 7.0000 lacking 0 1.0000 adequate 1 4.0000 lacking 0 1.0000 good 2 7.0000 adequate 1 4.0000 good 2 7.0000 good 2 7.0000 lacking 0 1.0000 adequate 1 4.0000 adequate 1 4.0000 full 3 10.0000 lacking 0 1.0000 lacking 0 1.0000 lacking 0 1.0000 lacking 0 1.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 good 2 7.0000 adequate 1 4.0000 good 2 7.0000 adequate 1 4.0000 full 3 10.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 lacking 0 1.0000 adequate 1 4.0000 adequate 1 4.0000 lacking 0 1.0000 adequate 1 4.0000 lacking 0 1.0000 lacking 0 1.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 good 2 7.0000 lacking 0 
1.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 lacking 0 1.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 lacking 0 1.0000 good 2 7.0000 adequate 1 4.0000 adequate 1 4.0000 good 2 7.0000 adequate 1 4.0000 adequate 1 4.0000 adequate 1 4.0000 lacking 0 1.0000 lacking 0 1.0000 lacking 0 1.0000 good 2 7.0000 lacking 0 1.0000 adequate 1 4.0000 lacking full lacking:21 adequate:45 good:20 full:3 +19 Properties/FAIRProps/ReusableProps/MetadataProvenance adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 full 2 10.0000 adequate 1 5.5000 full 2 10.0000 adequate 1 5.5000 unclear 0 1.0000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 unclear 0 1.0000 full 2 10.0000 unclear 0 1.0000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 adequate 1 5.5000 adequate 1 5.5000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 adequate 1 5.5000 unclear 0 1.0000 adequate 1 5.5000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 adequate 1 5.5000 unclear 0 1.0000 adequate 1 5.5000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 unclear 0 1.0000 unclear 0 1.0000 adequate 1 5.5000 unclear 0 1.0000 adequate 1 5.5000 adequate 1 5.5000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 adequate 1 5.5000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 adequate 1 5.5000 unclear 0 1.0000 unclear 0 1.0000 adequate 1 5.5000 adequate 1 
5.5000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 unclear 0 1.0000 full 2 10.0000 unclear 0 1.0000 adequate 1 5.5000 unclear full unclear:43 adequate:42 full:4 +20 Properties/FAIRProps/ReusableProps/ReusableFlags/documentationOK yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no yes no:21 yes:68 +21 Properties/FAIRProps/ReusableProps/ReusableFlags/dkNetMetadataOK yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no yes no:37 yes:52 +22 Properties/FAIRProps/ReusableProps/ReusableFlags/communityStandard yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 
10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no yes no:21 yes:68 +23 Properties/FAIRProps/ReusableProps/ReusableFlags/generalMetadata yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no yes no:50 yes:39 +24 Properties/FAIRProps/ReusableProps/ReusableFlags/metadataProvenanceOK yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 
1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no yes no:43 yes:46 +25 Properties/FAIRProps/ReusableProps/DkNetMetadataLevel datasetAndSubject 2 10.0000 datasetAndSubject 2 10.0000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 datasetAndSubject 2 10.0000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 datasetAndSubject 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 datasetAndSubject 2 10.0000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 datasetAndSubject 2 10.0000 none 0 1.0000 none 0 1.0000 dataset 1 5.5000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 none 0 1.0000 none 0 1.0000 dataset 1 5.5000 none 0 1.0000 none 0 1.0000 dataset 1 5.5000 none 0 1.0000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 dataset 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 dataset 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 datasetAndSubject 2 10.0000 none 0 1.0000 dataset 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 none 0 1.0000 dataset 1 5.5000 dataset 1 5.5000 none datasetAndSubject none:37 dataset:45 datasetAndSubject:7 +26 Properties/FAIRProps/ReusableProps/ReuseLicense datasetLevel 2 10.0000 none 0 1.0000 repositoryLevel 1 5.5000 
none 0 1.0000 datasetLevel 2 10.0000 datasetLevel 2 10.0000 datasetLevel 2 10.0000 none 0 1.0000 repositoryLevel 1 5.5000 none 0 1.0000 none 0 1.0000 datasetLevel 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 datasetLevel 2 10.0000 datasetLevel 2 10.0000 repositoryLevel 1 5.5000 none 0 1.0000 none 0 1.0000 repositoryLevel 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 repositoryLevel 1 5.5000 repositoryLevel 1 5.5000 none 0 1.0000 none 0 1.0000 repositoryLevel 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 datasetLevel 2 10.0000 none 0 1.0000 repositoryLevel 1 5.5000 none 0 1.0000 datasetLevel 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 datasetLevel 2 10.0000 none 0 1.0000 datasetLevel 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 datasetLevel 2 10.0000 repositoryLevel 1 5.5000 repositoryLevel 1 5.5000 datasetLevel 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 repositoryLevel 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 datasetLevel 2 10.0000 datasetLevel 2 10.0000 none 0 1.0000 datasetLevel 2 10.0000 datasetLevel 2 10.0000 datasetLevel 2 10.0000 repositoryLevel 1 5.5000 none 0 1.0000 none 0 1.0000 repositoryLevel 1 5.5000 none 0 1.0000 repositoryLevel 1 5.5000 datasetLevel 2 10.0000 none 0 1.0000 datasetLevel 2 10.0000 repositoryLevel 1 5.5000 datasetLevel 2 10.0000 none datasetLevel none:53 repositoryLevel:15 datasetLevel:21 +27 Properties/TrustworthinessProps/GovernanceTransparency partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 partial 
1 5.5000 full 2 10.0000 partial 1 5.5000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 opaque 0 1.0000 opaque 0 1.0000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 opaque 0 1.0000 partial 1 5.5000 partial 1 5.5000 opaque 0 1.0000 full 2 10.0000 partial 1 5.5000 opaque full opaque:12 partial:69 full:8 +28 Properties/TrustworthinessProps/SourceOpen partially 1 5.5000 no 0 1.0000 partially 1 5.5000 no 0 1.0000 yes 2 10.0000 yes 2 10.0000 yes 2 10.0000 no 0 1.0000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 yes 2 10.0000 partially 1 5.5000 no 0 1.0000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 partially 1 5.5000 yes 2 10.0000 no 0 1.0000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 no 0 1.0000 yes 2 10.0000 yes 2 10.0000 partially 1 5.5000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 partially 1 5.5000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 no 0 
1.0000 yes 2 10.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 partially 1 5.5000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 no 0 1.0000 partially 1 5.5000 partially 1 5.5000 yes 2 10.0000 yes 2 10.0000 no 0 1.0000 partially 1 5.5000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 no 0 1.0000 no 0 1.0000 partially 1 5.5000 yes 2 10.0000 partially 1 5.5000 partially 1 5.5000 no yes no:46 partially:32 yes:11 +29 Properties/TrustworthinessProps/StakeholderGovernance good 2 7.0000 full 3 10.0000 good 2 7.0000 none 0 1.0000 weak 1 4.0000 weak 1 4.0000 good 2 7.0000 -1 1 weak 1 4.0000 good 2 7.0000 none 0 1.0000 good 2 7.0000 -1 1 none 0 1.0000 weak 1 4.0000 good 2 7.0000 -1 1 full 3 10.0000 full 3 10.0000 full 3 10.0000 good 2 7.0000 full 3 10.0000 good 2 7.0000 -1 1 weak 1 4.0000 good 2 7.0000 full 3 10.0000 good 2 7.0000 -1 1 -1 1 -1 1 good 2 7.0000 weak 1 4.0000 good 2 7.0000 weak 1 4.0000 none 0 1.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 weak 1 4.0000 good 2 7.0000 weak 1 4.0000 good 2 7.0000 weak 1 4.0000 weak 1 4.0000 weak 1 4.0000 full 3 10.0000 -1 1 good 2 7.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 weak 1 4.0000 weak 1 4.0000 weak 1 4.0000 weak 1 4.0000 good 2 7.0000 weak 1 4.0000 good 2 7.0000 good 2 7.0000 weak 1 4.0000 -1 1 good 2 7.0000 good 2 7.0000 good 2 7.0000 weak 1 4.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 weak 1 4.0000 weak 1 4.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 good 2 7.0000 -1 1 good 2 7.0000 weak 1 4.0000 good 2 7.0000 good 2 7.0000 -1 1 none 0 1.0000 weak 1 4.0000 -1 1 good 2 7.0000 good 2 7.0000 full none:5 weak:23 good:42 full:7 +30 Properties/OpenProps/Restrictions minimal 1 5.5000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 minimal 1 
5.5000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 minimal 1 5.5000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 minimal 1 5.5000 minimal 1 5.5000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 significant 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 significant 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 significant 2 10.0000 none 0 1.0000 significant 2 10.0000 significant 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none 0 1.0000 significant 2 10.0000 significant 2 10.0000 significant 2 10.0000 significant 2 10.0000 minimal 1 5.5000 none 0 1.0000 minimal 1 5.5000 none 0 1.0000 none significant none:57 minimal:23 significant:9 +31 Properties/OpenProps/CCLicenseCompliance good 3 7.7500 none 1 3.2500 good 3 7.7500 none 1 3.2500 full 4 10.0000 good 3 7.7500 good 3 7.7500 none 1 3.2500 full 4 10.0000 none 1 3.2500 none 1 3.2500 full 4 10.0000 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 good 3 7.7500 good 3 7.7500 good 3 7.7500 none 1 3.2500 none 1 3.2500 good 3 7.7500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 good 3 7.7500 good 3 7.7500 none 1 3.2500 none 1 3.2500 full 4 10.0000 none 1 3.2500 none 1 3.2500 none 1 3.2500 full 4 10.0000 none 1 3.2500 good 3 7.7500 none 1 3.2500 full 4 10.0000 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 full 4 
10.0000 none 1 3.2500 full 4 10.0000 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 good 3 7.7500 adequate 2 5.5000 full 4 10.0000 good 3 7.7500 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 adequate 2 5.5000 none 1 3.2500 none 1 3.2500 none 1 3.2500 none 1 3.2500 full 4 10.0000 full 4 10.0000 none 1 3.2500 full 4 10.0000 full 4 10.0000 full 4 10.0000 adequate 2 5.5000 none 1 3.2500 none 1 3.2500 adequate 2 5.5000 none 1 3.2500 adequate 2 5.5000 full 4 10.0000 none 1 3.2500 good 3 7.7500 good 3 7.7500 full 4 10.0000 none full nonCompliant:0 none:53 adequate:5 good:15 full:16 +32 Properties/OpenProps/OpenFlags/openFormat yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no yes no:50 yes:39 +33 Properties/OpenProps/OpenFlags/platformSupportsDataWork yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 
1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no yes no:39 yes:50 +34 Properties/OpenProps/OpenFlags/ccLicenseOK yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 yes 1 10 no 0 1 no 0 1 yes 1 10 no 0 1 yes 1 10 yes 1 10 no 0 1 yes 1 10 yes 1 10 yes 1 10 no yes no:53 yes:36 +35 Properties/OpenProps/OpenFlags/restrictionsNotJustified no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 yes 1 10 no 0 1 no 0 1 no 0 1 no 0 1 no 0 1 no yes no:87 yes:2 +36 Properties/CitableProps/MachineReadableLandingPage exists 1 5.5000 exists 1 5.5000 exists 1 5.5000 exists 1 5.5000 supportsDataCitation 2 10.0000 exists 1 5.5000 supportsDataCitation 2 10.0000 exists 1 5.5000 supportsDataCitation 2 10.0000 supportsDataCitation 2 10.0000 
none 0 1.0000 supportsDataCitation 2 10.0000 exists 1 5.5000 exists 1 5.5000 supportsDataCitation 2 10.0000 supportsDataCitation 2 10.0000 none 0 1.0000 exists 1 5.5000 supportsDataCitation 2 10.0000 exists 1 5.5000 none 0 1.0000 supportsDataCitation 2 10.0000 exists 1 5.5000 exists 1 5.5000 none 0 1.0000 exists 1 5.5000 exists 1 5.5000 none 0 1.0000 none 0 1.0000 exists 1 5.5000 none 0 1.0000 supportsDataCitation 2 10.0000 exists 1 5.5000 none 0 1.0000 supportsDataCitation 2 10.0000 supportsDataCitation 2 10.0000 none 0 1.0000 exists 1 5.5000 supportsDataCitation 2 10.0000 supportsDataCitation 2 10.0000 exists 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 exists 1 5.5000 supportsDataCitation 2 10.0000 supportsDataCitation 2 10.0000 none 0 1.0000 supportsDataCitation 2 10.0000 exists 1 5.5000 exists 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 exists 1 5.5000 none 0 1.0000 none 0 1.0000 supportsDataCitation 2 10.0000 exists 1 5.5000 exists 1 5.5000 exists 1 5.5000 none 0 1.0000 exists 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 exists 1 5.5000 supportsDataCitation 2 10.0000 exists 1 5.5000 none 0 1.0000 supportsDataCitation 2 10.0000 exists 1 5.5000 exists 1 5.5000 supportsDataCitation 2 10.0000 none 0 1.0000 exists 1 5.5000 none 0 1.0000 none 0 1.0000 exists 1 5.5000 none 0 1.0000 none 0 1.0000 supportsDataCitation 2 10.0000 supportsDataCitation 2 10.0000 exists 1 5.5000 none supportsDataCitation none:33 exists:33 supportsDataCitation:23 +37 Properties/CitableProps/CitationMetadataLevel full 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 full 2 10.0000 full 2 10.0000 partial 1 5.5000 full 2 10.0000 full 2 10.0000 partial 1 5.5000 full 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 partial 1 5.5000 full 2 10.0000 full 2 10.0000 full 2 10.0000 partial 1 5.5000 partial 1 5.5000 none 0 1.0000 none 0 1.0000 partial 1 5.5000 
partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 none 0 1.0000 partial 1 5.5000 none 0 1.0000 partial 1 5.5000 none 0 1.0000 partial 1 5.5000 full 2 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 none 0 1.0000 partial 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 partial 1 5.5000 full 2 10.0000 partial 1 5.5000 full 2 10.0000 none 0 1.0000 partial 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 partial 1 5.5000 full 2 10.0000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 none 0 1.0000 partial 1 5.5000 full 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 partial 1 5.5000 partial 1 5.5000 full 2 10.0000 full 2 10.0000 none 0 1.0000 partial 1 5.5000 none 0 1.0000 partial 1 5.5000 none 0 1.0000 none 0 1.0000 partial 1 5.5000 partial 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 partial 1 5.5000 full 2 10.0000 none 0 1.0000 partial 1 5.5000 none full none:31 partial:37 full:21 +38 Properties/CitableProps/OrcidAssociation none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 required 2 10.0000 none 0 1.0000 supported 1 5.5000 supported 1 5.5000 none 0 1.0000 supported 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 supported 1 5.5000 supported 1 5.5000 none 0 1.0000 none 0 1.0000 required 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 required 2 10.0000 none 0 1.0000 none 0 1.0000 supported 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 supported 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 required 2 10.0000 none 0 1.0000 supported 1 5.5000 supported 1 5.5000 none 0 1.0000 none 0 1.0000 required 2 10.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 
1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 required 2 10.0000 supported 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 supported 1 5.5000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 none 0 1.0000 supported 1 5.5000 supported 1 5.5000 supported 1 5.5000 none 0 1.0000 none required none:69 supported:14 required:6 +39 Trustworthy minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 noConcerns 2 10.0000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 noConcerns 2 10.0000 minorConcerns 1 5.5000 
minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 noConcerns 2 10.0000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns 1 5.5000 minorConcerns noConcerns significantConcerns:0 minorConcerns:86 noConcerns:3 +40 Citable partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 notCitable 0 1.0000 partiallyCitable 1 5.5000 notCitable 0 1.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 notCitable 0 1.0000 notCitable 0 1.0000 notCitable 0 1.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 notCitable 0 1.0000 notCitable 0 1.0000 partiallyCitable 1 5.5000 notCitable 0 1.0000 
notCitable 0 1.0000 notCitable 0 1.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 notCitable 0 1.0000 partiallyCitable 1 5.5000 notCitable 0 1.0000 notCitable 0 1.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 notCitable 0 1.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 notCitable 0 1.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 notCitable 0 1.0000 partiallyCitable 1 5.5000 notCitable 0 1.0000 partiallyCitable 1 5.5000 fullyCitable 2 10.0000 partiallyCitable 1 5.5000 partiallyCitable 1 5.5000 notCitable fullyCitable notCitable:17 partiallyCitable:58 fullyCitable:14 +41 Open partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 fullyOpen 2 10.0000 fullyOpen 2 10.0000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 fullyOpen 2 10.0000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 fullyOpen 2 10.0000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 closed 0 1.0000 partiallyOpen 1 5.5000 fullyOpen 2 10.0000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 
5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 closed 0 1.0000 partiallyOpen 1 5.5000 closed 0 1.0000 closed 0 1.0000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 fullyOpen 2 10.0000 fullyOpen 2 10.0000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 fullyOpen 2 10.0000 fullyOpen 2 10.0000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 partiallyOpen 1 5.5000 fullyOpen 2 10.0000 partiallyOpen 1 5.5000 fullyOpen 2 10.0000 closed fullyOpen closed:4 partiallyOpen:74 fullyOpen:11 +42 FAIR/Accessible partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 fullyAccessible 2 10.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 fullyAccessible 2 10.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 notAccessible 0 1.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 notAccessible 0 1.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 
partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 notAccessible 0 1.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 notAccessible 0 1.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 notAccessible 0 1.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 notAccessible 0 1.0000 partiallyAccessible 1 5.5000 notAccessible 0 1.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 fullyAccessible 2 10.0000 partiallyAccessible 1 5.5000 partiallyAccessible 1 5.5000 notAccessible fullyAccessible notAccessible:7 partiallyAccessible:79 fullyAccessible:3 +43 FAIR/Interoperable partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 notInteroperable 0 1.0000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 
partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 notInteroperable 0 1.0000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 notInteroperable 0 1.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 notInteroperable 0 1.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 notInteroperable 0 1.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 fullyInteroperable 2 10.0000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 notInteroperable 0 1.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 
partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 fullyInteroperable 2 10.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 notInteroperable 0 1.0000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 partiallyInteroperable 1 5.5000 notInteroperable fullyInteroperable notInteroperable:7 partiallyInteroperable:68 fullyInteroperable:14 +44 FAIR/Findable partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 notFindable 0 1.0000 notFindable 0 1.0000 partiallyFindable 1 
5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 fullyFindable 2 10.0000 partiallyFindable 1 5.5000 partiallyFindable 1 5.5000 notFindable fullyFindable notFindable:2 partiallyFindable:78 fullyFindable:9 +45 FAIR/Reusable fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 notReusable 0 1.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 
5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 notReusable 0 1.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 fullyReusable 2 10.0000 partiallyReusable 1 5.5000 notReusable 0 1.0000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 partiallyReusable 1 5.5000 notReusable fullyReusable notReusable:3 partiallyReusable:72 fullyReusable:14 diff --git a/split-by-model/dkNET-DRP/summary/summary-transcript.tsv b/split-by-model/dkNET-DRP/summary/summary-transcript.tsv new file mode 100644 index 0000000..7595ea9 --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/summary-transcript.tsv @@ -0,0 +1,32 @@ +row # identifier 614b619410622.xml Answer Text 614b619410622.xml Answer Score 614b619410622.xml Answer Scaled 614ba8756c8db.xml Answer Text 614ba8756c8db.xml Answer Score 614ba8756c8db.xml Answer Scaled 614df6a85b9b1.xml Answer Text 614df6a85b9b1.xml Answer Score 614df6a85b9b1.xml Answer Scaled 6155e4b25d5bb.xml Answer Text 6155e4b25d5bb.xml Answer 
Score 6155e4b25d5bb.xml Answer Scaled 61573f38ef525.xml Answer Text 61573f38ef525.xml Answer Score 61573f38ef525.xml Answer Scaled 615b72f7e220a.xml Answer Text 615b72f7e220a.xml Answer Score 615b72f7e220a.xml Answer Scaled 615b89fa770b6.xml Answer Text 615b89fa770b6.xml Answer Score 615b89fa770b6.xml Answer Scaled 615ca4418ee3f.xml Answer Text 615ca4418ee3f.xml Answer Score 615ca4418ee3f.xml Answer Scaled 615dc910773b8.xml Answer Text 615dc910773b8.xml Answer Score 615dc910773b8.xml Answer Scaled 616066f394fa6.xml Answer Text 616066f394fa6.xml Answer Score 616066f394fa6.xml Answer Scaled 61609db5051d2.xml Answer Text 61609db5051d2.xml Answer Score 61609db5051d2.xml Answer Scaled 6164baf6052a8.xml Answer Text 6164baf6052a8.xml Answer Score 6164baf6052a8.xml Answer Scaled 6165b40aacafb.xml Answer Text 6165b40aacafb.xml Answer Score 6165b40aacafb.xml Answer Scaled 616712744a595.xml Answer Text 616712744a595.xml Answer Score 616712744a595.xml Answer Scaled 61676b2d11524.xml Answer Text 61676b2d11524.xml Answer Score 61676b2d11524.xml Answer Scaled 616f4035b5cf4.xml Answer Text 616f4035b5cf4.xml Answer Score 616f4035b5cf4.xml Answer Scaled 6171d0459add1.xml Answer Text 6171d0459add1.xml Answer Score 6171d0459add1.xml Answer Scaled 61732f9ba6055.xml Answer Text 61732f9ba6055.xml Answer Score 61732f9ba6055.xml Answer Scaled 61735ece7e758.xml Answer Text 61735ece7e758.xml Answer Score 61735ece7e758.xml Answer Scaled 6176c84f1c023.xml Answer Text 6176c84f1c023.xml Answer Score 6176c84f1c023.xml Answer Scaled 6176e09b35d7f.xml Answer Text 6176e09b35d7f.xml Answer Score 6176e09b35d7f.xml Answer Scaled 6177452b8b6c7.xml Answer Text 6177452b8b6c7.xml Answer Score 6177452b8b6c7.xml Answer Scaled 61782a94716e3.xml Answer Text 61782a94716e3.xml Answer Score 61782a94716e3.xml Answer Scaled 61783f6c4c8e7.xml Answer Text 61783f6c4c8e7.xml Answer Score 61783f6c4c8e7.xml Answer Scaled 617850897d411.xml Answer Text 617850897d411.xml Answer Score 617850897d411.xml Answer Scaled 
617864ac7b873.xml Answer Text 617864ac7b873.xml Answer Score 617864ac7b873.xml Answer Scaled 61799d37c3555.xml Answer Text 61799d37c3555.xml Answer Score 61799d37c3555.xml Answer Scaled 6179b9da80888.xml Answer Text 6179b9da80888.xml Answer Score 6179b9da80888.xml Answer Scaled 617accb79fb53.xml Answer Text 617accb79fb53.xml Answer Score 617accb79fb53.xml Answer Scaled 617ad2ab32afc.xml Answer Text 617ad2ab32afc.xml Answer Score 617ad2ab32afc.xml Answer Scaled 617af3b01bff2.xml Answer Text 617af3b01bff2.xml Answer Score 617af3b01bff2.xml Answer Scaled 617aff8495ced.xml Answer Text 617aff8495ced.xml Answer Score 617aff8495ced.xml Answer Scaled 617c779c5bc94.xml Answer Text 617c779c5bc94.xml Answer Score 617c779c5bc94.xml Answer Scaled 61800a6acbf2a.xml Answer Text 61800a6acbf2a.xml Answer Score 61800a6acbf2a.xml Answer Scaled 61805a39e24bb.xml Answer Text 61805a39e24bb.xml Answer Score 61805a39e24bb.xml Answer Scaled 618063fc1eba5.xml Answer Text 618063fc1eba5.xml Answer Score 618063fc1eba5.xml Answer Scaled 61816a627e26b.xml Answer Text 61816a627e26b.xml Answer Score 61816a627e26b.xml Answer Scaled 618180e41db0e.xml Answer Text 618180e41db0e.xml Answer Score 618180e41db0e.xml Answer Scaled 61818c0471e3e.xml Answer Text 61818c0471e3e.xml Answer Score 61818c0471e3e.xml Answer Scaled 618194d41e94a.xml Answer Text 618194d41e94a.xml Answer Score 618194d41e94a.xml Answer Scaled 6182df9c3d9e3.xml Answer Text 6182df9c3d9e3.xml Answer Score 6182df9c3d9e3.xml Answer Scaled 6182f66084e0b.xml Answer Text 6182f66084e0b.xml Answer Score 6182f66084e0b.xml Answer Scaled 61830248beb1c.xml Answer Text 61830248beb1c.xml Answer Score 61830248beb1c.xml Answer Scaled 61856c2f8d135.xml Answer Text 61856c2f8d135.xml Answer Score 61856c2f8d135.xml Answer Scaled 618772a4eda31.xml Answer Text 618772a4eda31.xml Answer Score 618772a4eda31.xml Answer Scaled 61895560bbab4.xml Answer Text 61895560bbab4.xml Answer Score 61895560bbab4.xml Answer Scaled 618ab2f1efc9f.xml Answer Text 
618ab2f1efc9f.xml Answer Score 618ab2f1efc9f.xml Answer Scaled 618ac6bb76674.xml Answer Text 618ac6bb76674.xml Answer Score 618ac6bb76674.xml Answer Scaled 618af1fa72f85.xml Answer Text 618af1fa72f85.xml Answer Score 618af1fa72f85.xml Answer Scaled 618afa63748f2.xml Answer Text 618afa63748f2.xml Answer Score 618afa63748f2.xml Answer Scaled 618b05ddaf1c8.xml Answer Text 618b05ddaf1c8.xml Answer Score 618b05ddaf1c8.xml Answer Scaled 618b0ed289968.xml Answer Text 618b0ed289968.xml Answer Score 618b0ed289968.xml Answer Scaled 618e95d1e58c7.xml Answer Text 618e95d1e58c7.xml Answer Score 618e95d1e58c7.xml Answer Scaled 618eaa9fcd36f.xml Answer Text 618eaa9fcd36f.xml Answer Score 618eaa9fcd36f.xml Answer Scaled 61941c2f7748c.xml Answer Text 61941c2f7748c.xml Answer Score 61941c2f7748c.xml Answer Scaled 61942b69bdeab.xml Answer Text 61942b69bdeab.xml Answer Score 61942b69bdeab.xml Answer Scaled 6194378833fb3.xml Answer Text 6194378833fb3.xml Answer Score 6194378833fb3.xml Answer Scaled 619441c4acb77.xml Answer Text 619441c4acb77.xml Answer Score 619441c4acb77.xml Answer Scaled 6198116a6dfed.xml Answer Text 6198116a6dfed.xml Answer Score 6198116a6dfed.xml Answer Scaled 6198331eb3593.xml Answer Text 6198331eb3593.xml Answer Score 6198331eb3593.xml Answer Scaled 61985711a8424.xml Answer Text 61985711a8424.xml Answer Score 61985711a8424.xml Answer Scaled 6198648660f3d.xml Answer Text 6198648660f3d.xml Answer Score 6198648660f3d.xml Answer Scaled 619c1b5161bad.xml Answer Text 619c1b5161bad.xml Answer Score 619c1b5161bad.xml Answer Scaled 619c34b5360d2.xml Answer Text 619c34b5360d2.xml Answer Score 619c34b5360d2.xml Answer Scaled 61a50fa18488f.xml Answer Text 61a50fa18488f.xml Answer Score 61a50fa18488f.xml Answer Scaled 61a66d491f16a.xml Answer Text 61a66d491f16a.xml Answer Score 61a66d491f16a.xml Answer Scaled 61a67946c16f2.xml Answer Text 61a67946c16f2.xml Answer Score 61a67946c16f2.xml Answer Scaled 61a67f53dc9b3.xml Answer Text 61a67f53dc9b3.xml Answer Score 
61a67f53dc9b3.xml Answer Scaled 61a7bc9e3a002.xml Answer Text 61a7bc9e3a002.xml Answer Score 61a7bc9e3a002.xml Answer Scaled 61a7d3c6aeb5d.xml Answer Text 61a7d3c6aeb5d.xml Answer Score 61a7d3c6aeb5d.xml Answer Scaled 61a7e89c785ae.xml Answer Text 61a7e89c785ae.xml Answer Score 61a7e89c785ae.xml Answer Scaled 61a909a92778f.xml Answer Text 61a909a92778f.xml Answer Score 61a909a92778f.xml Answer Scaled 61a9183722786.xml Answer Text 61a9183722786.xml Answer Score 61a9183722786.xml Answer Scaled 61a92a18640d8.xml Answer Text 61a92a18640d8.xml Answer Score 61a92a18640d8.xml Answer Scaled 61aa5882975cb.xml Answer Text 61aa5882975cb.xml Answer Score 61aa5882975cb.xml Answer Scaled 61aa8e7632db5.xml Answer Text 61aa8e7632db5.xml Answer Score 61aa8e7632db5.xml Answer Scaled 61ae6db443359.xml Answer Text 61ae6db443359.xml Answer Score 61ae6db443359.xml Answer Scaled 61ae7c9289651.xml Answer Text 61ae7c9289651.xml Answer Score 61ae7c9289651.xml Answer Scaled 61af958cc8abe.xml Answer Text 61af958cc8abe.xml Answer Score 61af958cc8abe.xml Answer Scaled 61afc04c3cc7a.xml Answer Text 61afc04c3cc7a.xml Answer Score 61afc04c3cc7a.xml Answer Scaled 61afe38742749.xml Answer Text 61afe38742749.xml Answer Score 61afe38742749.xml Answer Scaled 61b23d317644e.xml Answer Text 61b23d317644e.xml Answer Score 61b23d317644e.xml Answer Scaled 61b257c62b44a.xml Answer Text 61b257c62b44a.xml Answer Score 61b257c62b44a.xml Answer Scaled 61b2654669195.xml Answer Text 61b2654669195.xml Answer Score 61b2654669195.xml Answer Scaled 61b38f199319f.xml Answer Text 61b38f199319f.xml Answer Score 61b38f199319f.xml Answer Scaled 61b3991a21735.xml Answer Text 61b3991a21735.xml Answer Score 61b3991a21735.xml Answer Scaled 621d12cf667c7.xml Answer Text 621d12cf667c7.xml Answer Score 621d12cf667c7.xml Answer Scaled 621d4ff4808c5.xml Answer Text 621d4ff4808c5.xml Answer Score 621d4ff4808c5.xml Answer Scaled 621d682e591a5.xml Answer Text 621d682e591a5.xml Answer Score 621d682e591a5.xml Answer Scaled Count Min 
Average Normalized Average Max +1 [sc-drc.dg]acc minimal restrictions 1 7.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 minimal restrictions 1 7.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 minimal restrictions 1 7.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 significant restrictions 2 4.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 significant restrictions 2 4.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 significant restrictions 2 4.0000 no restrictions 0 10.0000 significant but not justified restrictions 3 1.0000 significant restrictions 2 4.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 
no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 no restrictions 0 10.0000 significant restrictions 2 4.0000 significant restrictions 2 4.0000 significant but not justified restrictions 3 1.0000 significant restrictions 2 4.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 minimal restrictions 1 7.0000 no restrictions 0 10.0000 89 0 0.4831 0.1610 3 +2 [sc-drc.dg]reuse yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 2 1.0000 somewhat 1 5.5000 yes 0 10.0000 somewhat 1 5.5000 yes 0 10.0000 somewhat 1 5.5000 somewhat 1 5.5000 no 2 1.0000 yes 0 10.0000 no 2 1.0000 somewhat 1 5.5000 somewhat 1 5.5000 yes 0 10.0000 no 2 1.0000 yes 0 10.0000 somewhat 1 5.5000 no 2 1.0000 somewhat 1 5.5000 somewhat 1 5.5000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 somewhat 1 5.5000 yes 0 10.0000 yes 0 10.0000 somewhat 1 5.5000 yes 0 10.0000 somewhat 1 5.5000 yes 0 10.0000 somewhat 1 5.5000 no 2 1.0000 yes 0 10.0000 somewhat 1 5.5000 yes 0 10.0000 no 2 1.0000 somewhat 1 5.5000 yes 0 10.0000 somewhat 1 5.5000 yes 0 10.0000 no 2 1.0000 no 2 1.0000 somewhat 1 5.5000 no 2 1.0000 somewhat 1 5.5000 somewhat 1 5.5000 no 2 1.0000 somewhat 1 5.5000 somewhat 1 5.5000 somewhat 1 5.5000 yes 0 10.0000 somewhat 1 5.5000 somewhat 1 5.5000 yes 0 10.0000 yes 0 10.0000 somewhat 1 5.5000 somewhat 1 5.5000 somewhat 1 5.5000 somewhat 1 5.5000 somewhat 1 5.5000 somewhat 1 5.5000 somewhat 1 5.5000 somewhat 1 5.5000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 somewhat 1 5.5000 yes 0 10.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 somewhat 1 5.5000 somewhat 1 5.5000 somewhat 1 5.5000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 89 0 0.7191 0.3596 2 +3 
[sc-drc.dg]lic-clr dataset level 0 10.0000 no license 2 1.0000 repository level 1 5.5000 no license 2 1.0000 dataset level 0 10.0000 dataset level 0 10.0000 dataset level 0 10.0000 no license 2 1.0000 repository level 1 5.5000 no license 2 1.0000 no license 2 1.0000 dataset level 0 10.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 dataset level 0 10.0000 dataset level 0 10.0000 repository level 1 5.5000 no license 2 1.0000 no license 2 1.0000 repository level 1 5.5000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 repository level 1 5.5000 repository level 1 5.5000 no license 2 1.0000 no license 2 1.0000 repository level 1 5.5000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 dataset level 0 10.0000 no license 2 1.0000 repository level 1 5.5000 no license 2 1.0000 dataset level 0 10.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 dataset level 0 10.0000 no license 2 1.0000 dataset level 0 10.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 dataset level 0 10.0000 repository level 1 5.5000 repository level 1 5.5000 dataset level 0 10.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 repository level 1 5.5000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 no license 2 1.0000 dataset level 0 10.0000 dataset level 0 10.0000 no license 2 1.0000 dataset level 0 10.0000 dataset level 0 10.0000 dataset level 0 10.0000 repository level 1 5.5000 no license 2 1.0000 no license 2 1.0000 repository level 1 5.5000 no license 2 1.0000 repository level 1 5.5000 dataset level 0 10.0000 no license 2 1.0000 dataset level 0 10.0000 repository level 1 5.5000 dataset level 0 10.0000 89 0 1.3596 0.6798 2 +4 
[sc-drc.dg]lic-cc good 1 7.0000 0 good 1 7.0000 0 best 0 10.0000 good 1 7.0000 good 1 7.0000 0 best 0 10.0000 0 0 best 0 10.0000 0 0 0 0 good 1 7.0000 good 1 7.0000 good 1 7.0000 0 0 good 1 7.0000 0 0 0 0 good 1 7.0000 good 1 7.0000 0 0 best 0 10.0000 0 0 0 best 0 10.0000 0 good 1 7.0000 0 best 0 10.0000 0 0 0 0 0 0 0 best 0 10.0000 0 best 0 10.0000 0 0 0 0 0 0 0 0 0 0 good 1 7.0000 somewhat open 2 4.0000 best 0 10.0000 good 1 7.0000 0 0 0 0 somewhat open 2 4.0000 0 0 0 0 best 0 10.0000 best 0 10.0000 0 best 0 10.0000 best 0 10.0000 best 0 10.0000 somewhat open 2 4.0000 0 0 somewhat open 2 4.0000 0 somewhat open 2 4.0000 best 0 10.0000 0 good 1 7.0000 good 1 7.0000 best 0 10.0000 36 0 0.6944 0.2315 2 +5 [sc-drc.dg]plat yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 89 0 0.4382 0.4382 1 +6 [sc-drc.dg]ru-doc good 1 7.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 
4.0000 good 1 7.0000 worst 3 1.0000 good 1 7.0000 best 0 10.0000 adequate 2 4.0000 adequate 2 4.0000 worst 3 1.0000 good 1 7.0000 worst 3 1.0000 adequate 2 4.0000 adequate 2 4.0000 good 1 7.0000 worst 3 1.0000 adequate 2 4.0000 worst 3 1.0000 good 1 7.0000 adequate 2 4.0000 good 1 7.0000 good 1 7.0000 worst 3 1.0000 adequate 2 4.0000 adequate 2 4.0000 best 0 10.0000 worst 3 1.0000 worst 3 1.0000 worst 3 1.0000 worst 3 1.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 good 1 7.0000 adequate 2 4.0000 good 1 7.0000 adequate 2 4.0000 best 0 10.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 worst 3 1.0000 adequate 2 4.0000 adequate 2 4.0000 worst 3 1.0000 adequate 2 4.0000 worst 3 1.0000 worst 3 1.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 good 1 7.0000 worst 3 1.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 worst 3 1.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 worst 3 1.0000 good 1 7.0000 adequate 2 4.0000 adequate 2 4.0000 good 1 7.0000 adequate 2 4.0000 adequate 2 4.0000 adequate 2 4.0000 worst 3 1.0000 worst 3 1.0000 worst 3 1.0000 good 1 7.0000 worst 3 1.0000 adequate 2 4.0000 89 0 1.9438 0.6479 3 +7 [sc-drc.dg]sch-ui yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 
10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 89 0 0.0674 0.0674 1 +8 [sc-drc.dg]pid-g no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 89 0 0.5281 0.5281 1 +9 [sc-drc.dg]orcid none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 required 0 
10.0000 none 2 1.0000 supported 1 5.5000 supported 1 5.5000 none 2 1.0000 supported 1 5.5000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 supported 1 5.5000 supported 1 5.5000 none 2 1.0000 none 2 1.0000 required 0 10.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 required 0 10.0000 none 2 1.0000 none 2 1.0000 supported 1 5.5000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 supported 1 5.5000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 required 0 10.0000 none 2 1.0000 supported 1 5.5000 supported 1 5.5000 none 2 1.0000 none 2 1.0000 required 0 10.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 required 0 10.0000 supported 1 5.5000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 supported 1 5.5000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 none 2 1.0000 supported 1 5.5000 supported 1 5.5000 supported 1 5.5000 none 2 1.0000 89 0 1.7079 0.8539 2 +10 [sc-drc.dg]md-level limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 2 1.0000 limited 1 5.5000 limited 1 5.5000 rich 0 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 rich 0 10.0000 limited 1 5.5000 limited 1 5.5000 rich 0 10.0000 minimal 2 1.0000 rich 0 10.0000 limited 1 5.5000 rich 0 10.0000 limited 1 5.5000 rich 0 10.0000 rich 0 10.0000 minimal 2 1.0000 limited 1 5.5000 limited 1 5.5000 rich 0 10.0000 limited 1 5.5000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 rich 0 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 limited 1 5.5000 minimal 2 1.0000 minimal 
2 1.0000 limited 1 5.5000 limited 1 5.5000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 limited 1 5.5000 rich 0 10.0000 limited 1 5.5000 limited 1 5.5000 rich 0 10.0000 limited 1 5.5000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 limited 1 5.5000 minimal 2 1.0000 rich 0 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 2 1.0000 rich 0 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 2 1.0000 minimal 2 1.0000 limited 1 5.5000 minimal 2 1.0000 rich 0 10.0000 limited 1 5.5000 limited 1 5.5000 limited 1 5.5000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 limited 1 5.5000 limited 1 5.5000 rich 0 10.0000 limited 1 5.5000 limited 1 5.5000 89 0 1.1910 0.5955 2 +11 [sc-drc.dg]md-prv good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 best 0 10.0000 good 1 5.5000 best 0 10.0000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 best 0 10.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 
good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 best 0 10.0000 worst 2 1.0000 good 1 5.5000 89 0 1.4382 0.7191 2 +12 [sc-drc.dg]md-daci full 0 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 0 10.0000 full 0 10.0000 full 0 10.0000 partial 1 5.5000 full 0 10.0000 full 0 10.0000 partial 1 5.5000 full 0 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 0 10.0000 partial 1 5.5000 full 0 10.0000 full 0 10.0000 full 0 10.0000 partial 1 5.5000 partial 1 5.5000 no support 2 1.0000 no support 2 1.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 no support 2 1.0000 partial 1 5.5000 no support 2 1.0000 partial 1 5.5000 no support 2 1.0000 partial 1 5.5000 full 0 10.0000 partial 1 5.5000 partial 1 5.5000 partial 1 5.5000 full 0 10.0000 no support 2 1.0000 partial 1 5.5000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 partial 1 5.5000 full 0 10.0000 partial 1 5.5000 full 0 10.0000 no support 2 1.0000 partial 1 5.5000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 partial 1 5.5000 full 0 10.0000 partial 1 5.5000 partial 1 5.5000 full 0 10.0000 no support 2 1.0000 partial 1 5.5000 full 0 10.0000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 partial 1 5.5000 partial 1 5.5000 full 0 10.0000 full 0 10.0000 no support 2 1.0000 partial 1 5.5000 no support 2 1.0000 partial 1 5.5000 no support 2 1.0000 no support 2 1.0000 partial 1 5.5000 partial 1 5.5000 no support 2 1.0000 no support 2 1.0000 no support 2 1.0000 partial 1 5.5000 full 0 10.0000 no support 2 1.0000 partial 1 5.5000 89 0 1.1124 0.5562 2 +13 [sc-drc.dg]md-ref good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 best 0 10.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 
worst 2 1.0000 good 1 5.5000 good 1 5.5000 best 0 10.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 best 0 10.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 best 0 10.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 best 0 10.0000 worst 2 1.0000 good 1 5.5000 89 0 1.4045 0.7022 2 +14 [sc-drc.dg]md-lnk unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 worst 3 1.0000 unclear 2 4.0000 unclear 2 4.0000 good 1 7.0000 unclear 2 4.0000 good 1 7.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 good 1 7.0000 best 0 10.0000 best 0 10.0000 worst 3 1.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 good 1 7.0000 good 1 7.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 worst 3 1.0000 good 1 7.0000 unclear 2 4.0000 unclear 2 4.0000 good 1 7.0000 unclear 2 4.0000 unclear 2 4.0000 good 1 7.0000 worst 3 1.0000 unclear 2 4.0000 good 1 7.0000 worst 3 1.0000 worst 3 1.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 
unclear 2 4.0000 unclear 2 4.0000 unclear 2 4.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 worst 3 1.0000 good 1 7.0000 good 1 7.0000 unclear 2 4.0000 unclear 2 4.0000 worst 3 1.0000 unclear 2 4.0000 good 1 7.0000 unclear 2 4.0000 good 1 7.0000 good 1 7.0000 worst 3 1.0000 worst 3 1.0000 unclear 2 4.0000 good 1 7.0000 unclear 2 4.0000 good 1 7.0000 unclear 2 4.0000 unclear 2 4.0000 worst 3 1.0000 unclear 2 4.0000 worst 3 1.0000 worst 3 1.0000 good 1 7.0000 unclear 2 4.0000 good 1 7.0000 89 0 1.8427 0.6142 3 +15 [sc-drc.dg]fmt-com yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 89 0 0.2360 0.2360 1 +16 [sc-drc.dg]md-dkn best 0 10.0000 best 0 10.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 best 0 10.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 
good 1 5.5000 good 1 5.5000 best 0 10.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 best 0 10.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 best 0 10.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 best 0 10.0000 worst 2 1.0000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 89 0 1.3371 0.6685 2 +17 [sc-drc.dg]md-psst no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 by policy 0 10.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 by policy 0 10.0000 no 2 1.0000 by policy 0 10.0000 by policy 0 10.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 by policy 0 10.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 
1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 by policy 0 10.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 by policy 0 10.0000 no 2 1.0000 no 2 1.0000 by policy 0 10.0000 no 2 1.0000 no 2 1.0000 89 0 1.8202 0.9101 2 +18 [sc-drc.dg]md-FAIR minimal 2 1.0000 allowed 1 5.5000 allowed 1 5.5000 enforced 0 10.0000 minimal 2 1.0000 minimal 2 1.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 allowed 1 5.5000 allowed 1 5.5000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 allowed 1 5.5000 minimal 2 1.0000 enforced 0 10.0000 allowed 1 5.5000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 allowed 1 5.5000 allowed 1 5.5000 enforced 0 10.0000 minimal 2 1.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 minimal 2 1.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 enforced 0 10.0000 enforced 0 10.0000 minimal 2 1.0000 allowed 1 5.5000 minimal 2 1.0000 enforced 0 10.0000 minimal 2 1.0000 89 0 0.6404 0.3202 2 +19 [sc-drc.dg]land-ctsp 
no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 0 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 0 no 1 1.0000 yes 0 10.0000 no 1 1.0000 0 yes 0 10.0000 no 1 1.0000 no 1 1.0000 0 no 1 1.0000 no 1 1.0000 0 0 no 1 1.0000 0 yes 0 10.0000 no 1 1.0000 0 yes 0 10.0000 yes 0 10.0000 0 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 0 0 0 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 0 yes 0 10.0000 no 1 1.0000 no 1 1.0000 0 0 0 0 0 0 0 0 no 1 1.0000 0 0 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 0 no 1 1.0000 0 0 0 no 1 1.0000 yes 0 10.0000 no 1 1.0000 0 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 0 no 1 1.0000 0 0 no 1 1.0000 0 0 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 56 0 0.5893 0.5893 1 +20 [sc-drc.dg]md-cs yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 89 0 0.5618 0.5618 1 +21 
[sc-drc.dg]acc-api yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 89 0 0.5056 0.5056 1 +22 [sc-drc.dg]md-vcb yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 
no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 89 0 0.4045 0.4045 1 +23 [sc-drc.dg]sch-api yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 89 0 0.4157 0.4157 1 +24 [sc-drc.dg]gov-tsp good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 best 0 10.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 best 0 10.0000 good 1 5.5000 best 0 10.0000 good 1 5.5000 worst 2 1.0000 
good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 worst 2 1.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 best 0 10.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 best 0 10.0000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 best 0 10.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 best 0 10.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 good 1 5.5000 good 1 5.5000 worst 2 1.0000 best 0 10.0000 good 1 5.5000 89 0 1.0449 0.5225 2 +25 [sc-drc.dg]oss good 1 5.5000 no 2 1.0000 good 1 5.5000 no 2 1.0000 best 0 10.0000 best 0 10.0000 best 0 10.0000 no 2 1.0000 good 1 5.5000 no 2 1.0000 no 2 1.0000 best 0 10.0000 good 1 5.5000 no 2 1.0000 good 1 5.5000 no 2 1.0000 no 2 1.0000 good 1 5.5000 good 1 5.5000 best 0 10.0000 no 2 1.0000 good 1 5.5000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 good 1 5.5000 good 1 5.5000 no 2 1.0000 no 2 1.0000 no 2 1.0000 good 1 5.5000 no 2 1.0000 no 2 1.0000 no 2 1.0000 best 0 10.0000 best 0 10.0000 good 1 5.5000 good 1 5.5000 no 2 1.0000 no 2 1.0000 good 1 5.5000 no 2 1.0000 no 2 1.0000 no 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 no 2 1.0000 no 2 1.0000 no 2 1.0000 good 1 5.5000 no 2 1.0000 best 0 10.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 good 1 5.5000 good 1 5.5000 good 1 5.5000 no 2 1.0000 no 2 1.0000 good 1 5.5000 good 1 5.5000 no 2 1.0000 no 2 1.0000 no 2 1.0000 no 2 1.0000 good 1 5.5000 no 2 1.0000 good 1 5.5000 good 1 5.5000 best 0 10.0000 best 0 10.0000 no 2 1.0000 good 1 5.5000 good 
1 5.5000 no 2 1.0000 no 2 1.0000 good 1 5.5000 no 2 1.0000 no 2 1.0000 good 1 5.5000 best 0 10.0000 good 1 5.5000 good 1 5.5000 89 0 1.3933 0.6966 2 +26 [sc-drc.dg]tr-seal no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 89 0 0.9775 0.9775 1 +27 [sc-drc.dg]gov-stk good 1 7.0000 full 0 10.0000 good 1 7.0000 none 3 1.0000 weak 2 4.0000 weak 2 4.0000 good 1 7.0000 0 weak 2 4.0000 good 1 7.0000 none 3 1.0000 good 1 7.0000 0 none 3 1.0000 weak 2 4.0000 good 1 7.0000 0 full 0 10.0000 full 0 10.0000 full 0 10.0000 good 1 7.0000 full 0 10.0000 good 1 7.0000 0 weak 2 4.0000 good 1 7.0000 full 0 10.0000 good 1 7.0000 0 0 0 good 1 7.0000 weak 2 4.0000 good 1 7.0000 weak 2 4.0000 none 3 1.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 weak 2 4.0000 good 1 7.0000 weak 2 4.0000 good 1 7.0000 weak 2 4.0000 weak 2 4.0000 weak 2 4.0000 full 0 10.0000 0 good 1 7.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 weak 2 4.0000 weak 2 4.0000 weak 2 4.0000 
weak 2 4.0000 good 1 7.0000 weak 2 4.0000 good 1 7.0000 good 1 7.0000 weak 2 4.0000 0 good 1 7.0000 good 1 7.0000 good 1 7.0000 weak 2 4.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 weak 2 4.0000 weak 2 4.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 good 1 7.0000 0 good 1 7.0000 weak 2 4.0000 good 1 7.0000 good 1 7.0000 0 none 3 1.0000 weak 2 4.0000 0 good 1 7.0000 good 1 7.0000 77 0 1.3377 0.4459 3 +28 [sc-drc.dg]land-api 0 0 0 0 0 0 0 0 0 0 no 1 1.0000 0 0 0 yes 0 10.0000 0 no 1 1.0000 0 0 0 no 1 1.0000 0 0 0 no 1 1.0000 0 yes 0 10.0000 no 1 1.0000 no 1 1.0000 0 no 1 1.0000 yes 0 10.0000 0 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 0 0 0 0 no 1 1.0000 no 1 1.0000 no 1 1.0000 0 0 0 no 1 1.0000 0 0 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 0 no 1 1.0000 no 1 1.0000 0 yes 0 10.0000 0 0 no 1 1.0000 0 no 1 1.0000 no 1 1.0000 no 1 1.0000 0 0 0 no 1 1.0000 0 0 0 0 no 1 1.0000 0 no 1 1.0000 no 1 1.0000 0 no 1 1.0000 no 1 1.0000 0 0 0 40 0 0.8250 0.8250 1 +29 [sc-drc.dg]land-pg yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 0 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 0 0 0 0 0 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 0 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 yes 0 
10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 no 1 1.0000 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 0 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 81 0 0.4321 0.4321 1 +30 [sc-drc.dg]md-pid some 1 5.5000 some 1 5.5000 some 1 5.5000 some 1 5.5000 all 0 10.0000 all 0 10.0000 all 0 10.0000 all 0 10.0000 all 0 10.0000 all 0 10.0000 some 1 5.5000 all 0 10.0000 all 0 10.0000 all 0 10.0000 some 1 5.5000 all 0 10.0000 some 1 5.5000 all 0 10.0000 all 0 10.0000 all 0 10.0000 all 0 10.0000 some 1 5.5000 all 0 10.0000 all 0 10.0000 all 0 10.0000 all 0 10.0000 some 1 5.5000 some 1 5.5000 some 1 5.5000 some 1 5.5000 some 1 5.5000 some 1 5.5000 all 0 10.0000 some 1 5.5000 all 0 10.0000 0 some 1 5.5000 all 0 10.0000 all 0 10.0000 all 0 10.0000 all 0 10.0000 all 0 10.0000 some 1 5.5000 some 1 5.5000 some 1 5.5000 some 1 5.5000 all 0 10.0000 some 1 5.5000 all 0 10.0000 all 0 10.0000 all 0 10.0000 all 0 10.0000 0 0 0 0 0 some 1 5.5000 some 1 5.5000 all 0 10.0000 some 1 5.5000 all 0 10.0000 all 0 10.0000 some 1 5.5000 all 0 10.0000 all 0 10.0000 some 1 5.5000 some 1 5.5000 0 some 1 5.5000 some 1 5.5000 all 0 10.0000 all 0 10.0000 all 0 10.0000 some 1 5.5000 all 0 10.0000 all 0 10.0000 all 0 10.0000 some 1 5.5000 some 1 5.5000 some 1 5.5000 all 0 10.0000 some 1 5.5000 all 0 10.0000 0 some 1 5.5000 all 0 10.0000 some 1 5.5000 all 0 10.0000 81 0 0.4444 0.2222 1 +31 [sc-drc.dg]pid-l yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 0 0 0 yes 0 10.0000 0 0 yes 0 10.0000 0 0 yes 0 10.0000 yes 0 10.0000 0 0 0 0 0 yes 0 10.0000 0 0 yes 0 10.0000 yes 0 10.0000 0 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 0 0 no 1 1.0000 yes 0 10.0000 0 0 0 0 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 yes 0 10.0000 0 yes 0 10.0000 0 yes 0 10.0000 0 yes 0 10.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 0 yes 0 10.0000 yes 0 10.0000 0 yes 0 10.0000 0 0 yes 0 10.0000 yes 0 
10.0000 no 1 1.0000 yes 0 10.0000 yes 0 10.0000 0 0 0 yes 0 10.0000 0 0 0 0 yes 0 10.0000 yes 0 10.0000 0 0 0 no 1 1.0000 0 0 0 0 47 0 0.1702 0.1702 1 diff --git a/split-by-model/dkNET-DRP/summary/summary.json b/split-by-model/dkNET-DRP/summary/summary.json new file mode 100644 index 0000000..ebadcfc --- /dev/null +++ b/split-by-model/dkNET-DRP/summary/summary.json @@ -0,0 +1,6853 @@ +{ + "space": { + "DataRepoCompliance": { + "Open":["closed","partiallyOpen","fullyOpen"], + "FAIR": { + "Findable":["notFindable","partiallyFindable","fullyFindable"], + "Accessible":["notAccessible","partiallyAccessible","fullyAccessible"], + "Interoperable":["notInteroperable","partiallyInteroperable","fullyInteroperable"], + "Reusable":["notReusable","partiallyReusable","fullyReusable"] + }, + "Citable":["notCitable","partiallyCitable","fullyCitable"], + "Trustworthy":["significantConcerns","minorConcerns","noConcerns"], + "Properties": { + "OpenProps": { + "Restrictions":["none","minimal","significant"], + "CCLicenseCompliance":["nonCompliant","none","adequate","good","full"], + "OpenFlags": { + "openFormat":["no","yes"], + "platformSupportsDataWork":["no","yes"], + "ccLicenseOK":["no","yes"], + "restrictionsNotJustified":["no","yes"] + } + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":["none","internalPID","externalPID"], + "MetadataGrade":["minimal","limited","rich"], + "IdInMetadata":["none","partial","all"], + "FindableFlags": { + "internalSearchOK":["no","yes"] + } + }, + "AccessibleProps": { + "MetadataPersistence":["no","byEvidence","byStatedPolicy"], + "AccessibleFlags": { + "humanAccessible":["no","yes"], + "machineAccessible":["no","yes"], + "persistentMetadata":["no","yes"], + "licenseOK":["no","yes"], + "stdApi":["no","yes"] + } + }, + "InteroperableProps": { + "MetadataFAIRness":["minimal","allowed","enforced"], + "MetadataReferenceQuality":["freeText","informal","formal"], + 
"StudyLinkage":["none","freeText","textualMetadata","machineReadableMetadata"], + "InteroperableFlags": { + "formalMetadataVocabularyOK":["no","yes"], + "fairMetadataOK":["no","yes"], + "qualifiedMetadataReferencesOK":["no","yes"], + "studyLinkageOK":["no","yes"] + } + }, + "ReusableProps": { + "DocumentationLevel":["lacking","adequate","good","full"], + "ReuseLicense":["none","repositoryLevel","datasetLevel"], + "MetadataProvenance":["unclear","adequate","full"], + "DkNetMetadataLevel":["none","dataset","datasetAndSubject"], + "ReusableFlags": { + "documentationOK":["no","yes"], + "dkNetMetadataOK":["no","yes"], + "communityStandard":["no","yes"], + "generalMetadata":["no","yes"], + "metadataProvenanceOK":["no","yes"] + } + } + }, + "CitableProps": { + "OrcidAssociation":["none","supported","required"], + "CitationMetadataLevel":["none","partial","full"], + "MachineReadableLandingPage":["none","exists","supportsDataCitation"] + }, + "TrustworthinessProps": { + "GovernanceTransparency":["opaque","partial","full"], + "StakeholderGovernance":["none","weak","good","full"], + "SourceOpen":["no","partially","yes"] + } + } + } + }, + "transcripts": { + "616712744a595": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"none", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "ReusableProps": { + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + } + }, + 
"FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "619441c4acb77": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + 
"metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61af958cc8abe": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "GovernanceTransparency":"opaque" + }, + "CitableProps": { + "MachineReadableLandingPage":"supportsDataCitation", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "FindableProps": { + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "PersistentIdentifier":"externalPID" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"repositoryLevel", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + 
"licenseOK":"yes", + "stdApi":"no" + } + } + }, + "OpenProps": { + "Restrictions":"minimal", + "CCLicenseCompliance":"adequate", + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61573f38ef525": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Properties": { + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none" + }, + "TrustworthinessProps": { + "StakeholderGovernance":"weak", + "SourceOpen":"yes", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "Restrictions":"minimal", + "CCLicenseCompliance":"full", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "FAIRProps": { + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "DocumentationLevel":"good" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + } + }, + "Open":"partiallyOpen", + "FAIR": { + "Findable":"partiallyFindable", + 
"Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible", + "Interoperable":"notInteroperable" + } + } + }, + "61942b69bdeab": { + "DataRepoCompliance": { + "Properties": { + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"none", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "PersistentIdentifier":"none", + "MetadataGrade":"minimal" + }, + "AccessibleProps": { + "MetadataPersistence":"no" + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "OpenProps": { + "Restrictions":"significant", + "CCLicenseCompliance":"none" + } + }, + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"closed", + "FAIR": { + "Findable":"notFindable", + "Reusable":"notReusable", + "Accessible":"notAccessible", + "Interoperable":"notInteroperable" + } + } + }, + "61b2654669195": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + }, + "OpenProps": { + "CCLicenseCompliance":"adequate", + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"yes" + }, + "Restrictions":"significant" + }, + "FAIRProps": { + 
"FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "PersistentIdentifier":"externalPID" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"repositoryLevel", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "AccessibleProps": { + "MetadataPersistence":"byStatedPolicy", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"yes", + "licenseOK":"yes", + "stdApi":"no" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + } + } + } + } + }, + "61a9183722786": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none" + }, + "OpenProps": { + "CCLicenseCompliance":"full", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"textualMetadata", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"yes", + 
"dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"none" + }, + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "PersistentIdentifier":"externalPID" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + } + }, + "Open":"fullyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "6164baf6052a8": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"fullyAccessible", + "Reusable":"fullyReusable" + }, + "Properties": { + "TrustworthinessProps": { + "StakeholderGovernance":"good", + "SourceOpen":"yes", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"supported" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"allowed", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "AccessibleProps": { + "MetadataPersistence":"byStatedPolicy", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"yes", + 
"licenseOK":"yes", + "stdApi":"yes" + } + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"good" + } + }, + "OpenProps": { + "Restrictions":"minimal", + "CCLicenseCompliance":"full", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + } + } + }, + "617c779c5bc94": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "InteroperableProps": { + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataFAIRness":"allowed", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + 
"DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61a909a92778f": { + "DataRepoCompliance": { + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "GovernanceTransparency":"full", + "StakeholderGovernance":"good" + }, + "OpenProps": { + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + }, + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "ReuseLicense":"none", + "DkNetMetadataLevel":"none" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"supported", + "CitationMetadataLevel":"partial" + } + }, + "Trustworthy":"noConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + 
"Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61676b2d11524": { + "DataRepoCompliance": { + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + } + }, + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "6176e09b35d7f": { + "DataRepoCompliance": { 
+ "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "ReusableProps": { + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "ReuseLicense":"none", + "DkNetMetadataLevel":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataFAIRness":"allowed", + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"notAccessible" + } + } + }, + "6182df9c3d9e3": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "FAIRProps": { + "ReusableProps": { + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + 
"metadataProvenanceOK":"yes" + }, + "ReuseLicense":"none", + "DocumentationLevel":"good" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "Restrictions":"significant", + "CCLicenseCompliance":"none" + } + }, + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + } + } + }, + "61b23d317644e": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "OpenProps": { + "CCLicenseCompliance":"adequate", + "Restrictions":"significant", + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "ReusableProps": { + "ReusableFlags": { + 
"documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"repositoryLevel", + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"none" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"no", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + }, + "FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "6171d0459add1": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "OpenProps": { + "CCLicenseCompliance":"good", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + 
"AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + }, + "FindableProps": { + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + } + }, + "FAIR": { + "Findable":"partiallyFindable", + "Reusable":"notReusable", + "Accessible":"partiallyAccessible", + "Interoperable":"notInteroperable" + } + } + }, + "616066f394fa6": { + "DataRepoCompliance": { + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + }, + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"supported" + }, + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"none" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + 
"fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + } + } + } + }, + "61ae6db443359": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Properties": { + "TrustworthinessProps": { + "StakeholderGovernance":"good", + "SourceOpen":"yes", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "MetadataProvenance":"unclear", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none", + "DocumentationLevel":"good" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + }, + "OpenProps": { + "CCLicenseCompliance":"full", + "Restrictions":"none", + "OpenFlags": { + 
"openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + }, + "Citable":"partiallyCitable", + "Open":"fullyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "6198331eb3593": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"fullyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"good", + "Restrictions":"minimal" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "DocumentationLevel":"good" + }, + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"textualMetadata", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + 
"MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + } + } + } + }, + "6179b9da80888": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"good", + "Restrictions":"minimal" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"informal", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + } + }, + "AccessibleProps": { + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + }, + "MetadataPersistence":"no" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"repositoryLevel", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"none", + 
"DocumentationLevel":"lacking" + } + } + } + } + }, + "618eaa9fcd36f": { + "DataRepoCompliance": { + "Properties": { + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "TrustworthinessProps": { + "GovernanceTransparency":"full", + "StakeholderGovernance":"good", + "SourceOpen":"yes" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "FindableProps": { + "PersistentIdentifier":"none", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + } + }, + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "615b89fa770b6": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Properties": { + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"required" + }, + "OpenProps": { + 
"CCLicenseCompliance":"good", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "TrustworthinessProps": { + "GovernanceTransparency":"full", + "StakeholderGovernance":"good", + "SourceOpen":"yes" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "MetadataReferenceQuality":"formal", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "MetadataProvenance":"full", + "DkNetMetadataLevel":"datasetAndSubject", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"good" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + } + }, + "Open":"fullyOpen", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + } + } + }, + "6194378833fb3": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + 
"OrcidAssociation":"none" + }, + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "FindableProps": { + "PersistentIdentifier":"none", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "621d682e591a5": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Citable":"partiallyCitable", + "Open":"fullyOpen", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset" + }, + "FindableProps": { + "IdInMetadata":"all", + 
"FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + }, + "OpenProps": { + "CCLicenseCompliance":"full", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61816a627e26b": { + "DataRepoCompliance": { + "Properties": { + "FAIRProps": { + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"repositoryLevel", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + 
"IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"good", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "GovernanceTransparency":"full", + "StakeholderGovernance":"good", + "SourceOpen":"yes" + } + }, + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61b38f199319f": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible", + "Interoperable":"notInteroperable" + }, + "Properties": { + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "PersistentIdentifier":"none", + "MetadataGrade":"limited" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + 
"StakeholderGovernance":"none", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"full", + "Restrictions":"significant", + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + } + } + } + }, + "6177452b8b6c7": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "OpenProps": { + "CCLicenseCompliance":"good", + "Restrictions":"minimal", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "FAIRProps": { + "FindableProps": { + "IdInMetadata":"partial", + "MetadataGrade":"rich", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"repositoryLevel", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "DocumentationLevel":"good" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "GovernanceTransparency":"full", + "StakeholderGovernance":"full" + }, + "CitableProps": { + 
"MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"required", + "CitationMetadataLevel":"partial" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "6176c84f1c023": { + "DataRepoCompliance": { + "Trustworthy":"noConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none" + }, + "TrustworthinessProps": { + "GovernanceTransparency":"full", + "SourceOpen":"yes", + "StakeholderGovernance":"full" + }, + "FAIRProps": { + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "MetadataReferenceQuality":"formal", + "StudyLinkage":"machineReadableMetadata", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"datasetAndSubject", + "ReuseLicense":"none", + "DocumentationLevel":"good" + } + } + }, + "FAIR": { + 
"Interoperable":"fullyInteroperable", + "Findable":"fullyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "614ba8756c8db": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"datasetAndSubject", + "ReuseLicense":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"allowed", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"partial", + "StakeholderGovernance":"full" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61a7bc9e3a002": { 
+ "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"good" + }, + "FindableProps": { + "PersistentIdentifier":"none", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61818c0471e3e": { + "DataRepoCompliance": { + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + }, + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Open":"fullyOpen", + "Properties": { + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + 
"InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"good" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none" + }, + "OpenProps": { + "CCLicenseCompliance":"full", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + } + } + }, + "6182f66084e0b": { + "DataRepoCompliance": { + "Properties": { + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + 
"InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + } + }, + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "618180e41db0e": { + "DataRepoCompliance": { + "Properties": { + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + 
"dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none" + }, + "FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + } + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + } + }, + "Trustworthy":"minorConcerns", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Open":"partiallyOpen", + "Citable":"partiallyCitable" + } + }, + "61afc04c3cc7a": { + "DataRepoCompliance": { + "Properties": { + "FAIRProps": { + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none", + "DocumentationLevel":"good" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + 
} + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + } + }, + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Open":"partiallyOpen" + } + }, + "6155e4b25d5bb": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"none", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + 
"DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + } + }, + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "618063fc1eba5": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "StakeholderGovernance":"none", + "SourceOpen":"yes", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "FindableProps": { + "PersistentIdentifier":"none", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + }, + "OpenProps": { 
+ "CCLicenseCompliance":"none", + "Restrictions":"none" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61a92a18640d8": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Citable":"partiallyCitable", + "Open":"fullyOpen", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"none" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"byStatedPolicy", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"yes", + "licenseOK":"yes", + "stdApi":"yes" + } + } + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none" + }, + "OpenProps": { + "CCLicenseCompliance":"full", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + 
"Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "6198648660f3d": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "AccessibleProps": { + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + }, + "MetadataPersistence":"no" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "ReuseLicense":"repositoryLevel", + "DocumentationLevel":"adequate", + "DkNetMetadataLevel":"dataset" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"full", + "Restrictions":"none" + } + } + } + }, + "61941c2f7748c": { + "DataRepoCompliance": { + "FAIR": { + 
"Interoperable":"partiallyInteroperable", + "Reusable":"partiallyReusable", + "Findable":"notFindable", + "Accessible":"partiallyAccessible" + }, + "Trustworthy":"minorConcerns", + "Open":"closed", + "Citable":"partiallyCitable", + "Properties": { + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "PersistentIdentifier":"none", + "MetadataGrade":"minimal" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"required", + "CitationMetadataLevel":"none" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"no", + "ccLicenseOK":"no", + "restrictionsNotJustified":"yes" + }, + "Restrictions":"significant", + "CCLicenseCompliance":"none" + } + } + } + }, + "618afa63748f2": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + 
"TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + }, + "FAIRProps": { + "AccessibleProps": { + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"no", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + }, + "MetadataPersistence":"no" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + } + } + }, + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"notAccessible" + } + } + }, + "617ad2ab32afc": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + 
"humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "6165b40aacafb": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"fullyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "FAIRProps": { + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "DocumentationLevel":"lacking" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + 
"StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "GovernanceTransparency":"opaque" + } + } + } + }, + "618e95d1e58c7": { + "DataRepoCompliance": { + "FAIR": { + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"notAccessible", + "Interoperable":"notInteroperable" + }, + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"closed", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "MetadataGrade":"rich", + "PersistentIdentifier":"none", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "OpenProps": { + 
"Restrictions":"significant", + "CCLicenseCompliance":"none" + } + } + } + }, + "617accb79fb53": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "615b72f7e220a": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Citable":"partiallyCitable", + "Open":"fullyOpen", + "FAIR": { + 
"Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "FAIRProps": { + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"datasetLevel", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "DocumentationLevel":"lacking" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + }, + "OpenProps": { + "CCLicenseCompliance":"good", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "TrustworthinessProps": { + "StakeholderGovernance":"weak", + "SourceOpen":"yes", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none" + } + } + } + }, + "61799d37c3555": { + "DataRepoCompliance": { + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + }, + "Trustworthy":"minorConcerns", + "Citable":"partiallyCitable", + "Open":"fullyOpen", + "Properties": { + "FAIRProps": { + 
"AccessibleProps": { + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + }, + "MetadataPersistence":"no" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"rich", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "ReusableProps": { + "ReuseLicense":"repositoryLevel", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"datasetAndSubject", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"full" + } + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "GovernanceTransparency":"partial", + "StakeholderGovernance":"full" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"good", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + } + } + }, + "61782a94716e3": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"fullyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", 
+ "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"good" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + } + } + } + } + }, + "614b619410622": { + "DataRepoCompliance": { + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + }, + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "OpenProps": { + "CCLicenseCompliance":"good", + "Restrictions":"minimal", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + 
"CitationMetadataLevel":"full", + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"informal", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + } + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"datasetAndSubject", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"good" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + } + } + } + }, + "615dc910773b8": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"textualMetadata", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + 
"generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"repositoryLevel", + "DocumentationLevel":"adequate", + "MetadataProvenance":"full", + "DkNetMetadataLevel":"dataset" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"supported" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"full", + "Restrictions":"none" + } + }, + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"fullyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61732f9ba6055": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"fullyFindable", + "Accessible":"fullyAccessible", + "Reusable":"partiallyReusable" + }, + "Properties": { + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"allowed", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "AccessibleProps": { + "MetadataPersistence":"byStatedPolicy", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"yes", + "licenseOK":"yes", + 
"stdApi":"yes" + } + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"no", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "DkNetMetadataLevel":"none" + } + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "GovernanceTransparency":"partial", + "StakeholderGovernance":"full" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"good", + "Restrictions":"minimal" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"supported" + } + } + } + }, + "616f4035b5cf4": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"fullyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + }, + "Properties": { + "FAIRProps": { + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "ReusableProps": { + "MetadataProvenance":"full", + "DkNetMetadataLevel":"datasetAndSubject", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"none", + "DocumentationLevel":"good" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "AccessibleProps": { + 
"MetadataPersistence":"byStatedPolicy", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"yes", + "licenseOK":"no", + "stdApi":"yes" + } + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none" + } + } + } + }, + "6198116a6dfed": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"none" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + }, + "OpenProps": { + "CCLicenseCompliance":"none", + 
"Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + } + }, + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + } + } + }, + "61805a39e24bb": { + "DataRepoCompliance": { + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"supported" + }, + "FAIRProps": { + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"textualMetadata", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "MetadataReferenceQuality":"informal" + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "Restrictions":"minimal", + "CCLicenseCompliance":"full" + } + }, + "Citable":"fullyCitable", + 
"Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61afe38742749": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "ReusableProps": { + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none" + }, + "AccessibleProps": { + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"no", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + }, + "MetadataPersistence":"no" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + 
"Accessible":"notAccessible" + } + } + }, + "61800a6acbf2a": { + "DataRepoCompliance": { + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + }, + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "ReusableProps": { + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "FindableProps": { + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + } + } + } + }, + "617850897d411": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "FAIR": { + "Interoperable":"fullyInteroperable", + 
"Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + } + } + } + } + }, + "618af1fa72f85": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"required" + }, + "FAIRProps": { + 
"ReusableProps": { + "ReuseLicense":"datasetLevel", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"byStatedPolicy", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"yes", + "licenseOK":"yes", + "stdApi":"no" + } + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"formal", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + } + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"full", + "StakeholderGovernance":"full" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"full", + "Restrictions":"none" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "619c1b5161bad": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "OpenProps": { + "CCLicenseCompliance":"good", + "Restrictions":"minimal", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + 
"formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none" + } + }, + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + } + } + }, + "61b3991a21735": { + "DataRepoCompliance": { + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"supported", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + }, + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } 
+ }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataFAIRness":"allowed", + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + } + } + }, + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61a66d491f16a": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "FAIRProps": { + "ReusableProps": { + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"none", + "DocumentationLevel":"good" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + 
"persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + } + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none" + } + }, + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + } + } + }, + "619c34b5360d2": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + 
"humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "618772a4eda31": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"closed", + "Citable":"partiallyCitable", + "FAIR": { + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible", + "Interoperable":"notInteroperable" + }, + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "OpenProps": { + "Restrictions":"significant", + "CCLicenseCompliance":"none" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + 
"DkNetMetadataLevel":"none" + } + } + } + } + }, + "61a7e89c785ae": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "DocumentationLevel":"good" + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"required", + "CitationMetadataLevel":"partial" + } + } + } + }, + "61783f6c4c8e7": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + 
"Accessible":"notAccessible" + }, + "Properties": { + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "AccessibleProps": { + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"no", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + }, + "MetadataPersistence":"no" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"no", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "ReuseLicense":"none", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + } + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "OpenProps": { + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + } + } + } + }, + "617864ac7b873": { + "DataRepoCompliance": { + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + }, + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + 
"Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"textualMetadata", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + } + } + } + } + }, + "61b257c62b44a": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no" + }, + "FindableProps": { + "IdInMetadata":"partial", + 
"MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none" + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "Restrictions":"significant", + "CCLicenseCompliance":"none" + } + }, + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"notAccessible" + } + } + }, + "618194d41e94a": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + }, + "FindableProps": { + 
"IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"supported", + "CitationMetadataLevel":"none" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "614df6a85b9b1": { + "DataRepoCompliance": { + "Properties": { + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"allowed", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"repositoryLevel", + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + }, + "OpenProps": { + "CCLicenseCompliance":"good", + "Restrictions":"minimal", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, 
+ "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + } + }, + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "617af3b01bff2": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"repositoryLevel", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + 
"restrictionsNotJustified":"no" + }, + "Restrictions":"minimal", + "CCLicenseCompliance":"full" + } + }, + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "618b0ed289968": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"supported", + "CitationMetadataLevel":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", 
+ "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + } + } + } + }, + "61aa8e7632db5": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "StakeholderGovernance":"good", + "SourceOpen":"yes", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"freeText", + "StudyLinkage":"none" + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"datasetAndSubject", + "DocumentationLevel":"lacking" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + } + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "Restrictions":"minimal", + "CCLicenseCompliance":"full" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61830248beb1c": { + 
"DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"full" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "MetadataReferenceQuality":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"none" + } + } + } + } + }, + "621d4ff4808c5": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", 
+ "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "FindableProps": { + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"repositoryLevel", + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"dataset", + "DocumentationLevel":"lacking" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "GovernanceTransparency":"full", + "StakeholderGovernance":"good" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"good", + "Restrictions":"minimal" + }, + "CitableProps": { + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"supported", + "CitationMetadataLevel":"none" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61aa5882975cb": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + 
"OrcidAssociation":"none" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"no", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "ReuseLicense":"none", + "DkNetMetadataLevel":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "MetadataReferenceQuality":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"none" + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61609db5051d2": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"none", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "FindableProps": { + 
"PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "DocumentationLevel":"lacking" + }, + "InteroperableProps": { + "MetadataFAIRness":"allowed", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + } + } + } + }, + "61ae7c9289651": { + "DataRepoCompliance": { + "Properties": { + "FAIRProps": { + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset" + }, + "FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "MetadataReferenceQuality":"formal", + "StudyLinkage":"textualMetadata", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + } + }, + 
"AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"supported", + "CitationMetadataLevel":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"full", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + }, + "Trustworthy":"minorConcerns", + "Citable":"partiallyCitable", + "Open":"fullyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "618ab2f1efc9f": { + "DataRepoCompliance": { + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Accessible":"partiallyAccessible", + "Reusable":"fullyReusable" + }, + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "FindableProps": { + "IdInMetadata":"all", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + 
"ReusableProps": { + "ReuseLicense":"datasetLevel", + "DocumentationLevel":"adequate", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + }, + "OpenProps": { + "CCLicenseCompliance":"full", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + } + }, + "Open":"fullyOpen" + } + }, + "61a50fa18488f": { + "DataRepoCompliance": { + "Trustworthy":"noConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"fullyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"textualMetadata", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "MetadataReferenceQuality":"informal" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + 
"persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "DocumentationLevel":"good" + } + }, + "OpenProps": { + "Restrictions":"minimal", + "CCLicenseCompliance":"none" + } + } + } + }, + "618ac6bb76674": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + 
"humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "621d12cf667c7": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"fullyFindable", + "Accessible":"fullyAccessible", + "Reusable":"partiallyReusable" + }, + "Open":"fullyOpen", + "Properties": { + "FAIRProps": { + "FindableProps": { + "MetadataGrade":"rich", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "MetadataReferenceQuality":"formal", + "StudyLinkage":"textualMetadata" + }, + "AccessibleProps": { + "MetadataPersistence":"byStatedPolicy", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"yes", + "licenseOK":"yes", + "stdApi":"yes" + } + }, + "ReusableProps": { + "ReuseLicense":"datasetLevel", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"full", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"good" + } + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"supported" + }, + "OpenProps": { + "CCLicenseCompliance":"good", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + 
}, + "TrustworthinessProps": { + "GovernanceTransparency":"opaque", + "SourceOpen":"yes" + } + } + } + }, + "618b05ddaf1c8": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"supported", + "CitationMetadataLevel":"partial" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61856c2f8d135": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + 
"platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61a67946c16f2": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + 
"StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "DocumentationLevel":"good" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + } + } + } + } + }, + "615ca4418ee3f": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "FAIRProps": { + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"no", + 
"qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"no" + }, + "MetadataReferenceQuality":"informal" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"no", + "generalMetadata":"no", + "metadataProvenanceOK":"yes" + }, + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none", + "DocumentationLevel":"full" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + } + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "GovernanceTransparency":"opaque" + } + }, + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61a67f53dc9b3": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "Properties": { + "OpenProps": { + "CCLicenseCompliance":"adequate", + "Restrictions":"none", + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"no", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"exists", + "CitationMetadataLevel":"none", + "OrcidAssociation":"none" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "StudyLinkage":"freeText", + 
"MetadataReferenceQuality":"freeText" + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"repositoryLevel", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"no" + } + } + } + }, + "FAIR": { + "Findable":"partiallyFindable", + "Reusable":"notReusable", + "Accessible":"partiallyAccessible", + "Interoperable":"notInteroperable" + } + } + }, + "61a7d3c6aeb5d": { + "DataRepoCompliance": { + "Citable":"notCitable", + "Trustworthy":"minorConcerns", + "Properties": { + "FAIRProps": { + "ReusableProps": { + "MetadataProvenance":"unclear", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none", + "DocumentationLevel":"good" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"no" + } + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"no", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + } + }, + "TrustworthinessProps": { + "SourceOpen":"no", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + 
"CitationMetadataLevel":"none", + "OrcidAssociation":"none" + } + }, + "Open":"partiallyOpen", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "61735ece7e758": { + "DataRepoCompliance": { + "Citable":"fullyCitable", + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "GovernanceTransparency":"partial", + "StakeholderGovernance":"full" + }, + "CitableProps": { + "CitationMetadataLevel":"full", + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"supported" + }, + "FAIRProps": { + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"yes" + }, + "ReuseLicense":"repositoryLevel", + "MetadataProvenance":"adequate", + "DkNetMetadataLevel":"none", + "DocumentationLevel":"lacking" + }, + "InteroperableProps": { + "MetadataFAIRness":"minimal", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"no", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"machineReadableMetadata", + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "IdInMetadata":"all", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "PersistentIdentifier":"externalPID", + "MetadataGrade":"limited" + }, + "AccessibleProps": { + "MetadataPersistence":"byStatedPolicy", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"yes", + "licenseOK":"yes", + "stdApi":"yes" + } + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"good", + "Restrictions":"none" + } + }, + "FAIR": { + 
"Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + } + } + }, + "617aff8495ced": { + "DataRepoCompliance": { + "Properties": { + "OpenProps": { + "OpenFlags": { + "openFormat":"yes", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "CitableProps": { + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"required", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + "GovernanceTransparency":"partial" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"allowed", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"rich", + "FindableFlags": { + "internalSearchOK":"yes" + } + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "DkNetMetadataLevel":"dataset", + "ReuseLicense":"none" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + } + } + }, + "Trustworthy":"minorConcerns", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Open":"partiallyOpen", + "Citable":"partiallyCitable" + } + 
}, + "61895560bbab4": { + "DataRepoCompliance": { + "Trustworthy":"minorConcerns", + "Open":"partiallyOpen", + "Citable":"partiallyCitable", + "FAIR": { + "Interoperable":"partiallyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Properties": { + "CitableProps": { + "MachineReadableLandingPage":"supportsDataCitation", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"weak", + "GovernanceTransparency":"partial" + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"no", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"none", + "Restrictions":"none" + }, + "FAIRProps": { + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"no", + "studyLinkageOK":"no" + }, + "StudyLinkage":"freeText", + "MetadataReferenceQuality":"freeText" + }, + "AccessibleProps": { + "MetadataPersistence":"no", + "AccessibleFlags": { + "humanAccessible":"yes", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"no", + "stdApi":"yes" + } + }, + "ReusableProps": { + "MetadataProvenance":"unclear", + "DocumentationLevel":"adequate", + "ReuseLicense":"none", + "ReusableFlags": { + "documentationOK":"yes", + "dkNetMetadataOK":"no", + "communityStandard":"yes", + "generalMetadata":"yes", + "metadataProvenanceOK":"no" + }, + "DkNetMetadataLevel":"none" + }, + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "MetadataGrade":"minimal", + "FindableFlags": { + "internalSearchOK":"yes" + } + } + } + } + } + }, + "61985711a8424": { + "DataRepoCompliance": { + "Properties": { + "TrustworthinessProps": { + "SourceOpen":"partially", + "StakeholderGovernance":"good", + 
"GovernanceTransparency":"partial" + }, + "CitableProps": { + "MachineReadableLandingPage":"none", + "OrcidAssociation":"none", + "CitationMetadataLevel":"partial" + }, + "FAIRProps": { + "FindableProps": { + "PersistentIdentifier":"internalPID", + "IdInMetadata":"partial", + "FindableFlags": { + "internalSearchOK":"yes" + }, + "MetadataGrade":"limited" + }, + "ReusableProps": { + "ReusableFlags": { + "documentationOK":"no", + "dkNetMetadataOK":"yes", + "communityStandard":"yes", + "generalMetadata":"no", + "metadataProvenanceOK":"no" + }, + "MetadataProvenance":"unclear", + "ReuseLicense":"repositoryLevel", + "DkNetMetadataLevel":"dataset", + "DocumentationLevel":"lacking" + }, + "AccessibleProps": { + "AccessibleFlags": { + "humanAccessible":"no", + "machineAccessible":"yes", + "persistentMetadata":"no", + "licenseOK":"yes", + "stdApi":"yes" + }, + "MetadataPersistence":"no" + }, + "InteroperableProps": { + "MetadataFAIRness":"enforced", + "InteroperableFlags": { + "formalMetadataVocabularyOK":"yes", + "fairMetadataOK":"yes", + "qualifiedMetadataReferencesOK":"yes", + "studyLinkageOK":"yes" + }, + "StudyLinkage":"textualMetadata", + "MetadataReferenceQuality":"informal" + } + }, + "OpenProps": { + "OpenFlags": { + "openFormat":"no", + "platformSupportsDataWork":"yes", + "ccLicenseOK":"yes", + "restrictionsNotJustified":"no" + }, + "CCLicenseCompliance":"adequate", + "Restrictions":"none" + } + }, + "Trustworthy":"minorConcerns", + "FAIR": { + "Interoperable":"fullyInteroperable", + "Findable":"partiallyFindable", + "Reusable":"partiallyReusable", + "Accessible":"partiallyAccessible" + }, + "Open":"partiallyOpen", + "Citable":"partiallyCitable" + } + } + } +} \ No newline at end of file