diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index 90b1486b512..3320d9d08a4 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -24,11 +24,11 @@ jobs: run: | cd tests/shell shellspec - shellspec-rocky8: - name: "RockyLinux 8" + shellspec-rocky9: + name: "RockyLinux 9" runs-on: ubuntu-latest container: - image: rockylinux/rockylinux:8 + image: rockylinux/rockylinux:9 steps: - uses: actions/checkout@v2 - name: Install shellspec diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md new file mode 100644 index 00000000000..92ff4a917d5 --- /dev/null +++ b/doc/release-notes/10169-JSON-schema-validation.md @@ -0,0 +1,3 @@ +### Improved JSON Schema validation for datasets + +Enhanced JSON Schema validation with checks for required and allowed child objects, type checking for field types including `primitive`, `compound` and `controlledVocabulary`, and more user-friendly error messages to help pinpoint issues in the dataset JSON. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10543. diff --git a/doc/release-notes/10726-dataverse-facets-api-extension.md b/doc/release-notes/10726-dataverse-facets-api-extension.md new file mode 100644 index 00000000000..baf6f798e35 --- /dev/null +++ b/doc/release-notes/10726-dataverse-facets-api-extension.md @@ -0,0 +1,3 @@ +New optional query parameter "returnDetails" added to the "dataverses/{identifier}/facets/" endpoint to include detailed information about each DataverseFacet. + +New endpoint "datasetfields/facetables" that lists all facetable dataset fields defined in the installation. diff --git a/doc/release-notes/10733-add-publication-status-to-search-api-results.md b/doc/release-notes/10733-add-publication-status-to-search-api-results.md new file mode 100644 index 00000000000..d015a50a00d --- /dev/null +++ b/doc/release-notes/10733-add-publication-status-to-search-api-results.md @@ -0,0 +1,14 @@ +The Search API (/api/search) response now includes publicationStatuses in the JSON response as long as the list is not empty. + +Example: +```javascript +"items": [ + { + "name": "Darwin's Finches", + ... + "publicationStatuses": [ + "Unpublished", + "Draft" + ], +(etc, etc) +``` diff --git a/doc/release-notes/10749-dataverse-user-permissions-api-extension.md b/doc/release-notes/10749-dataverse-user-permissions-api-extension.md new file mode 100644 index 00000000000..706b1f42641 --- /dev/null +++ b/doc/release-notes/10749-dataverse-user-permissions-api-extension.md @@ -0,0 +1 @@ +New API endpoint "dataverses/{identifier}/userPermissions" for obtaining the calling user's permissions on a dataverse. diff --git a/doc/release-notes/10758-rust-client.md b/doc/release-notes/10758-rust-client.md new file mode 100644 index 00000000000..e206f27ce65 --- /dev/null +++ b/doc/release-notes/10758-rust-client.md @@ -0,0 +1,3 @@ +### Rust API client library + +An API client library for the Rust programming language is now available at https://github.com/gdcc/rust-dataverse and has been added to the [list of client libraries](https://dataverse-guide--10758.org.readthedocs.build/en/10758/api/client-libraries.html) in the API Guide. See also #10758.
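For a quick feel for the API additions summarized in the release notes above, the calls might look roughly like this (a sketch only: the server URL, API token, and the `root` collection alias are placeholders, and the authoritative examples are in the native-api.rst changes further below):

```bash
export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
export SERVER_URL=https://demo.dataverse.org
export ID=root

# Facets for a collection, with display name and id for each facet (new returnDetails parameter)
curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/facets?returnDetails=true"

# All facetable dataset fields defined in the installation (new endpoint)
curl "$SERVER_URL/api/datasetfields/facetables"

# The calling user's permissions on a collection (new endpoint)
curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/dataverses/$ID/userPermissions"
```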
diff --git a/doc/release-notes/7068-reserve-file-pids.md b/doc/release-notes/7068-reserve-file-pids.md new file mode 100644 index 00000000000..182a0d7f67b --- /dev/null +++ b/doc/release-notes/7068-reserve-file-pids.md @@ -0,0 +1,9 @@ +## Release Highlights + +### Pre-Publish File DOI Reservation with DataCite + +Dataverse installations using DataCite (or other persistent identifier (PID) providers that support reserving PIDs) will be able to reserve PIDs for files when they are uploaded (rather than at publication time). Note that reserving file DOIs can slow uploads with large numbers of files, so administrators may need to adjust timeouts (specifically any Apache "``ProxyPass / ajp://localhost:8009/ timeout=``" setting in the recommended Dataverse configuration). + +## Major Use Cases + +- Users will have DOIs/PIDs reserved for their files as part of file upload instead of at publication time. (Issue #7068, PR #7334) diff --git a/doc/release-notes/make-data-count-.md b/doc/release-notes/make-data-count-.md new file mode 100644 index 00000000000..9022582dddb --- /dev/null +++ b/doc/release-notes/make-data-count-.md @@ -0,0 +1,3 @@ +### Counter Processor 1.05 Support + +This release includes support for counter-processor-1.05 for processing Make Data Count metrics. If you are running Make Data Count support, you should reinstall/reconfigure counter-processor as described in the latest Guides. (For existing installations, note that counter-processor-1.05 requires Python 3, so you will need to follow the full counter-processor installation steps. Also note that if you configure the new version the same way, it will reprocess the days in the current month when it is first run. This is normal and will not affect the metrics in Dataverse.) diff --git a/doc/sphinx-guides/source/_static/util/counter_daily.sh b/doc/sphinx-guides/source/_static/util/counter_daily.sh index 674972b18f2..5095a83b7e2 100644 --- a/doc/sphinx-guides/source/_static/util/counter_daily.sh +++ b/doc/sphinx-guides/source/_static/util/counter_daily.sh @@ -1,6 +1,6 @@ #! /bin/bash -COUNTER_PROCESSOR_DIRECTORY="/usr/local/counter-processor-0.1.04" +COUNTER_PROCESSOR_DIRECTORY="/usr/local/counter-processor-1.05" MDC_LOG_DIRECTORY="/usr/local/payara6/glassfish/domains/domain1/logs/mdc" # counter_daily.sh diff --git a/doc/sphinx-guides/source/admin/make-data-count.rst b/doc/sphinx-guides/source/admin/make-data-count.rst index fe32af6649a..51bc2c4a9fe 100644 --- a/doc/sphinx-guides/source/admin/make-data-count.rst +++ b/doc/sphinx-guides/source/admin/make-data-count.rst @@ -16,7 +16,7 @@ Architecture Dataverse installations who would like support for Make Data Count must install `Counter Processor`_, a Python project created by California Digital Library (CDL) which is part of the Make Data Count project and which runs the software in production as part of their `DASH`_ data sharing platform. -.. _Counter Processor: https://github.com/CDLUC3/counter-processor +.. _Counter Processor: https://github.com/gdcc/counter-processor .. _DASH: https://cdluc3.github.io/dash/ The diagram below shows how Counter Processor interacts with your Dataverse installation and the DataCite hub, once configured. Dataverse installations using Handles rather than DOIs should note the limitations in the next section of this page. @@ -84,9 +84,9 @@ Configure Counter Processor * Change to the directory where you installed Counter Processor.
- * ``cd /usr/local/counter-processor-0.1.04`` + * ``cd /usr/local/counter-processor-1.05`` -* Download :download:`counter-processor-config.yaml <../_static/admin/counter-processor-config.yaml>` to ``/usr/local/counter-processor-0.1.04``. +* Download :download:`counter-processor-config.yaml <../_static/admin/counter-processor-config.yaml>` to ``/usr/local/counter-processor-1.05``. * Edit the config file and pay particular attention to the FIXME lines. @@ -99,7 +99,7 @@ Soon we will be setting up a cron job to run nightly but we start with a single * Change to the directory where you installed Counter Processor. - * ``cd /usr/local/counter-processor-0.1.04`` + * ``cd /usr/local/counter-processor-1.05`` * If you are running Counter Processor for the first time in the middle of a month, you will need create blank log files for the previous days. e.g.: diff --git a/doc/sphinx-guides/source/api/client-libraries.rst b/doc/sphinx-guides/source/api/client-libraries.rst index bd0aa55ba99..6279ea8329e 100755 --- a/doc/sphinx-guides/source/api/client-libraries.rst +++ b/doc/sphinx-guides/source/api/client-libraries.rst @@ -78,3 +78,10 @@ Ruby https://github.com/libis/dataverse_api is a Ruby gem for Dataverse APIs. It is registered as a library on Rubygems (https://rubygems.org/search?query=dataverse). The gem is created and maintained by the LIBIS team (https://www.libis.be) at the University of Leuven (https://www.kuleuven.be). + +Rust +---- + +https://github.com/gdcc/rust-dataverse + +The Rust Dataverse client is a comprehensive crate designed for seamless interaction with the Dataverse API. It facilitates essential operations such as collection, dataset, and file management. Additionally, the crate includes a user-friendly command-line interface (CLI) that brings the full functionality of the library to the command line. This project is actively maintained by `Jan Range `_. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index a3a004969dc..462f145ab4f 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -224,6 +224,22 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/facets" +By default, this endpoint will return an array including the facet names. If more detailed information is needed, we can set the query parameter ``returnDetails`` to ``true``, which will return the display name and id in addition to the name for each facet: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/facets?returnDetails=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/facets?returnDetails=true" + Set Facets for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -566,9 +582,7 @@ The fully expanded example above (without environment variables) looks like this Retrieve a Dataset JSON Schema for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. 
This -first version of the schema only includes required elements and fields. In the future we plan to improve the schema by adding controlled -vocabulary and more robust dataset field format testing: +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset: .. code-block:: bash @@ -593,8 +607,22 @@ While it is recommended to download a copy of the JSON Schema from the collectio Validate Dataset JSON File for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for json formatting -and the presence of required elements: +Validates a dataset JSON file customized for a given collection prior to creating the dataset. + +The validation tests for: + +- JSON formatting +- required fields +- typeClass must follow these rules: + + - if multiple = true then value must be a list + - if typeClass = ``primitive`` the value object is a String or a List of Strings depending on the multiple flag + - if typeClass = ``compound`` the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag + - if typeClass = ``controlledVocabulary`` the values are checked against the list of allowed values stored in the database + - typeName validations (child objects with their required and allowed typeNames are configured automatically by the database schema). Examples include: + + - dsDescription validation includes checks for typeName = ``dsDescriptionValue`` (required) and ``dsDescriptionDate`` (optional) + - datasetContact validation includes checks for typeName = ``datasetContactName`` (required) and ``datasetContactEmail``; ``datasetContactAffiliation`` (optional) .. code-block:: bash @@ -678,6 +706,29 @@ The fully expanded example above (without environment variables) looks like this Note: You must have "Edit Dataverse" permission in the given Dataverse to invoke this endpoint. +Get User Permissions on a Dataverse +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call returns the permissions that the calling user has on a particular dataverse. + +In particular, the user permissions that this API call checks, returned as booleans, are the following: + +* Can add a dataverse +* Can add a dataset +* Can view the unpublished dataverse +* Can edit the dataverse +* Can manage the dataverse permissions +* Can publish the dataverse +* Can delete the dataverse + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/dataverses/$ID/userPermissions" + .. _create-dataset-command: Create a Dataset in a Dataverse Collection @@ -4690,6 +4741,28 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/metadatablocks/citation" +.. _dataset-fields-api: + +Dataset Fields +-------------- + +List All Facetable Dataset Fields +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +List all facetable dataset fields defined in the installation. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/datasetfields/facetables" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasetfields/facetables" + .. 
_Notifications: Notifications @@ -5106,7 +5179,7 @@ The fully expanded example above (without environment variables) looks like this Reserve a PID ~~~~~~~~~~~~~ -Reserved a PID for a dataset. A superuser API token is required. +Reserve a PID for a dataset if not yet registered, and, if FilePIDs are enabled, reserve any file PIDs that are not yet registered. A superuser API token is required. .. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index e8d0a0b3ea7..297f1283ef7 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -114,6 +114,9 @@ https://demo.dataverse.org/api/search?q=trees "identifier_of_dataverse":"dvbe69f5e1", "name_of_dataverse":"dvbe69f5e1", "citation":"Finch, Fiona; Spruce, Sabrina; Poe, Edgar Allen; Mulligan, Hercules, 2019, \"Darwin's Finches\", https://doi.org/10.70122/FK2/MB5VGR, Root, V3", + "publicationStatuses": [ + "Published" + ], "storageIdentifier":"file://10.70122/FK2/MB5VGR", "subjects":[ "Astronomy and Astrophysics", @@ -207,6 +210,9 @@ In this example, ``show_relevance=true`` matches per field are shown. Available "published_at":"2016-05-10T12:57:45Z", "citationHtml":"Finch, Fiona, 2016, \"Darwin's Finches\", http://dx.doi.org/10.5072/FK2/G2VPE7, Root Dataverse, V1", "citation":"Finch, Fiona, 2016, \"Darwin's Finches\", http://dx.doi.org/10.5072/FK2/G2VPE7, Root Dataverse, V1", + "publicationStatuses": [ + "Published" + ], "matches":[ { "authorName":{ @@ -297,6 +303,9 @@ The above example ``fq=publicationStatus:Published`` retrieves only "RELEASED" v "identifier_of_dataverse": "rahman", "name_of_dataverse": "mdmizanur rahman Dataverse collection", "citation": "Finch, Fiona, 2019, \"Darwin's Finches\", https://doi.org/10.70122/FK2/GUAS41, Demo Dataverse, V1", + "publicationStatuses": [ + "Published" + ], "storageIdentifier": "file://10.70122/FK2/GUAS41", "subjects": [ "Medicine, Health and Life Sciences" @@ -330,6 +339,9 @@ The above example ``fq=publicationStatus:Published`` retrieves only "RELEASED" v "identifier_of_dataverse": "demo", "name_of_dataverse": "Demo Dataverse", "citation": "Finch, Fiona, 2020, \"Darwin's Finches\", https://doi.org/10.70122/FK2/7ZXYRH, Demo Dataverse, V1", + "publicationStatuses": [ + "Published" + ], "storageIdentifier": "file://10.70122/FK2/7ZXYRH", "subjects": [ "Medicine, Health and Life Sciences" @@ -386,6 +398,10 @@ The above example ``metadata_fields=citation:*`` returns under "metadataBlocks" "identifier_of_dataverse": "Sample_data", "name_of_dataverse": "Sample Data", "citation": "MĂ©tropole, 2021, \"JDD avec GeoJson 2021-07-13T10:23:46.409Z\", https://doi.org/10.5072/FK2/GIWCKB, Root, DRAFT VERSION", + "publicationStatuses": [ + "Unpublished", + "Draft" + ], "storageIdentifier": "file://10.5072/FK2/GIWCKB", "subjects": [ "Other" diff --git a/doc/sphinx-guides/source/contributor/code.md b/doc/sphinx-guides/source/contributor/code.md index 2a1dec08c05..c7154d14169 100644 --- a/doc/sphinx-guides/source/contributor/code.md +++ b/doc/sphinx-guides/source/contributor/code.md @@ -20,6 +20,7 @@ The primary codebase and issue tracker for Dataverse is (TypeScript) - (Javascript) - (Python) +- (Rust) - (Ansible) - (Javascript) diff --git a/doc/sphinx-guides/source/contributor/index.md b/doc/sphinx-guides/source/contributor/index.md index e75cc58bccd..1017f15f0ed 100644 --- a/doc/sphinx-guides/source/contributor/index.md +++ 
b/doc/sphinx-guides/source/contributor/index.md @@ -43,7 +43,7 @@ If you speak multiple languages, you are very welcome to help us translate Datav ## Code -Dataverse is open source and we love code contributions. Developers are not limited to the main Dataverse code in this git repo. We have projects in C, C++, Go, Java, Javascript, Julia, PHP, Python, R, Ruby, TypeScript and more. To get started, please see the following pages: +Dataverse is open source and we love code contributions. Developers are not limited to the main Dataverse code in this git repo. We have projects in C, C++, Go, Java, Javascript, Julia, PHP, Python, R, Ruby, Rust, TypeScript and more. To get started, please see the following pages: ```{toctree} :maxdepth: 1 diff --git a/doc/sphinx-guides/source/developers/make-data-count.rst b/doc/sphinx-guides/source/developers/make-data-count.rst index edad580e451..f347e7b8ff9 100644 --- a/doc/sphinx-guides/source/developers/make-data-count.rst +++ b/doc/sphinx-guides/source/developers/make-data-count.rst @@ -1,7 +1,7 @@ Make Data Count =============== -Support for Make Data Count is a feature of the Dataverse Software that is described in the :doc:`/admin/make-data-count` section of the Admin Guide. In order for developers to work on the feature, they must install Counter Processor, a Python 3 application, as described below. Counter Processor can be found at https://github.com/CDLUC3/counter-processor +Support for Make Data Count is a feature of the Dataverse Software that is described in the :doc:`/admin/make-data-count` section of the Admin Guide. In order for developers to work on the feature, they must install Counter Processor, a Python 3 application, as described below. Counter Processor can be found at https://github.com/gdcc/counter-processor .. contents:: |toctitle| :local: @@ -49,7 +49,7 @@ Once you are done with your configuration, you can run Counter Processor like th ``su - counter`` -``cd /usr/local/counter-processor-0.1.04`` +``cd /usr/local/counter-processor-1.05`` ``CONFIG_FILE=counter-processor-config.yaml python39 main.py`` @@ -82,7 +82,7 @@ Second, if you are also sending your SUSHI report to Make Data Count, you will n ``curl -H "Authorization: Bearer $JSON_WEB_TOKEN" -X DELETE https://$MDC_SERVER/reports/$REPORT_ID`` -To get the ``REPORT_ID``, look at the logs generated in ``/usr/local/counter-processor-0.1.04/tmp/datacite_response_body.txt`` +To get the ``REPORT_ID``, look at the logs generated in ``/usr/local/counter-processor-1.05/tmp/datacite_response_body.txt`` To read more about the Make Data Count api, see https://github.com/datacite/sashimi diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index 151d44e6841..f61321ef245 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -428,7 +428,7 @@ firewalled from your Dataverse installation host). Counter Processor ----------------- -Counter Processor is required to enable Make Data Count metrics in a Dataverse installation. See the :doc:`/admin/make-data-count` section of the Admin Guide for a description of this feature. Counter Processor is open source and we will be downloading it from https://github.com/CDLUC3/counter-processor +Counter Processor is required to enable Make Data Count metrics in a Dataverse installation. See the :doc:`/admin/make-data-count` section of the Admin Guide for a description of this feature. 
Counter Processor is open source and we will be downloading it from https://github.com/gdcc/counter-processor Installing Counter Processor ============================ @@ -438,9 +438,9 @@ A scripted installation using Ansible is mentioned in the :doc:`/developers/make As root, download and install Counter Processor:: cd /usr/local - wget https://github.com/CDLUC3/counter-processor/archive/v0.1.04.tar.gz - tar xvfz v0.1.04.tar.gz - cd /usr/local/counter-processor-0.1.04 + wget https://github.com/gdcc/counter-processor/archive/refs/tags/v1.05.tar.gz + tar xvfz v1.05.tar.gz + cd /usr/local/counter-processor-1.05 Installing GeoLite Country Database =================================== @@ -451,7 +451,7 @@ The process required to sign up, download the database, and to configure automat As root, change to the Counter Processor directory you just created, download the GeoLite2-Country tarball from MaxMind, untar it, and copy the geoip database into place:: - + tar xvfz GeoLite2-Country.tar.gz cp GeoLite2-Country_*/GeoLite2-Country.mmdb maxmind_geoip @@ -461,12 +461,12 @@ Creating a counter User As root, create a "counter" user and change ownership of Counter Processor directory to this new user:: useradd counter - chown -R counter:counter /usr/local/counter-processor-0.1.04 + chown -R counter:counter /usr/local/counter-processor-1.05 Installing Counter Processor Python Requirements ================================================ -Counter Processor version 0.1.04 requires Python 3.7 or higher. This version of Python is available in many operating systems, and is purportedly available for RHEL7 or CentOS 7 via Red Hat Software Collections. Alternately, one may compile it from source. +Counter Processor version 1.05 requires Python 3.7 or higher. This version of Python is available in many operating systems, and is purportedly available for RHEL7 or CentOS 7 via Red Hat Software Collections. Alternately, one may compile it from source. The following commands are intended to be run as root but we are aware that Pythonistas might prefer fancy virtualenv or similar setups. Pull requests are welcome to improve these steps! @@ -477,7 +477,7 @@ Install Python 3.9:: Install Counter Processor Python requirements:: python3.9 -m ensurepip - cd /usr/local/counter-processor-0.1.04 + cd /usr/local/counter-processor-1.05 pip3 install -r requirements.txt See the :doc:`/admin/make-data-count` section of the Admin Guide for how to configure and run Counter Processor. 
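Read together, the counter-processor changes above amount to roughly the following upgrade sequence for an existing installation (a condensed sketch assuming the paths, the `counter` user, and the Python 3.9 packages shown in the guide excerpts above):

```bash
# as root: fetch and unpack counter-processor 1.05 next to the old 0.1.04 directory
cd /usr/local
wget https://github.com/gdcc/counter-processor/archive/refs/tags/v1.05.tar.gz
tar xvfz v1.05.tar.gz

# hand the new directory to the counter user and install the Python requirements
chown -R counter:counter /usr/local/counter-processor-1.05
cd /usr/local/counter-processor-1.05
python3.9 -m ensurepip
pip3 install -r requirements.txt
```

The counter_daily.sh script and the counter-processor-config.yaml download location then simply point at `/usr/local/counter-processor-1.05` instead of the old directory, as shown in the Admin Guide changes above.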
diff --git a/scripts/search/tests/data/dataset-finch3.json b/scripts/search/tests/data/dataset-finch3.json new file mode 100644 index 00000000000..903b0aa124d --- /dev/null +++ b/scripts/search/tests/data/dataset-finch3.json @@ -0,0 +1,102 @@ +{ + "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "HTML & More", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Markup, Marty", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorAffiliation": { + "value": "W4C", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { + "datasetContactEmail": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value": "markup@mailinator.com" + }, + "datasetContactName": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactName", + "value": "Markup, Marty" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ + { + "dsDescriptionValue": { + "value": "BEGIN

END", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "2021-07-13" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + }, + { + "typeName": "language", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "English", + "Afar", + "aar" + ] + } + ], + "displayName": "Citation Metadata" + } + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index a012175deae..3977023fc4b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -15,6 +15,7 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; @@ -253,7 +254,7 @@ public String toBibtexString() { public void writeAsBibtexCitation(OutputStream os) throws IOException { // Use UTF-8 - Writer out = new BufferedWriter(new OutputStreamWriter(os, "utf-8")); + Writer out = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); if(getFileTitle() !=null && isDirect()) { out.write("@incollection{"); } else { @@ -317,7 +318,7 @@ public String toRISString() { public void writeAsRISCitation(OutputStream os) throws IOException { // Use UTF-8 - Writer out = new BufferedWriter(new OutputStreamWriter(os, "utf-8")); + Writer out = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); out.write("Provider: " + publisher + "\r\n"); out.write("Content: text/plain; charset=\"utf-8\"" + "\r\n"); // Using type "DATA" - see https://github.com/IQSS/dataverse/issues/4816 diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 21f925f8981..9331ec67d12 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1248,14 +1248,6 @@ public List selectFilesWithMissingOriginalSizes() { } - /** - * Check that a identifier entered by the user is unique (not currently used - * for any other study in this Dataverse Network). Also check for duplicate - * in the remote PID service if needed - * @param datafileId - * @param storageLocation - * @return {@code true} iff the global identifier is unique. 
- */ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException { // Verify that the DataFile no longer exists: if (find(dataFileId) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 10b5d800c21..00774bbd3bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -22,7 +22,7 @@ import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.File; import java.io.IOException; import java.sql.Timestamp; @@ -34,6 +34,7 @@ import java.util.logging.Logger; import java.util.Properties; +import edu.harvard.iq.dataverse.validation.JSONDataValidation; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.inject.Inject; @@ -888,14 +889,16 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } - public String getCollectionDatasetSchema(String dataverseAlias) { + return getCollectionDatasetSchema(dataverseAlias, null); + } + public String getCollectionDatasetSchema(String dataverseAlias, Map>> schemaChildMap) { Dataverse testDV = this.findByAlias(dataverseAlias); while (!testDV.isMetadataBlockRoot()) { if (testDV.getOwner() == null) { - break; // we are at the root; which by defintion is metadata blcok root, regarldess of the value + break; // we are at the root; which by definition is metadata block root, regardless of the value } testDV = testDV.getOwner(); } @@ -932,6 +935,8 @@ public String getCollectionDatasetSchema(String dataverseAlias) { dsft.setRequiredDV(dsft.isRequired()); dsft.setInclude(true); } + List childrenRequired = new ArrayList<>(); + List childrenAllowed = new ArrayList<>(); if (dsft.isHasChildren()) { for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) { DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId()); @@ -944,8 +949,18 @@ public String getCollectionDatasetSchema(String dataverseAlias) { child.setRequiredDV(child.isRequired() && dsft.isRequired()); child.setInclude(true); } + if (child.isRequired()) { + childrenRequired.add(child.getName()); + } + childrenAllowed.add(child.getName()); } } + if (schemaChildMap != null) { + Map> map = new HashMap<>(); + map.put("required", childrenRequired); + map.put("allowed", childrenAllowed); + schemaChildMap.put(dsft.getName(), map); + } if(dsft.isRequiredDV()){ requiredDSFT.add(dsft); } @@ -1021,11 +1036,13 @@ private String getCustomMDBSchema (MetadataBlock mdb, List req } public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { - JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); + Map>> schemaChildMap = new HashMap<>(); + JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias, schemaChildMap))); - try { + try { Schema schema = SchemaLoader.load(rawSchema); schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid + JSONDataValidation.validate(schema, schemaChildMap, jsonInput); // throws a ValidationException if any objects are invalid } catch (ValidationException vx) { 
logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); String accumulatedexceptions = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/Shib.java b/src/main/java/edu/harvard/iq/dataverse/Shib.java index f9cf061e771..a3dfbf81512 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Shib.java +++ b/src/main/java/edu/harvard/iq/dataverse/Shib.java @@ -19,6 +19,7 @@ import org.apache.commons.lang3.StringUtils; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; @@ -458,9 +459,9 @@ private String getRequiredValueFromAssertion(String key) throws Exception { if (attributeValue.isEmpty()) { throw new Exception(key + " was empty"); } - if(systemConfig.isShibAttributeCharacterSetConversionEnabled()) { - attributeValue= new String( attributeValue.getBytes("ISO-8859-1"), "UTF-8"); - } + if (systemConfig.isShibAttributeCharacterSetConversionEnabled()) { + attributeValue= new String( attributeValue.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); + } String trimmedValue = attributeValue.trim(); logger.fine("The SAML assertion for \"" + key + "\" (required) was \"" + attributeValue + "\" and was trimmed to \"" + trimmedValue + "\"."); return trimmedValue; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 00da4990996..16ac884180b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -286,7 +286,7 @@ private DataFile findDataFileOrDieWrapper(String fileId){ @GET @AuthRequired @Path("datafile/{fileId:.+}") - @Produces({"application/xml"}) + @Produces({"application/xml","*/*"}) public Response datafile(@Context ContainerRequestContext crc, @PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { // check first if there's a trailing slash, and chop it: diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d60884bad2f..550ad1b3043 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -65,6 +65,7 @@ import java.io.InputStream; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.Map.Entry; import java.util.logging.Level; @@ -1153,7 +1154,7 @@ public void write(OutputStream os) throws IOException, os.write(",\n".getBytes()); } - os.write(output.build().toString().getBytes("UTF8")); + os.write(output.build().toString().getBytes(StandardCharsets.UTF_8)); if (!wroteObject) { wroteObject = true; @@ -1267,7 +1268,7 @@ public void write(OutputStream os) throws IOException, os.write(",\n".getBytes()); } - os.write(output.build().toString().getBytes("UTF8")); + os.write(output.build().toString().getBytes(StandardCharsets.UTF_8)); if (!wroteObject) { wroteObject = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFields.java b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFields.java new file mode 100644 index 00000000000..2ec35c896d9 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFields.java @@ -0,0 +1,29 @@ 
+package edu.harvard.iq.dataverse.api; + +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import jakarta.ejb.EJB; +import jakarta.ws.rs.*; +import jakarta.ws.rs.core.Response; + +import java.util.List; + +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.jsonDatasetFieldTypes; + +/** + * Api bean for managing dataset fields. + */ +@Path("datasetfields") +@Produces("application/json") +public class DatasetFields extends AbstractApiBean { + + @EJB + DatasetFieldServiceBean datasetFieldService; + + @GET + @Path("facetables") + public Response listAllFacetableDatasetFields() { + List datasetFieldTypes = datasetFieldService.findAllFacetableFieldTypes(); + return ok(jsonDatasetFieldTypes(datasetFieldTypes)); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d576022389c..2dbe1cb41f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse.api; import com.amazonaws.services.s3.model.PartETag; - import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index 81db5f7d782..ed2a8db5e06 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -111,6 +111,9 @@ public class Dataverses extends AbstractApiBean { @EJB SwordServiceBean swordService; + + @EJB + PermissionServiceBean permissionService; @POST @AuthRequired @@ -846,22 +849,29 @@ public Response setMetadataRoot(@Context ContainerRequestContext crc, @PathParam /** * return list of facets for the dataverse with alias `dvIdtf` */ - public Response listFacets(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) { + public Response listFacets(@Context ContainerRequestContext crc, + @PathParam("identifier") String dvIdtf, + @QueryParam("returnDetails") boolean returnDetails) { try { - User u = getRequestUser(crc); - DataverseRequest r = createDataverseRequest(u); + User user = getRequestUser(crc); + DataverseRequest request = createDataverseRequest(user); Dataverse dataverse = findDataverseOrDie(dvIdtf); - JsonArrayBuilder fs = Json.createArrayBuilder(); - for (DataverseFacet f : execCommand(new ListFacetsCommand(r, dataverse))) { - fs.add(f.getDatasetFieldType().getName()); + List dataverseFacets = execCommand(new ListFacetsCommand(request, dataverse)); + + if (returnDetails) { + return ok(jsonDataverseFacets(dataverseFacets)); + } else { + JsonArrayBuilder facetsBuilder = Json.createArrayBuilder(); + for (DataverseFacet facet : dataverseFacets) { + facetsBuilder.add(facet.getDatasetFieldType().getName()); + } + return ok(facetsBuilder); } - return ok(fs); } catch (WrappedResponse e) { return e.getResponse(); } } - @GET @AuthRequired @Path("{identifier}/featured") @@ -1647,4 +1657,25 @@ public Response linkDataverse(@Context ContainerRequestContext crc, @PathParam(" } } + @GET + @AuthRequired + @Path("{identifier}/userPermissions") + public Response getUserPermissionsOnDataverse(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) { + Dataverse dataverse; + try { + dataverse = findDataverseOrDie(dvIdtf); + } catch 
(WrappedResponse wr) { + return wr.getResponse(); + } + User requestUser = getRequestUser(crc); + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + jsonObjectBuilder.add("canAddDataverse", permissionService.userOn(requestUser, dataverse).has(Permission.AddDataverse)); + jsonObjectBuilder.add("canAddDataset", permissionService.userOn(requestUser, dataverse).has(Permission.AddDataset)); + jsonObjectBuilder.add("canViewUnpublishedDataverse", permissionService.userOn(requestUser, dataverse).has(Permission.ViewUnpublishedDataverse)); + jsonObjectBuilder.add("canEditDataverse", permissionService.userOn(requestUser, dataverse).has(Permission.EditDataverse)); + jsonObjectBuilder.add("canManageDataversePermissions", permissionService.userOn(requestUser, dataverse).has(Permission.ManageDataversePermissions)); + jsonObjectBuilder.add("canPublishDataverse", permissionService.userOn(requestUser, dataverse).has(Permission.PublishDataverse)); + jsonObjectBuilder.add("canDeleteDataverse", permissionService.userOn(requestUser, dataverse).has(Permission.DeleteDataverse)); + return ok(jsonObjectBuilder); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/PasswordEncryption.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/PasswordEncryption.java index 4446f68228d..aef8b375b63 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/PasswordEncryption.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/PasswordEncryption.java @@ -1,6 +1,6 @@ package edu.harvard.iq.dataverse.authorization.providers.builtin; -import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import org.apache.commons.lang3.RandomStringUtils; @@ -36,13 +36,13 @@ public interface Algorithm { public String encrypt(String plainText) { try { MessageDigest md = MessageDigest.getInstance("SHA"); - md.update(plainText.getBytes("UTF-8")); + md.update(plainText.getBytes(StandardCharsets.UTF_8)); byte[] raw = md.digest(); //String hash = Base64.encodeToString(raw, true); String hash = Base64.getEncoder().encodeToString(raw); return hash; - } catch (NoSuchAlgorithmException | UnsupportedEncodingException e) { + } catch (NoSuchAlgorithmException e) { throw new RuntimeException(e); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index c369010c8cd..a42bb35615f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -20,30 +20,16 @@ package edu.harvard.iq.dataverse.dataaccess; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.datavariable.DataVariable; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Scanner; -import java.util.Set; -import java.math.BigDecimal; -import java.math.MathContext; -import java.math.RoundingMode; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import 
java.nio.file.Paths; -import java.nio.file.StandardOpenOption; import java.util.logging.Logger; import java.util.regex.Matcher; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 060b8694e9c..4c84384b271 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -17,12 +17,11 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; import java.util.*; @@ -281,7 +280,7 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat try { tmpFile = FileUtil.inputStreamToFile(inputStream); } catch (IOException ex) { - logger.severe(ex.getMessage()); + logger.severe("FileUtil.inputStreamToFile failed for tmpFile: " + ex.getMessage()); } StorageIO dataAccess = null; @@ -307,7 +306,7 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat fullSizeImage = ImageIO.read(tmpFile); } catch (IOException ex) { IOUtils.closeQuietly(inputStream); - logger.severe(ex.getMessage()); + logger.severe("ImageIO.read failed for tmpFile: " + ex.getMessage()); return null; } if (fullSizeImage == null) { @@ -318,25 +317,14 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat int width = fullSizeImage.getWidth(); int height = fullSizeImage.getHeight(); FileChannel src = null; - try { - src = new FileInputStream(tmpFile).getChannel(); - } catch (FileNotFoundException ex) { - IOUtils.closeQuietly(inputStream); - logger.severe(ex.getMessage()); - return null; - } FileChannel dest = null; - try { - dest = new FileOutputStream(tmpFile).getChannel(); - } catch (FileNotFoundException ex) { - IOUtils.closeQuietly(inputStream); - logger.severe(ex.getMessage()); - return null; - } - try { + try (FileInputStream fis = new FileInputStream(tmpFile); FileOutputStream fos = new FileOutputStream(tmpFile)) { + src = fis.getChannel(); + dest = fos.getChannel(); dest.transferFrom(src, 0, src.size()); } catch (IOException ex) { - logger.severe(ex.getMessage()); + IOUtils.closeQuietly(inputStream); + logger.severe("Error occurred during transfer using FileChannels: " + ex.getMessage()); return null; } File tmpFileForResize = null; @@ -344,7 +332,7 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat //The stream was used around line 274 above, so this creates an empty file (OK since all it is used for is getting a path, but not reusing it here would make it easier to close it above.) 
tmpFileForResize = FileUtil.inputStreamToFile(inputStream); } catch (IOException ex) { - logger.severe(ex.getMessage()); + logger.severe("FileUtil.inputStreamToFile failed for tmpFileForResize: " + ex.getMessage()); return null; } finally { IOUtils.closeQuietly(inputStream); @@ -409,14 +397,8 @@ public static InputStream getThumbnailAsInputStream(Dataset dataset, int size) { String base64Image = datasetThumbnail.getBase64image(); String leadingStringToRemove = FileUtil.DATA_URI_SCHEME; String encodedImg = base64Image.substring(leadingStringToRemove.length()); - byte[] decodedImg = null; - try { - decodedImg = Base64.getDecoder().decode(encodedImg.getBytes("UTF-8")); - logger.fine("returning this many bytes for " + "dataset id: " + dataset.getId() + ", persistentId: " + dataset.getIdentifier() + " :" + decodedImg.length); - } catch (UnsupportedEncodingException ex) { - logger.info("dataset thumbnail could not be decoded for dataset id " + dataset.getId() + ": " + ex); - return null; - } + byte[] decodedImg = Base64.getDecoder().decode(encodedImg.getBytes(StandardCharsets.UTF_8)); + logger.fine("returning this many bytes for " + "dataset id: " + dataset.getId() + ", persistentId: " + dataset.getIdentifier() + " :" + decodedImg.length); ByteArrayInputStream nonDefaultDatasetThumbnail = new ByteArrayInputStream(decodedImg); logger.fine("For dataset id " + dataset.getId() + " a thumbnail was found and is being returned."); return nonDefaultDatasetThumbnail; @@ -627,7 +609,7 @@ public static boolean validateDatasetMetadataExternally(Dataset ds, String execu try { File tempFile = File.createTempFile("datasetMetadataCheck", ".tmp"); - FileUtils.writeStringToFile(tempFile, jsonMetadata); + FileUtils.writeStringToFile(tempFile, jsonMetadata, StandardCharsets.UTF_8); // run the external executable: String[] params = { executable, tempFile.getAbsolutePath() }; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 1a1f4f9318b..bd38245d334 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; @@ -18,9 +19,11 @@ import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider; import edu.harvard.iq.dataverse.util.BundleUtil; import java.sql.Timestamp; +import java.util.Arrays; import java.util.Date; import java.util.Set; import java.util.logging.Level; @@ -169,13 +172,12 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx } while (pidProvider.alreadyRegistered(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); } if(!retry) { - logger.warning("Reserving PID for: " + getDataset().getId() + " during publication failed."); - throw new IllegalCommandException(BundleUtil.getStringFromBundle("publishDatasetCommand.pidNotReserved"), this); + logger.warning("Reserving PID for: " + getDataset().getId() + " failed."); + throw new 
CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidNotReserved", Arrays.asList(theDataset.getIdentifier())), this); } if(attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) { //Didn't work - we existed the loop with too many tries - throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; " - + "gave up after " + attempts + " attempts. Current (last requested) identifier: " + theDataset.getIdentifier(), this); + throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidReservationRetryExceeded", Arrays.asList(Integer.toString(attempts), theDataset.getIdentifier())), this); } } // Invariant: Dataset identifier does not exist in the remote registry @@ -188,6 +190,9 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx } } catch (Throwable e) { + if (e instanceof CommandException) { + throw (CommandException) e; + } throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", pidProvider.getProviderInformation()), this); } } else { @@ -217,6 +222,73 @@ protected Timestamp getTimestamp() { return timestamp; } + protected void registerFilePidsIfNeeded(Dataset theDataset, CommandContext ctxt, boolean b) throws CommandException { + // Register file PIDs if needed + PidProvider pidGenerator = ctxt.dvObjects().getEffectivePidGenerator(getDataset()); + boolean shouldRegister = !pidGenerator.registerWhenPublished() && + ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()) && + pidGenerator.canCreatePidsLike(getDataset().getGlobalId()); + if (shouldRegister) { + for (DataFile dataFile : theDataset.getFiles()) { + logger.fine(dataFile.getId() + " is registered?: " + dataFile.isIdentifierRegistered()); + if (!dataFile.isIdentifierRegistered()) { + // pre-register a persistent id + registerFileExternalIdentifier(dataFile, pidGenerator, ctxt, true); + } + } + } + } + + private void registerFileExternalIdentifier(DataFile dataFile, PidProvider pidProvider, CommandContext ctxt, boolean retry) throws CommandException { + + if (!dataFile.isIdentifierRegistered()) { + + if (pidProvider instanceof FakeDOIProvider) { + retry = false; // No reason to allow a retry with the FakeProvider (even if it allows + // pre-registration someday), so set false for efficiency + } + try { + if (pidProvider.alreadyRegistered(dataFile)) { + int attempts = 0; + if (retry) { + do { + pidProvider.generatePid(dataFile); + logger.log(Level.INFO, "Attempting to register external identifier for datafile {0} (trying: {1}).", + new Object[] { dataFile.getId(), dataFile.getIdentifier() }); + attempts++; + } while (pidProvider.alreadyRegistered(dataFile) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); + } + if (!retry) { + logger.warning("Reserving File PID for: " + getDataset().getId() + ", fileId: " + dataFile.getId() + ", during publication failed."); + throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.filePidNotReserved", Arrays.asList(getDataset().getIdentifier())), this); + } + if (attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) { + // Didn't work - we existed the loop with too many tries + throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; " + + "gave up after " + attempts + " attempts. 
Current (last requested) identifier: " + dataFile.getIdentifier(), this); + } + } + // Invariant: DataFile identifier does not exist in the remote registry + try { + pidProvider.createIdentifier(dataFile); + dataFile.setGlobalIdCreateTime(getTimestamp()); + dataFile.setIdentifierRegistered(true); + } catch (Throwable ex) { + logger.info("Call to globalIdServiceBean.createIdentifier failed: " + ex); + } + + } catch (Throwable e) { + if (e instanceof CommandException) { + throw (CommandException) e; + } + throw new CommandException(BundleUtil.getStringFromBundle("file.register.error", pidProvider.getProviderInformation()), this); + } + } else { + throw new IllegalCommandException("This datafile may not have a PID because its id registry service is not supported.", this); + } + + } + protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, DatasetVersion persistedVersion) throws IllegalCommandException { Set changedMDBs = DatasetVersionDifference.getBlocksWithChanges(newVersion, persistedVersion); for (MetadataBlock mdb : changedMDBs) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index d6d7b49d172..94f983f0c13 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -5,7 +5,6 @@ import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.workflow.step.Failure; @@ -14,7 +13,7 @@ import java.io.IOException; import java.io.PipedInputStream; import java.io.PipedOutputStream; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -32,7 +31,7 @@ import org.duracloud.error.ContentStoreException; @RequiredPermissions(Permission.PublishDataset) -public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { +public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { private static final Logger logger = Logger.getLogger(DuraCloudSubmitToArchiveCommand.class.getName()); private static final String DEFAULT_PORT = "443"; @@ -117,7 +116,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t public void run() { try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { - dataciteOut.write(dataciteXml.getBytes(Charset.forName("utf-8"))); + dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8)); dataciteOut.close(); success=true; } catch (Exception e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 287e877f6e0..69ebe6feed8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -102,13 +102,13 @@ public Dataset execute(CommandContext ctxt) throws CommandException { try { // This can potentially throw a CommandException, so let's make // sure we exit cleanly: - - registerExternalIdentifier(theDataset, ctxt, false); + registerExternalIdentifier(theDataset, ctxt, false); + registerFilePidsIfNeeded(theDataset, ctxt, false); } catch (CommandException comEx) { - logger.warning("Failed to reserve the identifier "+theDataset.getGlobalId().asString()+"; notifying the user(s), unlocking the dataset"); - // Send failure notification to the user: + logger.warning("Failed to reserve the identifier " + theDataset.getGlobalId().asString() + "; notifying the user(s), unlocking the dataset"); + // Send failure notification to the user: notifyUsersDatasetPublishStatus(ctxt, theDataset, UserNotification.Type.PUBLISHFAILED_PIDREG); - // Remove the dataset lock: + // Remove the dataset lock: ctxt.datasets().removeDatasetLocks(theDataset, DatasetLock.Reason.finalizePublication); // re-throw the exception: throw comEx; @@ -395,8 +395,7 @@ private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) t // we can't get "dependent" DOIs assigned to files in a dataset // with the registered id that is a handle; or even a DOI, but in // an authority that's different from what's currently configured. - // Additionaly in 4.9.3 we have added a system variable to disable - // registering file PIDs on the installation level. + // File PIDs may be enabled/disabled per collection. boolean registerGlobalIdsForFiles = ctxt.systemConfig().isFilePIDsEnabledForCollection( getDataset().getOwner()) && pidProvider.canCreatePidsLike(dataset.getGlobalId()); @@ -422,8 +421,8 @@ private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) t // pidProvider. 
dataset.setIdentifierRegistered(true); } catch (Throwable e) { - logger.warning("Failed to register the identifier " + dataset.getGlobalId().asString() - + ", or to register a file in the dataset; notifying the user(s), unlocking the dataset"); + logger.warning("Failed to publicize the identifier " + dataset.getGlobalId().asString() + + ", or to publicize a file in the dataset; notifying the user(s), unlocking the dataset"); // Send failure notification to the user: notifyUsersDatasetPublishStatus(ctxt, dataset, UserNotification.Type.PUBLISHFAILED_PIDREG); @@ -440,8 +439,9 @@ private void updateFiles(Timestamp updateTime, CommandContext ctxt) throws Comma if (dataFile.getPublicationDate() == null) { // this is a new, previously unpublished file, so publish by setting date dataFile.setPublicationDate(updateTime); - - // check if any prexisting roleassignments have file download and send notifications + + // check if any pre-existing role assignments have file download and send + // notifications notifyUsersFileDownload(ctxt, dataFile); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 512987866d4..7d749262b87 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -11,7 +11,6 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.settings.JvmSettings; @@ -26,14 +25,14 @@ import java.io.IOException; import java.io.PipedInputStream; import java.io.PipedOutputStream; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.security.DigestInputStream; import java.security.MessageDigest; import java.util.Map; import java.util.logging.Logger; @RequiredPermissions(Permission.PublishDataset) -public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { +public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { private static final Logger logger = Logger.getLogger(GoogleCloudSubmitToArchiveCommand.class.getName()); private static final String GOOGLECLOUD_BUCKET = ":GoogleCloudBucket"; @@ -82,7 +81,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t public void run() { try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { - dataciteOut.write(dataciteXml.getBytes(Charset.forName("utf-8"))); + dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8)); dataciteOut.close(); success = true; } catch (Exception e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java index 6b95f3b6de1..1ac41105237 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java @@ -4,20 +4,13 @@ import edu.harvard.iq.dataverse.DatasetLock; import 
edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.pidproviders.PidProvider; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.workflow.Workflow; import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; -import java.util.Date; -import java.util.List; import java.util.Optional; import java.util.logging.Logger; import static java.util.stream.Collectors.joining; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java index b7e3ddd8ce6..77b06e4e152 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java @@ -3,27 +3,21 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.pidproviders.PidProvider; -import edu.harvard.iq.dataverse.pidproviders.PidUtil; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; -import java.util.Arrays; import java.util.Collections; -import java.util.Date; import java.util.logging.Logger; /** * No required permissions because we check for superuser status. + * @param <T> */ @RequiredPermissions({}) -public class ReservePidCommand extends AbstractVoidCommand { +public class ReservePidCommand extends AbstractDatasetCommand<Dataset> { private static final Logger logger = Logger.getLogger(ReservePidCommand.class.getCanonicalName()); @@ -35,27 +29,15 @@ public ReservePidCommand(DataverseRequest request, Dataset dataset) { } @Override - protected void executeImpl(CommandContext ctxt) throws CommandException { + public Dataset execute(CommandContext ctxt) throws CommandException { if (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser()) { throw new PermissionException(BundleUtil.getStringFromBundle("admin.api.auth.mustBeSuperUser"), this, Collections.singleton(Permission.EditDataset), dataset); } - - PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset); - - try { - String returnString = pidProvider.createIdentifier(dataset); - logger.fine(returnString); - // No errors caught, so mark PID as reserved. 
- dataset.setGlobalIdCreateTime(new Date()); - // We don't setIdentifierRegistered(true) yet. - ctxt.datasets().merge(dataset); - } catch (Throwable ex) { - String message = BundleUtil.getStringFromBundle("pids.commands.reservePid.failure", Arrays.asList(dataset.getId().toString(), ex.getLocalizedMessage())); - logger.info(message); - throw new IllegalCommandException(message, this); - } + registerExternalIdentifier(getDataset(), ctxt, true); + registerFilePidsIfNeeded(getDataset(), ctxt, true); + return dataset; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index f02edd54b86..a660b1a4d59 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -5,7 +5,6 @@ import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; @@ -17,6 +16,7 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.logging.Logger; @@ -41,7 +41,7 @@ import com.amazonaws.services.s3.transfer.TransferManagerBuilder; @RequiredPermissions(Permission.PublishDataset) -public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { +public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { private static final Logger logger = Logger.getLogger(S3SubmitToArchiveCommand.class.getName()); private static final String S3_CONFIG = ":S3ArchiverConfig"; @@ -86,7 +86,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t spaceName = getSpaceName(dataset); String dataciteXml = getDataCiteXml(dv); - try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8"))) { + try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes(StandardCharsets.UTF_8))) { // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); om.setContentLength(dataciteIn.available()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 994f4c7dfb6..768bb88fd43 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -154,7 +154,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { throw e; } } - + //Set creator and create date for files if needed for (DataFile dataFile : theDataset.getFiles()) { if (dataFile.getCreateDate() == null) { dataFile.setCreateDate(getTimestamp()); @@ -259,6 +259,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { for(FileMetadata fmd: theDataset.getOrCreateEditVersion().getFileMetadatas()) { logger.fine("FMD: " + fmd.getId() + " for file: " + fmd.getDataFile().getId() + "is in final draft 
version"); } + registerFilePidsIfNeeded(theDataset, ctxt, true); if (recalculateUNF) { ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getOrCreateEditVersion()); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/JSONExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/JSONExporter.java index a54e61c7c1e..cf3afd1a39a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/JSONExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/JSONExporter.java @@ -7,10 +7,10 @@ import io.gdcc.spi.export.Exporter; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.Optional; -import jakarta.json.JsonObject; import jakarta.ws.rs.core.MediaType; @@ -35,7 +35,7 @@ public String getDisplayName(Locale locale) { @Override public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException { try{ - outputStream.write(dataProvider.getDatasetJson().toString().getBytes("UTF8")); + outputStream.write(dataProvider.getDatasetJson().toString().getBytes(StandardCharsets.UTF_8)); outputStream.flush(); } catch (Exception e){ throw new ExportException("Unknown exception caught during JSON export."); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/OAI_OREExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/OAI_OREExporter.java index feec4403570..86af45195d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/OAI_OREExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/OAI_OREExporter.java @@ -7,11 +7,11 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.Optional; import java.util.logging.Logger; -import jakarta.json.JsonObject; import jakarta.ws.rs.core.MediaType; @AutoService(Exporter.class) @@ -25,7 +25,7 @@ public class OAI_OREExporter implements Exporter { public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException { try { - outputStream.write(dataProvider.getDatasetORE().toString().getBytes("UTF8")); + outputStream.write(dataProvider.getDatasetORE().toString().getBytes(StandardCharsets.UTF_8)); outputStream.flush(); } catch (Exception e) { logger.severe(e.getMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java index 5428715b905..0c4b39fd641 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.logging.Logger; import jakarta.ws.rs.core.MediaType; @@ -75,7 +76,7 @@ public class SchemaDotOrgExporter implements Exporter { @Override public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException { try { - outputStream.write(dataProvider.getDatasetSchemaDotOrg().toString().getBytes("UTF8")); + outputStream.write(dataProvider.getDatasetSchemaDotOrg().toString().getBytes(StandardCharsets.UTF_8)); } catch (IOException ex) { logger.info("IOException calling outputStream.write: " + ex); } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestableDataChecker.java 
b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestableDataChecker.java index 9b62b62fe61..fa83552a9ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestableDataChecker.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestableDataChecker.java @@ -24,6 +24,7 @@ import java.io.*; import java.nio.*; import java.nio.channels.*; +import java.nio.charset.StandardCharsets; import java.util.*; import java.lang.reflect.*; import java.util.regex.*; @@ -252,7 +253,7 @@ public String testDTAformat(MappedByteBuffer buff) { try { headerBuffer = new byte[STATA_13_HEADER.length()]; buff.get(headerBuffer, 0, STATA_13_HEADER.length()); - headerString = new String(headerBuffer, "US-ASCII"); + headerString = new String(headerBuffer, StandardCharsets.US_ASCII); } catch (Exception ex) { // probably a buffer underflow exception; // we don't have to do anything... null will @@ -273,7 +274,7 @@ public String testDTAformat(MappedByteBuffer buff) { try { headerBuffer = new byte[STATA_14_HEADER.length()]; buff.get(headerBuffer, 0, STATA_14_HEADER.length()); - headerString = new String(headerBuffer, "US-ASCII"); + headerString = new String(headerBuffer, StandardCharsets.US_ASCII); } catch (Exception ex) { // probably a buffer underflow exception; // we don't have to do anything... null will @@ -292,7 +293,7 @@ public String testDTAformat(MappedByteBuffer buff) { try { headerBuffer = new byte[STATA_15_HEADER.length()]; buff.get(headerBuffer, 0, STATA_15_HEADER.length()); - headerString = new String(headerBuffer, "US-ASCII"); + headerString = new String(headerBuffer, StandardCharsets.US_ASCII); } catch (Exception ex) { // probably a buffer underflow exception; // we don't have to do anything... null will diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java index 73818f8fb62..f0262af9e33 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java @@ -29,6 +29,7 @@ import java.io.PrintWriter; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.text.NumberFormat; import java.text.ParseException; @@ -685,7 +686,7 @@ private void decodeHeader(BufferedInputStream stream) throws IOException { } String data_label = new String(Arrays.copyOfRange(header, dl_offset, - (dl_offset + dataLabelLength)), "ISO-8859-1"); + (dl_offset + dataLabelLength)), StandardCharsets.ISO_8859_1); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("data_label_length=" + data_label.length()); @@ -710,7 +711,7 @@ private void decodeHeader(BufferedInputStream stream) throws IOException { if (releaseNumber > 104) { int ts_offset = dl_offset + dataLabelLength; String time_stamp = new String(Arrays.copyOfRange(header, ts_offset, - ts_offset + TIME_STAMP_LENGTH), "ISO-8859-1"); + ts_offset + TIME_STAMP_LENGTH), StandardCharsets.ISO_8859_1); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("time_stamp_length=" + time_stamp.length()); } @@ -912,7 +913,7 @@ private void decodeDescriptorVarNameList(BufferedInputStream stream, int nvar) t for (DataVariable dataVariable: dataTable.getDataVariables()) { offset_end += length_var_name; String vari = new String(Arrays.copyOfRange(variableNameBytes, offset_start, - offset_end), "ISO-8859-1"); + offset_end), 
StandardCharsets.ISO_8859_1); String varName = getNullStrippedString(vari); dataVariable.setName(varName); dbgLog.fine("next name=[" + varName + "]"); @@ -978,7 +979,7 @@ private void decodeDescriptorVariableFormat(BufferedInputStream stream, int nvar for (int i = 0; i < nvar; i++) { offset_end += length_var_format; String vari = new String(Arrays.copyOfRange(variableFormatList, offset_start, - offset_end), "ISO-8859-1"); + offset_end), StandardCharsets.ISO_8859_1); String variableFormat = getNullStrippedString(vari); if (dbgLog.isLoggable(Level.FINE)) dbgLog.fine(i + "-th format=[" + variableFormat + "]"); @@ -1045,7 +1046,7 @@ private void decodeDescriptorValueLabel(BufferedInputStream stream, int nvar) th for (int i = 0; i < nvar; i++) { offset_end += length_label_name; String vari = new String(Arrays.copyOfRange(labelNameList, offset_start, - offset_end), "ISO-8859-1"); + offset_end), StandardCharsets.ISO_8859_1); labelNames[i] = getNullStrippedString(vari); dbgLog.fine(i + "-th label=[" + labelNames[i] + "]"); offset_start = offset_end; @@ -1090,7 +1091,7 @@ private void decodeVariableLabels(BufferedInputStream stream) throws IOException for (int i = 0; i < nvar; i++) { offset_end += length_var_label; String vari = new String(Arrays.copyOfRange(variableLabelBytes, offset_start, - offset_end), "ISO-8859-1"); + offset_end), StandardCharsets.ISO_8859_1); String variableLabelParsed = getNullStrippedString(vari); if (dbgLog.isLoggable(Level.FINE)) { @@ -1272,7 +1273,7 @@ void parseValueLabelsRelease105(BufferedInputStream stream) throws IOException { valueLabelHeader, value_label_table_length, (value_label_table_length + length_label_name)), - "ISO-8859-1"); + StandardCharsets.ISO_8859_1); if (dbgLog.isLoggable(Level.FINE)) { dbgLog.fine("rawLabelName(length)=" + rawLabelName.length()); @@ -1335,7 +1336,7 @@ void parseValueLabelsRelease105(BufferedInputStream stream) throws IOException { for (int l = 0; l < no_value_label_pairs; l++) { String string_l = new String(Arrays.copyOfRange(valueLabelTable_i, offset_start, - offset_end), "ISO-8859-1"); + offset_end), StandardCharsets.ISO_8859_1); int null_position = string_l.indexOf(0); if (null_position != -1) { @@ -1485,7 +1486,7 @@ private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOEx valueLabelHeader, value_label_table_length, (value_label_table_length + length_label_name)), - "ISO-8859-1"); + StandardCharsets.ISO_8859_1); String labelName = getNullStrippedString(rawLabelName); if (dbgLog.isLoggable(Level.FINE)) { @@ -1581,7 +1582,7 @@ private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOEx String label_segment = new String( Arrays.copyOfRange(valueLabelTable_i, offset_value, - (length_label_segment + offset_value)), "ISO-8859-1"); + (length_label_segment + offset_value)), StandardCharsets.ISO_8859_1); // L.A. -- 2011.2.25: // This assumes that the labels are already stored in the right @@ -1701,7 +1702,7 @@ private void decodeData(BufferedInputStream stream, boolean saveWithVariableHead ingesteddata.setTabDelimitedFile(tabDelimitedDataFile); fileOutTab = new FileOutputStream(tabDelimitedDataFile); - pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, StandardCharsets.UTF_8), true); /* Should we lose this dateFormat thing in 4.0? 
* the UNF should be calculatable on the app side solely from the data @@ -1932,7 +1933,7 @@ private void decodeData(BufferedInputStream stream, boolean saveWithVariableHead // String case int strVarLength = StringLengthTable.get(columnCounter); String raw_datum = new String(Arrays.copyOfRange(dataRowBytes, byte_offset, - (byte_offset + strVarLength)), "ISO-8859-1"); + (byte_offset + strVarLength)), StandardCharsets.ISO_8859_1); // TODO: // is it the right thing to do, to default to "ISO-8859-1"? // (it may be; since there's no mechanism for specifying diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DataReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DataReader.java index 0822f6eed72..913c0ebeab2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DataReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DataReader.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.logging.Logger; @@ -273,7 +274,7 @@ public float readFloat() throws IOException { */ public String readString(int n) throws IOException { - String ret = new String(readBytes(n), "US-ASCII"); + String ret = new String(readBytes(n), StandardCharsets.US_ASCII); // Remove the terminating and/or padding zero bytes: if (ret != null && ret.indexOf(0) > -1) { @@ -287,7 +288,7 @@ public String readString(int n) throws IOException { */ public String readUtfString(int n) throws IOException { - String ret = new String(readBytes(n), "UTF8"); + String ret = new String(readBytes(n), StandardCharsets.UTF_8); // Remove the terminating and/or padding zero bytes: if (ret.indexOf(0) > -1) { @@ -314,11 +315,11 @@ public byte[] readPrimitiveSection(String tag, int length) throws IOException { } public String readPrimitiveStringSection(String tag) throws IOException { - return new String(readPrimitiveSection(tag), "US-ASCII"); + return new String(readPrimitiveSection(tag), StandardCharsets.US_ASCII); } public String readPrimitiveStringSection(String tag, int length) throws IOException { - return new String(readPrimitiveSection(tag, length), "US-ASCII"); + return new String(readPrimitiveSection(tag, length), StandardCharsets.US_ASCII); } public String readLabelSection(String tag, int limit) throws IOException { @@ -332,7 +333,7 @@ public String readLabelSection(String tag, int limit) throws IOException { logger.fine("length of label: " + lengthOfLabel); String label = null; if (lengthOfLabel > 0) { - label = new String(readBytes(lengthOfLabel), "US-ASCII"); + label = new String(readBytes(lengthOfLabel), StandardCharsets.US_ASCII); } logger.fine("ret: " + label); readClosingTag(tag); @@ -358,7 +359,7 @@ public String readDefinedStringSection(String tag, int limit) throws IOException } String ret = null; if (number > 0) { - ret = new String(readBytes(number), "US-ASCII"); + ret = new String(readBytes(number), StandardCharsets.US_ASCII); } logger.fine("ret: " + ret); readClosingTag(tag); @@ -400,7 +401,7 @@ public boolean checkTag(String tag) throws IOException { int n = tag.length(); if ((this.buffer_size - buffer_byte_offset) >= n) { - return (tag).equals(new String(Arrays.copyOfRange(buffer, buffer_byte_offset, buffer_byte_offset+n),"US-ASCII")); + return (tag).equals(new String(Arrays.copyOfRange(buffer, buffer_byte_offset, 
buffer_byte_offset+n),StandardCharsets.US_ASCII)); } else{ bufferMoreBytes(); @@ -414,7 +415,7 @@ public void readOpeningTag(String tag) throws IOException { throw new IOException("opening tag must be a non-empty string."); } - String openTagString = new String(readBytes(tag.length() + 2), "US-ASCII"); + String openTagString = new String(readBytes(tag.length() + 2), StandardCharsets.US_ASCII); if (openTagString == null || !openTagString.equals("<"+tag+">")) { throw new IOException("Could not read opening tag <"+tag+">"); } @@ -425,7 +426,7 @@ public void readClosingTag(String tag) throws IOException { throw new IOException("closing tag must be a non-empty string."); } - String closeTagString = new String(readBytes(tag.length() + 3), "US-ASCII"); + String closeTagString = new String(readBytes(tag.length() + 3), StandardCharsets.US_ASCII); logger.fine("closeTagString: " + closeTagString); if (closeTagString == null || !closeTagString.equals("</"+tag+">")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java index 53607d541de..b0f2c50c997 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.text.NumberFormat; import java.text.ParseException; @@ -735,7 +736,7 @@ private void readData(DataReader reader, String variableHeaderLine) throws IOExc ingesteddata.setTabDelimitedFile(tabDelimitedDataFile); FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile); - PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, StandardCharsets.UTF_8), true); // add the variable header here, if needed if (variableHeaderLine != null) { @@ -1001,7 +1002,7 @@ private void readSTRLs(DataReader reader) throws IOException { File finalTabFile = File.createTempFile("finalTabfile.", ".tab"); FileOutputStream fileOutTab = new FileOutputStream(finalTabFile); - PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, StandardCharsets.UTF_8), true); logger.fine("Setting the tab-delimited file to " + finalTabFile.getName()); ingesteddata.setTabDelimitedFile(finalTabFile); @@ -1130,9 +1131,9 @@ private String readGSO(DataReader reader, long v, long o) throws IOException { String gsoString; if (binary) { - gsoString = new String(contents, "utf8"); + gsoString = new String(contents, StandardCharsets.UTF_8); } else { - gsoString = new String(contents, 0, (int) length - 1, "US-ASCII"); + gsoString = new String(contents, 0, (int) length - 1, StandardCharsets.US_ASCII); } logger.fine("GSO " + v + "," + o + ": " + gsoString); @@ -1226,7 +1227,7 @@ private void readValueLabels(DataReader reader) throws IOException { } label_length = (int)(label_end - label_offset); - category_value_labels[i] = new String(Arrays.copyOfRange(labelBytes, (int)label_offset, (int)label_end-1), "UTF8"); + category_value_labels[i] = new String(Arrays.copyOfRange(labelBytes, (int)label_offset, (int)label_end-1), StandardCharsets.UTF_8);
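The same refactoring recurs across these tabular-data readers: decoding switches from charset-name strings (`"US-ASCII"`, `"utf8"`, `"UTF8"`) and `Charset.forName(...)` lookups to the `java.nio.charset.StandardCharsets` constants, which are resolved at compile time and never throw `UnsupportedEncodingException`. A small self-contained sketch of the before/after idiom (illustration only, not part of the patch):

```java
import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

public class CharsetIdiomSketch {
    public static void main(String[] args) throws Exception {
        byte[] bytes = {72, 101, 108, 108, 111}; // "Hello" in ASCII

        // Old style: the charset is looked up by name at run time and the
        // constructor declares the checked UnsupportedEncodingException.
        String oldStyle = new String(bytes, "US-ASCII");

        // New style: StandardCharsets constants, no checked exception, no lookup.
        String newStyle = new String(bytes, StandardCharsets.US_ASCII);

        try (Writer w = new OutputStreamWriter(new ByteArrayOutputStream(), StandardCharsets.UTF_8)) {
            w.write(newStyle);
        }
        System.out.println(oldStyle.equals(newStyle)); // prints: true
    }
}
```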
total_label_bytes += label_length; } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java index 2ee966c3e31..13325ca8f60 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java @@ -31,7 +31,7 @@ import java.io.PrintWriter; import java.io.Writer; import java.nio.ByteBuffer; - +import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.text.NumberFormat; import java.text.SimpleDateFormat; @@ -195,7 +195,7 @@ public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVaria BufferedReader bfReader = null; try { - bfReader = new BufferedReader(new InputStreamReader(new FileInputStream(tempPORfile.getAbsolutePath()), "US-ASCII")); + bfReader = new BufferedReader(new InputStreamReader(new FileInputStream(tempPORfile.getAbsolutePath()), StandardCharsets.US_ASCII)); if (bfReader == null){ dbgLog.fine("bfReader is null"); throw new IOException("bufferedReader is null"); @@ -567,7 +567,7 @@ private File decodeHeader(BufferedInputStream stream) throws IOException { try { tempPORfile = File.createTempFile("tempPORfile.", ".por"); fileOutPOR = new FileOutputStream(tempPORfile); - fileWriter = new BufferedWriter(new OutputStreamWriter(fileOutPOR, "utf8")); + fileWriter = new BufferedWriter(new OutputStreamWriter(fileOutPOR, StandardCharsets.UTF_8)); porScanner = new Scanner(stream); // Because 64-bit and 32-bit machines decode POR's first 40-byte @@ -1115,7 +1115,7 @@ private void decodeData(BufferedReader reader, boolean storeWithVariableHeader) try { fileOutTab = new FileOutputStream(tabDelimitedDataFile); - pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, StandardCharsets.UTF_8), true); variableFormatTypeList = new String[varQnty]; for (int i = 0; i < varQnty; i++) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index 50f2f89e354..215c7a5e6d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -22,12 +22,11 @@ import java.io.*; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.text.*; import java.util.logging.*; import java.util.*; -import jakarta.inject.Inject; - // Rosuda Wrappers and Methods for R-calls to Rserve import edu.harvard.iq.dataverse.settings.JvmSettings; import org.rosuda.REngine.REXP; @@ -504,10 +503,10 @@ public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariab // created! // - L.A. 
RTabFileParser csvFileReader = new RTabFileParser('\t'); - BufferedReader localBufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(localCsvFile), "UTF-8")); + BufferedReader localBufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(localCsvFile), StandardCharsets.UTF_8)); File tabFileDestination = File.createTempFile("data-", ".tab"); - PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), "UTF-8"); + PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), StandardCharsets.UTF_8); int lineCount = csvFileReader.read(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); @@ -685,7 +684,7 @@ private static String readLocalResource(String path) { // Try opening a buffered reader stream try { - BufferedReader rd = new BufferedReader(new InputStreamReader(resourceStream, "UTF-8")); + BufferedReader rd = new BufferedReader(new InputStreamReader(resourceStream, StandardCharsets.UTF_8)); String line = null; while ((line = rd.readLine()) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java index 5eecbdfb666..308ff352b2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java @@ -29,7 +29,7 @@ import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.ByteOrder; - +import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.text.NumberFormat; import java.text.SimpleDateFormat; @@ -58,10 +58,7 @@ import edu.harvard.iq.dataverse.DataTable; import edu.harvard.iq.dataverse.datavariable.DataVariable; -import edu.harvard.iq.dataverse.datavariable.SummaryStatistic; import edu.harvard.iq.dataverse.datavariable.VariableCategory; -import edu.harvard.iq.dataverse.datavariable.VariableRange; - import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi; import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest; @@ -633,7 +630,7 @@ void decodeRecordType1(BufferedInputStream stream) throws IOException { int offset_end = LENGTH_SPSS_PRODUCT_INFO; // 60 bytes String productInfo = new String(Arrays.copyOfRange(recordType1, offset_start, - offset_end),"US-ASCII"); + offset_end),StandardCharsets.US_ASCII); dbgLog.fine("productInfo:\n"+productInfo+"\n"); dataTable.setOriginalFormatVersion(productInfo); @@ -872,7 +869,7 @@ void decodeRecordType1(BufferedInputStream stream) throws IOException { offset_end += LENGTH_FILE_CREATION_INFO; // 84 bytes String fileCreationInfo = getNullStrippedString(new String(Arrays.copyOfRange(recordType1, offset_start, - offset_end),"US-ASCII")); + offset_end),StandardCharsets.US_ASCII)); dbgLog.fine("fileCreationInfo:\n"+fileCreationInfo+"\n"); @@ -1220,7 +1217,7 @@ void decodeRecordType2(BufferedInputStream stream) throws IOException { // borders. So we always read the bytes, but only use them for // the real variable entries. 
/*String variableLabel = new String(Arrays.copyOfRange(variable_label, - 0, rawVariableLabelLength),"US-ASCII");*/ + 0, rawVariableLabelLength),StandardCharsets.US_ASCII);*/ variableLabelMap.put(variableName, variableLabel); } @@ -2075,7 +2072,7 @@ void decodeRecordType7(BufferedInputStream stream) throws IOException { byte[] work = new byte[unitLength*numberOfUnits]; int nbtyes13 = stream.read(work); - String[] variableShortLongNamePairs = new String(work,"US-ASCII").split("\t"); + String[] variableShortLongNamePairs = new String(work,StandardCharsets.US_ASCII).split("\t"); for (int i=0; i params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + List params = URLEncodedUtils.parse(url.getQuery(), StandardCharsets.UTF_8); String hash = null; String dateString = null; String allowedMethod = null; @@ -156,7 +156,7 @@ public static boolean isValidUrl(String signedUrl, String user, String method, S public static boolean hasToken(String urlString) { try { URL url = new URL(urlString); - List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + List params = URLEncodedUtils.parse(url.getQuery(), StandardCharsets.UTF_8); for (NameValuePair nvp : params) { if (nvp.getName().equals(SIGNED_URL_TOKEN)) { return true; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index b7c44014b80..e47426149f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -9,10 +9,10 @@ import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.security.KeyManagementException; import java.security.KeyStoreException; @@ -686,12 +686,7 @@ private void createFileFromString(final String relPath, final String content) archiveEntry.setMethod(ZipEntry.DEFLATED); InputStreamSupplier supp = new InputStreamSupplier() { public InputStream get() { - try { - return new ByteArrayInputStream(content.getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - } - return null; + return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)); } }; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 84bc7834ab9..60ab9407269 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.util.json.JsonPrinter; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.util.List; import java.util.Map; @@ -68,7 +69,7 @@ public OREMap(DatasetVersion dv, boolean exclude) { } public void writeOREMap(OutputStream outputStream) throws Exception { - outputStream.write(getOREMap().toString().getBytes("UTF8")); + outputStream.write(getOREMap().toString().getBytes(StandardCharsets.UTF_8)); outputStream.flush(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index d9c2250ef6f..e30154cb7fe 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -79,7 +79,7 @@ public static void injectSettingsService(SettingsServiceBean ssb, DatasetFieldSe } public JsonPrinter() { - + } public static final BriefJsonPrinter brief = new BriefJsonPrinter(); @@ -122,7 +122,7 @@ public static JsonObjectBuilder json(AuthenticatedUser authenticatedUser) { .add("authenticationProviderId", authenticatedUser.getAuthenticatedUserLookup().getAuthenticationProviderId()); return builder; } - + public static JsonObjectBuilder json(RoleAssignment ra) { return jsonObjectBuilder() .add("id", ra.getId()) @@ -147,7 +147,7 @@ public static JsonObjectBuilder json(DatasetLock lock) { .add("dataset", lock.getDataset().getGlobalId().asString()) .add("message", lock.getInfo()); } - + public static JsonObjectBuilder json( RoleAssigneeDisplayInfo d ) { return jsonObjectBuilder() .add("title", d.getTitle()) @@ -171,17 +171,17 @@ public static JsonObjectBuilder json(IpGroup grp) { .add("id", grp.getId() ) .add("name", grp.getDisplayName() ) .add("description", grp.getDescription() ); - + if ( ! singles.isEmpty() ) { bld.add("addresses", asJsonArray(singles) ); } - + if ( ! ranges.isEmpty() ) { JsonArrayBuilder rangesBld = Json.createArrayBuilder(); ranges.forEach( r -> rangesBld.add( Json.createArrayBuilder().add(r.get(0)).add(r.get(1))) ); bld.add("ranges", rangesBld ); } - + return bld; } @@ -192,7 +192,7 @@ public static JsonObjectBuilder json(ShibGroup grp) { .add("pattern", grp.getPattern()) .add("id", grp.getId()); } - + public static JsonObjectBuilder json(MailDomainGroup grp) { JsonObjectBuilder bld = jsonObjectBuilder() .add("alias", grp.getPersistedGroupAlias() ) @@ -235,14 +235,14 @@ public static JsonObjectBuilder json(DataverseRole role) { return bld; } - + public static JsonObjectBuilder json(Workflow wf){ JsonObjectBuilder bld = jsonObjectBuilder(); bld.add("name", wf.getName()); if ( wf.getId() != null ) { bld.add("id", wf.getId()); } - + if ( wf.getSteps()!=null && !wf.getSteps().isEmpty()) { JsonArrayBuilder arr = Json.createArrayBuilder(); for ( WorkflowStepData stp : wf.getSteps() ) { @@ -253,10 +253,10 @@ public static JsonObjectBuilder json(Workflow wf){ } bld.add("steps", arr ); } - + return bld; } - + public static JsonObjectBuilder json(Dataverse dv) { return json(dv, false, false); } @@ -268,7 +268,7 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail, Boolean re .add("alias", dv.getAlias()) .add("name", dv.getName()) .add("affiliation", dv.getAffiliation()); - if(!hideEmail) { + if(!hideEmail) { bld.add("dataverseContacts", JsonPrinter.json(dv.getDataverseContacts())); } if (returnOwners){ @@ -312,11 +312,11 @@ public static JsonArrayBuilder json(List dataverseContacts) { } return jsonArrayOfContacts; } - + public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject){ return getOwnersFromDvObject(dvObject, null); } - + public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject, DatasetVersion dsv) { List ownerList = new ArrayList(); dvObject = dvObject.getOwner(); // We're going to ignore the object itself @@ -324,7 +324,7 @@ public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject, Dataset while (dvObject != null) { ownerList.add(0, dvObject); dvObject = dvObject.getOwner(); - } + } //then work "inside out" JsonObjectBuilder saved = null; for (DvObject dvo : ownerList) { @@ -332,7 +332,7 @@ public static JsonObjectBuilder 
getOwnersFromDvObject(DvObject dvObject, Dataset } return saved; } - + private static JsonObjectBuilder addEmbeddedOwnerObject(DvObject dvo, JsonObjectBuilder isPartOf, DatasetVersion dsv ) { JsonObjectBuilder ownerObject = jsonObjectBuilder(); @@ -353,16 +353,16 @@ private static JsonObjectBuilder addEmbeddedOwnerObject(DvObject dvo, JsonObject ownerObject.add("version", versionString); } } - + ownerObject.add("displayName", dvo.getDisplayName()); - + if (isPartOf != null) { ownerObject.add("isPartOf", isPartOf); } - + return ownerObject; } - + public static JsonObjectBuilder json( DataverseTheme theme ) { final NullSafeJsonBuilder baseObject = jsonObjectBuilder() .add("id", theme.getId() ) @@ -385,7 +385,7 @@ public static JsonObjectBuilder json(BuiltinUser user) { .add("id", user.getId()) .add("userName", user.getUserName()); } - + public static JsonObjectBuilder json(Dataset ds){ return json(ds, false); } @@ -427,7 +427,7 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized boolean includeFiles, boolean returnOwners) { return json( dsv, anonymizedFieldTypeNamesList, includeFiles, returnOwners,true); } - public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, + public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, boolean includeFiles, boolean returnOwners, boolean includeMetadataBlocks) { Dataset dataset = dsv.getDataset(); JsonObjectBuilder bld = jsonObjectBuilder() @@ -490,19 +490,19 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized } public static JsonObjectBuilder jsonDataFileList(List dataFiles){ - + if (dataFiles==null){ throw new NullPointerException("dataFiles cannot be null"); } - + JsonObjectBuilder bld = jsonObjectBuilder(); - - + + List dataFileList = dataFiles.stream() .map(x -> x.getFileMetadata()) .collect(Collectors.toList()); - + bld.add("files", jsonFileMetadatas(dataFileList)); return bld; @@ -591,7 +591,7 @@ public static JsonObjectBuilder json(MetadataBlock block, List fie blockBld.add("displayName", block.getDisplayName()); blockBld.add("name", block.getName()); - + final JsonArrayBuilder fieldsArray = Json.createArrayBuilder(); Map cvocMap = (datasetFieldService==null) ? 
new HashMap() :datasetFieldService.getCVocConf(true); DatasetFieldWalker.walk(fields, settingsService, cvocMap, new DatasetFieldsToJson(fieldsArray, anonymizedFieldTypeNamesList)); @@ -670,6 +670,14 @@ public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printO return jsonObjectBuilder; } + public static JsonArrayBuilder jsonDatasetFieldTypes(List fields) { + JsonArrayBuilder fieldsJson = Json.createArrayBuilder(); + for (DatasetFieldType field : fields) { + fieldsJson.add(JsonPrinter.json(field)); + } + return fieldsJson; + } + public static JsonObjectBuilder json(DatasetFieldType fld) { return json(fld, null); } @@ -712,7 +720,7 @@ public static JsonObjectBuilder json(DatasetFieldType fld, Dataverse ownerDatave return fieldsBld; } - + public static JsonObjectBuilder json(FileMetadata fmd){ return json(fmd, false, false); } @@ -758,11 +766,11 @@ public static JsonObjectBuilder json(AuxiliaryFile auxFile) { public static JsonObjectBuilder json(DataFile df) { return JsonPrinter.json(df, null, false); } - + public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider){ return json(df, fileMetadata, forExportDataProvider, false); } - + public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider, boolean returnOwners) { // File names are no longer stored in the DataFile entity; // (they are instead in the FileMetadata (as "labels") - this way @@ -773,13 +781,13 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo // *correct* file name - i.e., that it comes from the right version. // (TODO...? L.A. 4.5, Aug 7 2016) String fileName = null; - + if (fileMetadata == null){ // Note that this may not necessarily grab the file metadata from the // version *you want*! (L.A.) fileMetadata = df.getFileMetadata(); } - + fileName = fileMetadata.getLabel(); GlobalId filePid = df.getGlobalId(); String pidURL = (filePid!=null)? filePid.asURL(): null; @@ -846,7 +854,7 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo } return builder; } - + //Started from https://github.com/RENCI-NRIG/dataverse/, i.e. 
https://github.com/RENCI-NRIG/dataverse/commit/2b5a1225b42cf1caba85e18abfeb952171c6754a public static JsonArrayBuilder jsonDT(List ldt) { JsonArrayBuilder ldtArr = Json.createArrayBuilder(); @@ -887,8 +895,8 @@ public static JsonObjectBuilder json(DataVariable dv) { .add("variableFormatType", dv.getType().name()) // varFormat .add("formatCategory", dv.getFormatCategory()) .add("format", dv.getFormat()) - .add("isOrderedCategorical", dv.isOrderedCategorical()) - .add("fileOrder", dv.getFileOrder()) + .add("isOrderedCategorical", dv.isOrderedCategorical()) + .add("fileOrder", dv.getFileOrder()) .add("UNF",dv.getUnf()) .add("fileStartPosition", dv.getFileStartPosition()) .add("fileEndPosition", dv.getFileEndPosition()) @@ -916,7 +924,7 @@ private static JsonArrayBuilder jsonInvalidRanges(Collection inva .add("hasEndValueType", vr.getEndValueType()!=null) .add("endValueTypeMax", vr.isEndValueTypeMax()) .add("endValueTypeMaxExcl", vr.isEndValueTypeMaxExcl()); - + invRanges.add(job); } return invRanges; @@ -948,7 +956,7 @@ private static JsonArrayBuilder jsonCatStat(Collection catStat } return catArr; } - + private static JsonArrayBuilder jsonVarGroup(List varGroups) { JsonArrayBuilder vgArr = Json.createArrayBuilder(); for (VarGroup vg : varGroups) { @@ -962,7 +970,7 @@ private static JsonArrayBuilder jsonVarGroup(List varGroups) { } return vgArr; } - + private static JsonArrayBuilder jsonVarMetadata(Collection varMetadatas) { JsonArrayBuilder vmArr = Json.createArrayBuilder(); for (VariableMetadata vm : varMetadatas) { @@ -983,7 +991,7 @@ private static JsonArrayBuilder jsonVarMetadata(Collection var } return vmArr; } - + private static JsonArrayBuilder json(Collection categoriesMetadata) { JsonArrayBuilder cmArr = Json.createArrayBuilder(); for(CategoryMetadata cm: categoriesMetadata) { @@ -997,9 +1005,9 @@ private static JsonArrayBuilder json(Collection categoriesMeta public static JsonObjectBuilder json(HarvestingClient harvestingClient) { if (harvestingClient == null) { - return null; + return null; } - + return jsonObjectBuilder().add("nickName", harvestingClient.getName()). add("dataverseAlias", harvestingClient.getDataverse().getAlias()). add("type", harvestingClient.getHarvestType()). @@ -1021,7 +1029,7 @@ public static JsonObjectBuilder json(HarvestingClient harvestingClient) { add("lastDatasetsDeleted", harvestingClient.getLastDeletedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastDeletedDatasetCount().toString()). add("lastDatasetsFailed", harvestingClient.getLastFailedDatasetCount()); // == null ? "N/A" : harvestingClient.getLastFailedDatasetCount().toString()); } - + public static String format(Date d) { return (d == null) ? 
null : Util.getDateTimeFormat().format(d); } @@ -1058,7 +1066,7 @@ public static JsonArrayBuilder getTabularFileTags(DataFile df) { } return tabularTags; } - + private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener { Deque objectStack = new LinkedList<>(); @@ -1194,11 +1202,20 @@ public static JsonObjectBuilder json( ExplicitGroup eg ) { .add("displayName", eg.getDisplayName()) .add("containedRoleAssignees", ras); } - - public static JsonObjectBuilder json( DataverseFacet aFacet ) { + + public static JsonArrayBuilder jsonDataverseFacets(List dataverseFacets) { + JsonArrayBuilder dataverseFacetsJson = Json.createArrayBuilder(); + for(DataverseFacet facet: dataverseFacets) { + dataverseFacetsJson.add(json(facet)); + } + return dataverseFacetsJson; + } + + public static JsonObjectBuilder json(DataverseFacet aFacet) { return jsonObjectBuilder() .add("id", String.valueOf(aFacet.getId())) // TODO should just be id I think - .add("name", aFacet.getDatasetFieldType().getDisplayName()); + .add("displayName", aFacet.getDatasetFieldType().getDisplayName()) + .add("name", aFacet.getDatasetFieldType().getName()); } public static JsonObjectBuilder json(Embargo embargo) { @@ -1336,7 +1353,7 @@ public static JsonObjectBuilder getChecksumTypeAndValue(DataFile.ChecksumType ch return null; } } - + /** * Takes a map, returns a Json object for this map. * If map is {@code null}, returns {@code null}. diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java new file mode 100644 index 00000000000..fb19a14e7de --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java @@ -0,0 +1,190 @@ +package edu.harvard.iq.dataverse.validation; + +import com.mashape.unirest.http.JsonNode; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.util.BundleUtil; +import jakarta.enterprise.inject.spi.CDI; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.json.JSONArray; + +import java.util.*; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +public class JSONDataValidation { + private static final Logger logger = Logger.getLogger(JSONDataValidation.class.getCanonicalName()); + private static DatasetFieldServiceBean datasetFieldService = null; + + /** + * + * @param schema Schema file defining the JSON objects to be validated + * @param jsonInput JSON string to validate against the schema + * @throws ValidationException + */ + public static void validate(Schema schema, Map>> schemaChildMap, String jsonInput) throws ValidationException { + if (datasetFieldService == null) { + datasetFieldService = CDI.current().select(DatasetFieldServiceBean.class).get(); + } + JsonNode node = new JsonNode(jsonInput); + if (node.isArray()) { + JSONArray arrayNode = node.getArray(); + validateObject(schema, schemaChildMap, "root", arrayNode.toList()); + } else { + node.getObject().toMap().forEach((k,v) -> { + validateObject(schema, schemaChildMap, k, (v instanceof JSONArray) ? 
((JSONArray) v).toList() : v); + }); + } + } + + /* + * Validate objects recursively + */ + private static void validateObject(Schema schema, Map>> schemaChildMap, String key, Object value) { + if (value instanceof Map) { + validateSchemaObject(schema, schemaChildMap, key, (Map) value); + + ((Map) value).entrySet().forEach(e -> { + validateObject(schema, schemaChildMap, (String) e.getKey(), e.getValue()); + }); + } else if (value instanceof List) { + ((List) value).listIterator().forEachRemaining(v -> { + validateObject(schema, schemaChildMap, key, v); + }); + } + } + + /* + * Validate objects specific to a type. Currently only validating Datasets + */ + private static void validateSchemaObject(Schema schema, Map>> schemaChildMap, String key, Map valueMap) { + if (schema.definesProperty("datasetVersion")) { + validateDatasetObject(schema, schemaChildMap, key, valueMap); + } + } + + /* + * Specific validation for Dataset objects + */ + private static void validateDatasetObject(Schema schema, Map>> schemaChildMap, String key, Map valueMap) { + if (valueMap != null && valueMap.containsKey("typeClass")) { + validateTypeClass(schema, schemaChildMap, key, valueMap, valueMap.get("value"), "dataset"); + } + } + + /* + * key: The name of the parent object + * valueMap: Map of all the metadata of the object + * value: The value field of the object + * messageType: Refers to the parent: if this is an object from a dataset the messageType would be 'dataset' + * This needs to match the Bundle.properties for mapping the error messages when an exception occurs + * + * Rules for typeClass: + * The contents of value depend on the field attributes + * if single/primitive, value is a String + * if multiple, value is a JsonArray + * multiple/primitive: each JsonArray element will contain String + * multiple/compound: each JsonArray element will contain Set of FieldDTOs + */ + private static void validateTypeClass(Schema schema, Map>> schemaChildMap, String key, Map valueMap, Object value, String messageType) { + + String typeClass = valueMap.containsKey("typeClass") ? valueMap.get("typeClass").toString() : ""; + String typeName = valueMap.containsKey("typeName") ? 
valueMap.get("typeName").toString() : ""; + boolean multiple = Boolean.valueOf(String.valueOf(valueMap.getOrDefault("multiple", "false"))); + + // make sure there is a value since 'value' is required + if (value == null) { + throwValidationException("value.missing", List.of(key, typeName)); + } + + if (multiple && !(value instanceof List)) { + throwValidationException("notlist.multiple", List.of(key, typeName, typeClass)); + } + if (!multiple && value instanceof List) { + throwValidationException("list.notmultiple", List.of(key, typeName)); + } + if ("primitive".equals(typeClass) && !multiple && !(value instanceof String)) { + throwValidationException("type", List.of(key, typeName, typeClass)); + } + if ("primitive".equals(typeClass) && multiple) { + ((List) value).listIterator().forEachRemaining(primitive -> { + if (!(primitive instanceof String)) { + throwValidationException("type", List.of(key, typeName, typeClass)); + } + }); + } + if ("compound".equals(typeClass)) { + if (multiple && value instanceof List) { + ((List) value).listIterator().forEachRemaining(item -> { + if (!(item instanceof Map)) { + throwValidationException("compound", List.of(key, typeName, typeClass)); + } else { + ((Map) item).forEach((k,val) -> { + if (!(val instanceof Map)) { + throwValidationException("compound", List.of(key, typeName, typeClass)); + } + // validate mismatch between compound object key and typeName in value + String valTypeName = ((Map) val).containsKey("typeName") ? (String) ((Map) val).get("typeName") : ""; + if (!k.equals(valTypeName)) { + throwValidationException("compound.mismatch", List.of((String) k, valTypeName)); + } + }); + validateChildren(schema, schemaChildMap, key, ((Map) item).values(), typeName, messageType); + } + }); + } + } + + if ("controlledVocabulary".equals(typeClass)) { + DatasetFieldType dsft = datasetFieldService.findByName(typeName); + if (value instanceof List) { + ((List) value).listIterator().forEachRemaining(cvv -> { + if (datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, (String) cvv, true) == null) { + throwValidationException("dataset", "cvv.missing", List.of(key, typeName, (String) cvv)); + } + }); + } else { + if (datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, (String) value, true) == null) { + throwValidationException("dataset", "cvv.missing", List.of(key, typeName, (String) value)); + } + } + } + } + + // If value is another object or list of objects that need to be further validated then childType refers to the parent + // Example: If this is a dsDescriptionValue from a dataset the messageType would be dataset.dsDescriptionValue + // This needs to match the Bundle.properties for mapping the error messages when an exception occurs + private static void validateChildren(Schema schema, Map>> schemaChildMap, String key, Collection children, String typeName, String messageType) { + if (children == null || children.isEmpty()) { + return; + } + List requiredFields = new ArrayList<>(); + requiredFields.addAll((List)schemaChildMap.getOrDefault(typeName, Collections.EMPTY_MAP).getOrDefault("required", Collections.EMPTY_LIST)); + List allowedFields = (List)schemaChildMap.getOrDefault(typeName, Collections.EMPTY_MAP).getOrDefault("allowed", Collections.EMPTY_LIST); + children.forEach(child -> { + if (child instanceof Map) { + String childTypeName = ((Map) child).containsKey("typeName") ? 
(String)((Map) child).get("typeName") : ""; + if (!allowedFields.isEmpty() && !allowedFields.contains(childTypeName)) { + throwValidationException(messageType, "invalidType", List.of(typeName, childTypeName, allowedFields.stream().collect(Collectors.joining(", ")))); + } + if (!requiredFields.isEmpty() && requiredFields.contains(childTypeName)) { + requiredFields.remove(childTypeName); + } + } + }); + if (!requiredFields.isEmpty()) { + throwValidationException(messageType, "required.missing", List.of(typeName, requiredFields.stream().collect(Collectors.joining(", ")), typeName)); + } + } + private static void throwValidationException(String key, List argList) { + throw new ValidationException(BundleUtil.getStringFromBundle("schema.validation.exception." + key, argList)); + } + private static void throwValidationException(String type, String message, List argList) { + if (type != null) { + throwValidationException(type + "." + message, argList); + } else { + throwValidationException(message, argList); + } + } +} diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 4b366522966..a092b0f456b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2739,7 +2739,7 @@ dataverses.api.create.dataset.error.mustIncludeVersion=Please provide initial ve dataverses.api.create.dataset.error.superuserFiles=Only a superuser may add files via this api dataverses.api.create.dataset.error.mustIncludeAuthorName=Please provide author name in the dataset json dataverses.api.validate.json.succeeded=The Dataset JSON provided is valid for this Dataverse Collection. -dataverses.api.validate.json.failed=The Dataset JSON provided failed validation with the following error: +dataverses.api.validate.json.failed=The Dataset JSON provided failed validation with the following error: dataverses.api.validate.json.exception=Validation failed with following exception: #Access.java @@ -3008,19 +3008,31 @@ pids.api.reservePid.success=PID reserved for {0} pids.api.deletePid.success=PID deleted for {0} pids.deletePid.failureExpected=Unable to delete PID {0}. Status code: {1}. pids.deletePid.failureOther=Problem deleting PID {0}: {1} -pids.commands.reservePid.failure=Problem reserving PID for dataset id {0}: {1}. pids.datacite.errors.noResponseCode=Problem getting HTTP status code from {0}. Is it in DNS? Is doi.dataciterestapiurlstring configured properly? pids.datacite.errors.DoiOnly=Only doi: is supported. -#PublishDatasetCommand -publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persistent identifier has not been reserved. +#AbstractDatasetCommand +abstractDatasetCommand.pidNotReserved=Unable to reserve a persistent identifier for the dataset: {0}. +abstractDatasetCommand.filePidNotReserved=Unable to reserve a persistent identifier for one or more files in the dataset: {0}. +abstractDatasetCommand.pidReservationRetryExceeded="This dataset may not be registered because its identifier is already in use by another dataset: gave up after {0} attempts. Current (last requested) identifier: {1}" # APIs api.errors.invalidApiToken=Invalid API token. api.ldninbox.citation.alert={0},

The {1} has just been notified that the {2}, {3}, cites "{6}" in this repository. api.ldninbox.citation.subject={0}: A Dataset Citation has been reported! +#Schema Validation +schema.validation.exception.value.missing=Invalid data for key:{0} typeName:{1}. ''value'' is missing. +schema.validation.exception.list.notmultiple=Invalid data for key:{0} typeName:{1}. Value is a list but ''multiple'' is set to false. +schema.validation.exception.notlist.multiple=Invalid data for key:{0} typeName:{1}. Fields with ''multiple'' set to true must be a list. +schema.validation.exception.compound=Compound data type must be accompanied by a value that is either an object (multiple=false) or a list of objects (multiple=true). +schema.validation.exception.compound.mismatch=Compound key {0} must match the typeName of its value object. Found {1}. +schema.validation.exception.dataset.cvv.missing=Controlled vocabulary value ''{2}'' for key:{0} typeName:{1} is not valid. +schema.validation.exception.dataset.invalidType=Invalid data for key:{0} typeName:{1}. Only {2} are allowed. +schema.validation.exception.dataset.required.missing=Invalid data for key:{0}. The following child field(s) are required for field type {2}: {1}. + #Info.java openapi.exception.invalid.format=Invalid format {0}, currently supported formats are YAML and JSON. openapi.exception=Supported format definition not found. openapi.exception.unaligned=Unaligned parameters on Headers [{0}] and Request [{1}] + diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties index 97b2eed111c..630539d912e 100644 --- a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties @@ -38,3 +38,4 @@ nf=text/x-nextflow Rmd=text/x-r-notebook rb=text/x-ruby-script dag=text/x-dagman +glb=model/gltf-binary diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties index 8e5a251abbf..549b2b13442 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -219,6 +219,8 @@ video/quicktime=Quicktime Video video/webm=WebM Video # Network Data text/xml-graphml=GraphML Network Data +# 3D Data +model/gltf-binary=3D Model # Other application/octet-stream=Unknown application/x-docker-file=Docker Image File diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties index 0dad8daff4c..0b0fde89cbd 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -223,6 +223,8 @@ video/webm=Video # (anything else that looks like image/* will also be indexed as facet type "Video") # Network Data text/xml-graphml=Network Data +# 3D Data +model/gltf-binary=3D Data # Other application/octet-stream=Unknown application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=Metadata diff --git a/src/main/webapp/WEB-INF/web.xml index 427615f2f0b..732c634205f 100644 --- a/src/main/webapp/WEB-INF/web.xml +++ b/src/main/webapp/WEB-INF/web.xml @@ -182,6 +182,11 @@ webmanifest application/manifest+json + <mime-mapping> + <extension>xhtml</extension> + <mime-type>text/html</mime-type> + </mime-mapping> + diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldsIT.java
b/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldsIT.java new file mode 100644 index 00000000000..ae90ddf0b4c --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldsIT.java @@ -0,0 +1,29 @@ +package edu.harvard.iq.dataverse.api; + +import io.restassured.RestAssured; +import io.restassured.response.Response; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static jakarta.ws.rs.core.Response.Status.OK; +import static org.hamcrest.CoreMatchers.equalTo; + +public class DatasetFieldsIT { + + @BeforeAll + public static void setUpClass() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + void testListAllFacetableDatasetFields() { + Response listAllFacetableDatasetFieldsResponse = UtilIT.listAllFacetableDatasetFields(); + listAllFacetableDatasetFieldsResponse.then().assertThat().statusCode(OK.getStatusCode()); + int expectedNumberOfFacetableDatasetFields = 59; + listAllFacetableDatasetFieldsResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].name", equalTo("authorName")) + .body("data[0].displayName", equalTo("Author Name")) + .body("data.size()", equalTo(expectedNumberOfFacetableDatasetFields)); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index cb9481d3491..4f1ee1717c1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -182,6 +182,90 @@ public void testCollectionSchema(){ } + @Test + public void testDatasetSchemaValidation() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response getCollectionSchemaResponse = UtilIT.getCollectionSchema(dataverseAlias, apiToken); + getCollectionSchemaResponse.prettyPrint(); + getCollectionSchemaResponse.then().assertThat() + .statusCode(200); + + JsonObject expectedSchema = null; + try { + expectedSchema = JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json"); + } catch (IOException ex) { + } + + assertEquals(JsonUtil.prettyPrint(expectedSchema), JsonUtil.prettyPrint(getCollectionSchemaResponse.body().asString())); + + // add a language that is not in the Controlled vocabulary + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"aar\"", + "\"aar\",\"badlang\"", + BundleUtil.getStringFromBundle("schema.validation.exception.dataset.cvv.missing", List.of("fields", "language", "badlang")) + ); + + // change multiple to true on value that is a not a List + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "multiple\": false,\n" + + " \"typeName\": \"title", + "multiple\": true,\n" + + " \"typeName\": \"title", + BundleUtil.getStringFromBundle("schema.validation.exception.notlist.multiple", List.of("fields", "title")) + ); + + // change multiple to false on value that is a List + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "typeName\": \"language\",\n" + + " \"multiple\": true", + "typeName\": \"language\",\n" + + " \"multiple\": false", + BundleUtil.getStringFromBundle("schema.validation.exception.list.notmultiple", 
List.of("fields", "language")) + ); + + // add a mismatched typeName + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"typeName\": \"datasetContactName\",", + "\"typeName\": \"datasetContactNme\",", + BundleUtil.getStringFromBundle("schema.validation.exception.compound.mismatch", List.of("datasetContactName", "datasetContactNme")) + ); + + // add a typeName which is not allowed + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"datasetContactEmail\": {\n" + + " \"typeClass\": \"primitive\",\n" + + " \"multiple\": false,\n" + + " \"typeName\": \"datasetContactEmail\",", + "\"datasetContactNotAllowed\": {\n" + + " \"typeClass\": \"primitive\",\n" + + " \"multiple\": false,\n" + + " \"typeName\": \"datasetContactNotAllowed\",", + BundleUtil.getStringFromBundle("schema.validation.exception.dataset.invalidType", List.of("datasetContact", "datasetContactNotAllowed", "datasetContactName, datasetContactAffiliation, datasetContactEmail")) + ); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + deleteDataverseResponse.prettyPrint(); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + } + private void testDatasetSchemaValidationHelper(String dataverseAlias, String apiToken, String origString, String replacementString, String expectedError) { + String json = UtilIT.getDatasetJson("scripts/search/tests/data/dataset-finch3.json"); + json = json.replace(origString, replacementString); + Response validateDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, json, apiToken); + validateDatasetJsonResponse.prettyPrint(); + validateDatasetJsonResponse.then().assertThat() + .statusCode(200) + .body(containsString(expectedError)); + } + @Test public void testCreateDataset() { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index 09b60e46e7e..1b7440465ec 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -1045,4 +1045,107 @@ public void testAddDataverse() { .statusCode(BAD_REQUEST.getStatusCode()) .body("message", equalTo("Invalid metadata block name: \"" + invalidMetadataBlockName + "\"")); } + + @Test + public void testListFacets() { + Response createUserResponse = UtilIT.createRandomUser(); + String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + String[] expectedFacetNames = {"authorName", "subject", "keywordValue", "dateOfDeposit"}; + + // returnDetails is false + Response listFacetsResponse = UtilIT.listDataverseFacets(dataverseAlias, false, apiToken); + listFacetsResponse.then().assertThat().statusCode(OK.getStatusCode()); + String actualFacetName = listFacetsResponse.then().extract().path("data[0]"); + assertThat(expectedFacetNames, hasItemInArray(actualFacetName)); + + // returnDetails is true + String[] expectedDisplayNames = {"Author Name", "Subject", "Keyword Term", "Deposit Date"}; + listFacetsResponse = UtilIT.listDataverseFacets(dataverseAlias, true, apiToken); + listFacetsResponse.then().assertThat().statusCode(OK.getStatusCode()); + actualFacetName = listFacetsResponse.then().extract().path("data[0].name"); + assertThat(expectedFacetNames, 
hasItemInArray(actualFacetName)); + String actualDisplayName = listFacetsResponse.then().extract().path("data[0].displayName"); + assertThat(expectedDisplayNames, hasItemInArray(actualDisplayName)); + String actualId = listFacetsResponse.then().extract().path("data[0].id"); + assertNotNull(actualId); + + // Dataverse with custom facets + String dataverseWithCustomFacetsAlias = UtilIT.getRandomDvAlias() + "customFacets"; + + String[] testFacetNames = {"authorName", "authorAffiliation"}; + String[] testMetadataBlockNames = {"citation", "geospatial"}; + + Response createSubDataverseResponse = UtilIT.createSubDataverse(dataverseWithCustomFacetsAlias, null, apiToken, "root", null, testFacetNames, testMetadataBlockNames); + createSubDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + + listFacetsResponse = UtilIT.listDataverseFacets(dataverseWithCustomFacetsAlias, true, apiToken); + listFacetsResponse.then().assertThat().statusCode(OK.getStatusCode()); + + String actualFacetName1 = listFacetsResponse.then().extract().path("data[0].name"); + String actualFacetName2 = listFacetsResponse.then().extract().path("data[1].name"); + assertNotEquals(actualFacetName1, actualFacetName2); + assertThat(testFacetNames, hasItemInArray(actualFacetName1)); + assertThat(testFacetNames, hasItemInArray(actualFacetName2)); + + String[] testFacetExpectedDisplayNames = {"Author Name", "Author Affiliation"}; + String actualFacetDisplayName1 = listFacetsResponse.then().extract().path("data[0].displayName"); + String actualFacetDisplayName2 = listFacetsResponse.then().extract().path("data[1].displayName"); + assertNotEquals(actualFacetDisplayName1, actualFacetDisplayName2); + assertThat(testFacetExpectedDisplayNames, hasItemInArray(actualFacetDisplayName1)); + assertThat(testFacetExpectedDisplayNames, hasItemInArray(actualFacetDisplayName2)); + } + + @Test + public void testGetUserPermissionsOnDataverse() { + Response createUserResponse = UtilIT.createRandomUser(); + String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // Call for dataverse created by the user + Response getUserPermissionsOnDataverseResponse = UtilIT.getUserPermissionsOnDataverse(dataverseAlias, apiToken); + getUserPermissionsOnDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + boolean canAddDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canAddDataverse"); + assertTrue(canAddDataverse); + boolean canAddDataset = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canAddDataset"); + assertTrue(canAddDataset); + boolean canViewUnpublishedDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canViewUnpublishedDataverse"); + assertTrue(canViewUnpublishedDataverse); + boolean canEditDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canEditDataverse"); + assertTrue(canEditDataverse); + boolean canManageDataversePermissions = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canManageDataversePermissions"); + assertTrue(canManageDataversePermissions); + boolean canPublishDataverse = 
JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canPublishDataverse"); + assertTrue(canPublishDataverse); + boolean canDeleteDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canDeleteDataverse"); + assertTrue(canDeleteDataverse); + + // Call for root dataverse + getUserPermissionsOnDataverseResponse = UtilIT.getUserPermissionsOnDataverse("root", apiToken); + getUserPermissionsOnDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + canAddDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canAddDataverse"); + assertTrue(canAddDataverse); + canAddDataset = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canAddDataset"); + assertTrue(canAddDataset); + canViewUnpublishedDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canViewUnpublishedDataverse"); + assertFalse(canViewUnpublishedDataverse); + canEditDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canEditDataverse"); + assertFalse(canEditDataverse); + canManageDataversePermissions = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canManageDataversePermissions"); + assertFalse(canManageDataversePermissions); + canPublishDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canPublishDataverse"); + assertFalse(canPublishDataverse); + canDeleteDataverse = JsonPath.from(getUserPermissionsOnDataverseResponse.body().asString()).getBoolean("data.canDeleteDataverse"); + assertFalse(canDeleteDataverse); + + // Call with invalid dataverse alias + Response getUserPermissionsOnDataverseInvalidIdResponse = UtilIT.getUserPermissionsOnDataverse("testInvalidAlias", apiToken); + getUserPermissionsOnDataverseInvalidIdResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 6e4fd5b0bb3..9fa13bb2939 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -25,7 +25,6 @@ import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import java.awt.image.BufferedImage; import java.io.IOException; -import static java.lang.Thread.sleep; import javax.imageio.ImageIO; import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; @@ -113,6 +112,7 @@ public void testSearchPermisions() throws InterruptedException { .body("data.total_count", CoreMatchers.is(1)) .body("data.count_in_response", CoreMatchers.is(1)) .body("data.items[0].name", CoreMatchers.is("Darwin's Finches")) + .body("data.items[0].publicationStatuses", CoreMatchers.hasItems("Unpublished", "Draft")) .statusCode(OK.getStatusCode()); Response publishDataverse = UtilIT.publishDataverseViaSword(dataverseAlias, apiToken1); @@ -599,7 +599,7 @@ public void testDatasetThumbnail() { String datasetLogo = "src/main/webapp/resources/images/cc0.png"; File datasetLogoFile = new File(datasetLogo); - String datasetLogoAsBase64 = datasetLogoAsBase64 = ImageThumbConverter.generateImageThumbnailFromFileAsBase64(datasetLogoFile, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); + String datasetLogoAsBase64 = 
ImageThumbConverter.generateImageThumbnailFromFileAsBase64(datasetLogoFile, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); if (datasetLogoAsBase64 == null) { Logger.getLogger(SearchIT.class.getName()).log(Level.SEVERE, "Failed to generate a base64 thumbnail from the file dataverseproject.png"); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 917154c80cc..8f1fcdf57eb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3776,6 +3776,12 @@ static Response getUserPermissionsOnDataset(String datasetId, String apiToken) { .get("/api/datasets/" + datasetId + "/userPermissions"); } + static Response getUserPermissionsOnDataverse(String dataverseAlias, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/dataverses/" + dataverseAlias + "/userPermissions"); + } + static Response getCanDownloadAtLeastOneFile(String datasetId, String versionId, String apiToken) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) @@ -4025,9 +4031,13 @@ public static Response getOpenAPI(String accept, String format) { } static Response listDataverseFacets(String dataverseAlias, String apiToken) { + return listDataverseFacets(dataverseAlias, false, apiToken); + } + + static Response listDataverseFacets(String dataverseAlias, boolean returnDetails, String apiToken) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) - .contentType("application/json") + .queryParam("returnDetails", returnDetails) .get("/api/dataverses/" + dataverseAlias + "/facets"); } @@ -4037,4 +4047,9 @@ static Response listDataverseInputLevels(String dataverseAlias, String apiToken) .contentType("application/json") .get("/api/dataverses/" + dataverseAlias + "/inputLevels"); } + + static Response listAllFacetableDatasetFields() { + return given() + .get("/api/datasetfields/facetables"); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SolrSearchResultTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SolrSearchResultTest.java index 4fb29869db7..d7deaa2dbc1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/SolrSearchResultTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/SolrSearchResultTest.java @@ -225,6 +225,29 @@ public void testSetPublicationStatuses14() { assertTrue(this.solrSearchResult.isDeaccessionedState()); } + @Test + public void testSetPublicationStatusesJson() { + + boolean showRelevance = false; + boolean showEntityIds = false; + boolean showApiUrls = false; + + SolrSearchResult result01 = new SolrSearchResult("myQuery", "myName"); + result01.setType(SearchConstants.DATAVERSES); + result01.setPublicationStatuses(List.of("Unpublished", "Draft")); + JsonObjectBuilder actual01 = result01.json(showRelevance, showEntityIds, showApiUrls); + JsonObject actual = actual01.build(); + System.out.println("actual: " + actual); + + JsonObjectBuilder expResult = Json.createObjectBuilder(); + expResult.add("type", SearchConstants.DATAVERSE); + expResult.add("publicationStatuses", Json.createArrayBuilder().add("Unpublished").add("Draft").build()); + JsonObject expected = expResult.build(); + System.out.println("expect: " + expected); + + assertEquals(expected, actual); + } + @Test public void testJson() { diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java new file mode 100644 index 
00000000000..107dcecba35 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -0,0 +1,237 @@ +package edu.harvard.iq.dataverse.validation; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.everit.json.schema.loader.SchemaLoader; +import org.json.JSONObject; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.fail; +import static org.mockito.ArgumentMatchers.any; + +public class JSONDataValidationTest { + + @Mock + static DatasetFieldServiceBean datasetFieldServiceMock; + @Mock + static DatasetFieldType datasetFieldTypeMock; + static ControlledVocabularyValue cvv = new ControlledVocabularyValue(); + static Map>> schemaChildMap = new HashMap<>(); + + static JSONObject rawSchema = null; + static JSONObject rawSchema() throws IOException { + if (rawSchema == null) { + Path file = Path.of("doc/sphinx-guides/source/_static/api/dataset-schema.json"); + String schema = Files.readString(file, StandardCharsets.UTF_8); + rawSchema = new JSONObject(schema); + } + return rawSchema; + } + + static String jsonInput() { + return """ + { + "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "Darwin's Finches", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorAffiliation": { + "value": "Birds Inc.", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { "datasetContactEmail" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value" : "finch@mailinator.com" + }, + "datasetContactName" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactName", + "value": "Finch, Fiona" + } + }], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [{ + "dsDescriptionValue":{ + "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "2021-07-13" + } + }], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences", + "Social Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + } + ], + "displayName": "Citation Metadata" + } + } + } + } + """; + } + + @BeforeAll + static void setup() throws NoSuchFieldException, IllegalAccessException { + 
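+ // JSONDataValidation resolves controlled vocabulary terms through a static DatasetFieldServiceBean, so a Mockito mock is injected into that static field via reflection and the lookups used by these tests are stubbed below. + // schemaChildMap mirrors the structure the validator reads for compound fields: per parent typeName, the child typeNames keyed as "required" and "allowed".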
datasetFieldServiceMock = Mockito.mock(DatasetFieldServiceBean.class); + datasetFieldTypeMock = Mockito.mock(DatasetFieldType.class); + Field datasetFieldServiceField = JSONDataValidation.class.getDeclaredField("datasetFieldService"); + datasetFieldServiceField.setAccessible(true); + datasetFieldServiceField.set(JSONDataValidation.class, datasetFieldServiceMock); + + Mockito.when(datasetFieldServiceMock.findByName(any(String.class))).thenReturn(datasetFieldTypeMock); + List cvvList = List.of("Medicine, Health and Life Sciences", "Social Sciences"); + cvvList.forEach(i -> { + Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, i,true)).thenReturn(cvv); + }); + Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, "Bad",true)).thenReturn(null); + + Map<String, List<String>> datasetContact = new HashMap<>(); + datasetContact.put("required", List.of("datasetContactName")); + datasetContact.put("allowed", List.of("datasetContactName", "datasetContactEmail","datasetContactAffiliation")); + schemaChildMap.put("datasetContact",datasetContact); + Map<String, List<String>> dsDescription = new HashMap<>(); + dsDescription.put("required", List.of("dsDescriptionValue")); + dsDescription.put("allowed", List.of("dsDescriptionValue", "dsDescriptionDate")); + schemaChildMap.put("dsDescription",dsDescription); + + } + @Test + public void testSchema() throws IOException { + Schema schema = SchemaLoader.load(rawSchema()); + schema.validate(new JSONObject(jsonInput())); + } + @Test + public void testValid() throws IOException { + Schema schema = SchemaLoader.load(rawSchema()); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput()); + } + @Test + public void testInvalid() throws IOException { + Schema schema = SchemaLoader.load(rawSchema()); + try { + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + } + + try { + // test multiple = false but value is list + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replaceAll("true", "false")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + // verify that child objects are also validated + String childTest = "\"multiple\": false, \"typeName\": \"authorAffiliation\""; + try { + String trimmedStr = jsonInput().replaceAll("\\s{2,}", " "); + // test child object with multiple set to true + JSONDataValidation.validate(schema, schemaChildMap, trimmedStr.replace(childTest, childTest.replace("false", "true"))); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test dsDescription but dsDescriptionValue missing + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("typeName\": \"dsDescriptionValue", "typeName\": \"notdsDescriptionValue")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test dsDescription but child dsDescriptionValue missing + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("dsDescriptionValue\":{", "notdsDescriptionValue\":{")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test required dataType missing + JSONDataValidation.validate(schema, schemaChildMap, 
jsonInput().replaceAll("\"datasetContactName\"", "\"datasetContactAffiliation\"")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test dataType not allowed + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replaceAll("\"datasetContactEmail\"", "\"datasetContactNotAllowed\"")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + } +} diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 44bbfdcceb7..64610d07e50 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT,DatasetFieldsIT diff --git a/tests/shell/spec/spec_helper.sh b/tests/shell/spec/spec_helper.sh index 93f19083cd2..0cf2106f5e0 100644 --- a/tests/shell/spec/spec_helper.sh +++ b/tests/shell/spec/spec_helper.sh @@ -22,3 +22,4 @@ spec_helper_configure() { # Available functions: import, before_each, after_each, before_all, after_all : import 'support/custom_matcher' } +