From 9bdcba600915d44117db9ae8ca046c1e32d07c3d Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 13 Mar 2023 11:54:39 -0400 Subject: [PATCH 01/25] Alternative Title --- conf/solr/8.11.1/schema.xml | 2 +- scripts/api/data/metadatablocks/citation.tsv | 2 +- .../dataverse/export/ddi/DdiExportUtil.java | 8 +++-- .../export/openaire/OpenAireExportUtil.java | 32 +++++++++++++++++-- .../dataverse/export/dataset-all-defaults.txt | 4 +-- .../dataset-create-new-all-ddi-fields.json | 4 +-- 6 files changed, 42 insertions(+), 10 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index f11938621fc..5fa9c7f0616 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -250,7 +250,7 @@ - + diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index be32bb7134e..6b0f231c7b7 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -3,7 +3,7 @@ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI title Title The main title of the Dataset text 0 TRUE FALSE FALSE FALSE TRUE TRUE citation http://purl.org/dc/terms/title subtitle Subtitle A secondary title that amplifies or states certain limitations on the main title text 1 FALSE FALSE FALSE FALSE FALSE FALSE citation - alternativeTitle Alternative Title Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title text 2 FALSE FALSE FALSE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative + alternativeTitle Alternative Title Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title text 2 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative alternativeURL Alternative URL Another URL where one can view or access 
the data in the Dataset, e.g. a project or personal webpage https:// url 3 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution otherId Other Identifier Another unique identifier for the Dataset (e.g. producer's or another repository's identifier) none 4 : FALSE FALSE TRUE FALSE FALSE FALSE citation otherIdAgency Agency The name of the agency that generated the other identifier text 5 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index eb7632dd03c..eb53473d4d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -202,8 +202,12 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); - writeFullElement(xmlw, "altTitl", dto2Primitive(version, DatasetFieldConstant.alternativeTitle)); - + + FieldDTO altField = dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); + if (altField != null) { + writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); + } + xmlw.writeStartElement("IDNo"); writeAttribute(xmlw, "agency", persistentAgency); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index bea3858a60e..34cb7a4e138 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -7,6 +7,7 @@ import java.util.Map; import java.util.Set; import 
java.util.logging.Logger; +import java.util.List; import javax.json.JsonObject; import javax.xml.stream.XMLOutputFactory; @@ -368,8 +369,8 @@ public static void writeTitlesElement(XMLStreamWriter xmlw, DatasetVersionDTO da String subtitle = dto2Primitive(datasetVersionDTO, DatasetFieldConstant.subTitle); title_check = writeTitleElement(xmlw, "Subtitle", subtitle, title_check, language); - String alternativeTitle = dto2Primitive(datasetVersionDTO, DatasetFieldConstant.alternativeTitle); - title_check = writeTitleElement(xmlw, "AlternativeTitle", alternativeTitle, title_check, language); + title_check = writeMultipleTitleElement(xmlw, "AlternativeTitle", datasetVersionDTO, "citation", title_check, language); + writeEndTag(xmlw, title_check); } @@ -404,6 +405,33 @@ private static boolean writeTitleElement(XMLStreamWriter xmlw, String titleType, } return title_check; } + + /** + * Writes one title element of type titleType for every value of the first field in the metadataBlockName block whose type name matches titleType, ignoring case. + * Returns the title_check flag as updated by writeTitleElement; it is returned unchanged when no block, field or value is found. + */ + private static boolean writeMultipleTitleElement(XMLStreamWriter xmlw, String titleType, DatasetVersionDTO datasetVersionDTO, String metadataBlockName, boolean title_check, String language) throws XMLStreamException { + MetadataBlockDTO block = datasetVersionDTO.getMetadataBlocks().get(metadataBlockName); + if (block != null) { + List<FieldDTO> fieldsBlock = block.getFields(); + if (fieldsBlock != null) { + for (FieldDTO fieldDTO : fieldsBlock) { + if (titleType.equalsIgnoreCase(fieldDTO.getTypeName())) { + List<String> fields = fieldDTO.getMultiplePrimitive(); + for (String value : fields) { + // Assign the returned flag, as writeTitlesElement does for the other title types, so a true result is not discarded. + title_check = writeTitleElement(xmlw, titleType, value, title_check, language); + } + break; + } + } + } + } + + return title_check; + } + + + /** * 5, PublicationYear (M) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt index a3f0dffc767..62f2cd37447 100644 ---
a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt @@ -42,9 +42,9 @@ }, { "typeName": "alternativeTitle", - "multiple": false, + "multiple": true, "typeClass": "primitive", - "value": "Alternative Title" + "value": ["Alternative Title"] }, { "typeName": "alternativeURL", diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json index 1b327c15496..96f058b1b02 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -51,9 +51,9 @@ }, { "typeName": "alternativeTitle", - "multiple": false, + "multiple": true, "typeClass": "primitive", - "value": "Alternative Title" + "value": ["Alternative Title"] }, { "typeName": "otherId", From 7b8281c7a0f98350533047dac1790603557e463b Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 13 Mar 2023 13:08:24 -0400 Subject: [PATCH 02/25] citation --- scripts/api/data/metadatablocks/citation.tsv | 509 +++++++++---------- 1 file changed, 254 insertions(+), 255 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 6b0f231c7b7..20d858ddb4b 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -3,8 +3,8 @@ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI title Title The main title of the Dataset text 0 TRUE FALSE FALSE FALSE TRUE TRUE citation http://purl.org/dc/terms/title subtitle Subtitle A secondary title that amplifies or states certain limitations on the main title text 1 FALSE 
FALSE FALSE FALSE FALSE FALSE citation - alternativeTitle Alternative Title Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title text 2 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative - alternativeURL Alternative URL Another URL where one can view or access the data in the Dataset, e.g. a project or personal webpage https:// url 3 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution + alternativeTitle Alternative Title Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title text 2 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative + alternativeURL Alternative URL Another URL where one can view or access the data in the Dataset, e.g. a project or personal webpage https:// url 3 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution otherId Other Identifier Another unique identifier for the Dataset (e.g. producer's or another repository's identifier) none 4 : FALSE FALSE TRUE FALSE FALSE FALSE citation otherIdAgency Agency The name of the agency that generated the other identifier text 5 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation otherIdValue Identifier Another identifier uniquely identifies the Dataset text 6 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation @@ -24,26 +24,26 @@ keyword Keyword A key term that describes an important aspect of the Dataset and information about any controlled vocabulary used none 20 FALSE FALSE TRUE FALSE TRUE FALSE citation keywordValue Term A key term that describes important aspects of the Dataset text 21 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE keyword citation keywordVocabulary Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. 
LCSH, MeSH) text 22 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation - keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 23 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 23 "#VALUE" FALSE FALSE FALSE FALSE TRUE FALSE keyword citation topicClassification Topic Classification Indicates a broad, important topic or subject that the Dataset covers and information about any controlled vocabulary used none 24 FALSE FALSE TRUE FALSE FALSE FALSE citation topicClassValue Term A topic or subject term text 25 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 26 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation - topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 27 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 27 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation publication Related Publication The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab none 28 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy publicationCitation Citation The full bibliographic citation for the related publication textbox 29 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme publicationIDNumber Identifier The identifier for a related publication text 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 32 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE publication citation https://schema.org/distribution + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. 
a journal article webpage https:// url 32 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE publication citation https://schema.org/distribution notesText Notes Additional information about the Dataset textbox 33 FALSE FALSE FALSE FALSE TRUE FALSE citation language Language A language that the Dataset's files is written in text 34 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 35 FALSE FALSE TRUE FALSE FALSE FALSE citation producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 36 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 37 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 39 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 40 "
" FALSE FALSE FALSE FALSE FALSE FALSE producer citation productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 42 TRUE FALSE TRUE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation @@ -54,8 +54,8 @@ distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 51 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. 
IQSS, ICPSR) text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 53 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 54
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 53 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 54 "
" FALSE FALSE FALSE FALSE FALSE FALSE distributor citation distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 55 TRUE FALSE FALSE TRUE FALSE FALSE citation depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 56 FALSE FALSE FALSE FALSE FALSE FALSE citation dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 57 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted @@ -79,248 +79,247 @@ originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 75 FALSE FALSE FALSE FALSE FALSE FALSE citation characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation -#controlledVocabulary DatasetField Value identifier displayOrder - subject Agricultural Sciences D01 0 - subject Arts and Humanities D0 1 - subject Astronomy and Astrophysics D1 2 - subject Business and Management D2 3 - subject Chemistry D3 4 - subject Computer and Information Science D7 5 - subject Earth and Environmental Sciences D4 6 - subject Engineering D5 7 - subject Law D8 8 - subject Mathematical Sciences D9 9 - subject Medicine, Health and Life Sciences D6 10 - subject Physics D10 11 - subject Social Sciences D11 12 - subject Other D12 13 - publicationIDType ark 0 - publicationIDType arXiv 1 - publicationIDType bibcode 2 - publicationIDType cstr 3 - publicationIDType doi 4 - publicationIDType ean13 5 - publicationIDType eissn 6 - publicationIDType handle 7 - publicationIDType 
isbn 8 - publicationIDType issn 9 - publicationIDType istc 10 - publicationIDType lissn 11 - publicationIDType lsid 12 - publicationIDType pmid 13 - publicationIDType purl 14 - publicationIDType upc 15 - publicationIDType url 16 - publicationIDType urn 17 - publicationIDType DASH-NRS 18 - contributorType Data Collector 0 - contributorType Data Curator 1 - contributorType Data Manager 2 - contributorType Editor 3 - contributorType Funder 4 - contributorType Hosting Institution 5 - contributorType Project Leader 6 - contributorType Project Manager 7 - contributorType Project Member 8 - contributorType Related Person 9 - contributorType Researcher 10 - contributorType Research Group 11 - contributorType Rights Holder 12 - contributorType Sponsor 13 - contributorType Supervisor 14 - contributorType Work Package Leader 15 - contributorType Other 16 - authorIdentifierScheme ORCID 0 - authorIdentifierScheme ISNI 1 - authorIdentifierScheme LCNA 2 - authorIdentifierScheme VIAF 3 - authorIdentifierScheme GND 4 - authorIdentifierScheme DAI 5 - authorIdentifierScheme ResearcherID 6 - authorIdentifierScheme ScopusID 7 - language Abkhaz 0 - language Afar 1 aar aa - language Afrikaans 2 afr af - language Akan 3 aka ak - language Albanian 4 sqi alb sq - language Amharic 5 amh am - language Arabic 6 ara ar - language Aragonese 7 arg an - language Armenian 8 hye arm hy - language Assamese 9 asm as - language Avaric 10 ava av - language Avestan 11 ave ae - language Aymara 12 aym ay - language Azerbaijani 13 aze az - language Bambara 14 bam bm - language Bashkir 15 bak ba - language Basque 16 eus baq eu - language Belarusian 17 bel be - language Bengali, Bangla 18 ben bn - language Bihari 19 bih bh - language Bislama 20 bis bi - language Bosnian 21 bos bs - language Breton 22 bre br - language Bulgarian 23 bul bg - language Burmese 24 mya bur my - language Catalan,Valencian 25 cat ca - language Chamorro 26 cha ch - language Chechen 27 che ce - language Chichewa, Chewa, Nyanja 28 nya 
ny - language Chinese 29 zho chi zh - language Chuvash 30 chv cv - language Cornish 31 cor kw - language Corsican 32 cos co - language Cree 33 cre cr - language Croatian 34 hrv src hr - language Czech 35 ces cze cs - language Danish 36 dan da - language Divehi, Dhivehi, Maldivian 37 div dv - language Dutch 38 nld dut nl - language Dzongkha 39 dzo dz - language English 40 eng en - language Esperanto 41 epo eo - language Estonian 42 est et - language Ewe 43 ewe ee - language Faroese 44 fao fo - language Fijian 45 fij fj - language Finnish 46 fin fi - language French 47 fra fre fr - language Fula, Fulah, Pulaar, Pular 48 ful ff - language Galician 49 glg gl - language Georgian 50 kat geo ka - language German 51 deu ger de - language Greek (modern) 52 gre ell el - language Guaraní 53 grn gn - language Gujarati 54 guj gu - language Haitian, Haitian Creole 55 hat ht - language Hausa 56 hau ha - language Hebrew (modern) 57 heb he - language Herero 58 her hz - language Hindi 59 hin hi - language Hiri Motu 60 hmo ho - language Hungarian 61 hun hu - language Interlingua 62 ina ia - language Indonesian 63 ind id - language Interlingue 64 ile ie - language Irish 65 gle ga - language Igbo 66 ibo ig - language Inupiaq 67 ipk ik - language Ido 68 ido io - language Icelandic 69 isl ice is - language Italian 70 ita it - language Inuktitut 71 iku iu - language Japanese 72 jpn ja - language Javanese 73 jav jv - language Kalaallisut, Greenlandic 74 kal kl - language Kannada 75 kan kn - language Kanuri 76 kau kr - language Kashmiri 77 kas ks - language Kazakh 78 kaz kk - language Khmer 79 khm km - language Kikuyu, Gikuyu 80 kik ki - language Kinyarwanda 81 kin rw - language Kyrgyz 82 - language Komi 83 kom kv - language Kongo 84 kon kg - language Korean 85 kor ko - language Kurdish 86 kur ku - language Kwanyama, Kuanyama 87 kua kj - language Latin 88 lat la - language Luxembourgish, Letzeburgesch 89 ltz lb - language Ganda 90 lug lg - language Limburgish, Limburgan, Limburger 91 lim li 
- language Lingala 92 lin ln - language Lao 93 lao lo - language Lithuanian 94 lit lt - language Luba-Katanga 95 lub lu - language Latvian 96 lav lv - language Manx 97 glv gv - language Macedonian 98 mkd mac mk - language Malagasy 99 mlg mg - language Malay 100 may msa ms - language Malayalam 101 mal ml - language Maltese 102 mlt mt - language Māori 103 mao mri mi - language Marathi (Marāṭhī) 104 mar mr - language Marshallese 105 mah mh - language Mixtepec Mixtec 106 mix - language Mongolian 107 mon mn - language Nauru 108 nau na - language Navajo, Navaho 109 nav nv - language Northern Ndebele 110 nde nd - language Nepali 111 nep ne - language Ndonga 112 ndo ng - language Norwegian Bokmål 113 nob nb - language Norwegian Nynorsk 114 nno nn - language Norwegian 115 nor no - language Nuosu 116 - language Southern Ndebele 117 nbl nr - language Occitan 118 oci oc - language Ojibwe, Ojibwa 119 oji oj - language Old Church Slavonic,Church Slavonic,Old Bulgarian 120 chu cu - language Oromo 121 orm om - language Oriya 122 ori or - language Ossetian, Ossetic 123 oss os - language Panjabi, Punjabi 124 pan pa - language Pāli 125 pli pi - language Persian (Farsi) 126 per fas fa - language Polish 127 pol pl - language Pashto, Pushto 128 pus ps - language Portuguese 129 por pt - language Quechua 130 que qu - language Romansh 131 roh rm - language Kirundi 132 run rn - language Romanian 133 ron rum ro - language Russian 134 rus ru - language Sanskrit (Saṁskṛta) 135 san sa - language Sardinian 136 srd sc - language Sindhi 137 snd sd - language Northern Sami 138 sme se - language Samoan 139 smo sm - language Sango 140 sag sg - language Serbian 141 srp scc sr - language Scottish Gaelic, Gaelic 142 gla gd - language Shona 143 sna sn - language Sinhala, Sinhalese 144 sin si - language Slovak 145 slk slo sk - language Slovene 146 slv sl - language Somali 147 som so - language Southern Sotho 148 sot st - language Spanish, Castilian 149 spa es - language Sundanese 150 sun su - language 
Swahili 151 swa sw - language Swati 152 ssw ss - language Swedish 153 swe sv - language Tamil 154 tam ta - language Telugu 155 tel te - language Tajik 156 tgk tg - language Thai 157 tha th - language Tigrinya 158 tir ti - language Tibetan Standard, Tibetan, Central 159 tib bod bo - language Turkmen 160 tuk tk - language Tagalog 161 tgl tl - language Tswana 162 tsn tn - language Tonga (Tonga Islands) 163 ton to - language Turkish 164 tur tr - language Tsonga 165 tso ts - language Tatar 166 tat tt - language Twi 167 twi tw - language Tahitian 168 tah ty - language Uyghur, Uighur 169 uig ug - language Ukrainian 170 ukr uk - language Urdu 171 urd ur - language Uzbek 172 uzb uz - language Venda 173 ven ve - language Vietnamese 174 vie vi - language Volapük 175 vol vo - language Walloon 176 wln wa - language Welsh 177 cym wel cy - language Wolof 178 wol wo - language Western Frisian 179 fry fy - language Xhosa 180 xho xh - language Yiddish 181 yid yi - language Yoruba 182 yor yo - language Zhuang, Chuang 183 zha za - language Zulu 184 zul zu - language Not applicable 185 +#controlledVocabulary DatasetField Value identifier displayOrder + subject Agricultural Sciences D01 0 + subject Arts and Humanities D0 1 + subject Astronomy and Astrophysics D1 2 + subject Business and Management D2 3 + subject Chemistry D3 4 + subject Computer and Information Science D7 5 + subject Earth and Environmental Sciences D4 6 + subject Engineering D5 7 + subject Law D8 8 + subject Mathematical Sciences D9 9 + subject Medicine, Health and Life Sciences D6 10 + subject Physics D10 11 + subject Social Sciences D11 12 + subject Other D12 13 + publicationIDType ark 0 + publicationIDType arXiv 1 + publicationIDType bibcode 2 + publicationIDType doi 3 + publicationIDType ean13 4 + publicationIDType eissn 5 + publicationIDType handle 6 + publicationIDType isbn 7 + publicationIDType issn 8 + publicationIDType istc 9 + publicationIDType lissn 10 + publicationIDType lsid 11 + publicationIDType pmid 12 
+ publicationIDType purl 13 + publicationIDType upc 14 + publicationIDType url 15 + publicationIDType urn 16 + publicationIDType DASH-NRS 17 + contributorType Data Collector 0 + contributorType Data Curator 1 + contributorType Data Manager 2 + contributorType Editor 3 + contributorType Funder 4 + contributorType Hosting Institution 5 + contributorType Project Leader 6 + contributorType Project Manager 7 + contributorType Project Member 8 + contributorType Related Person 9 + contributorType Researcher 10 + contributorType Research Group 11 + contributorType Rights Holder 12 + contributorType Sponsor 13 + contributorType Supervisor 14 + contributorType Work Package Leader 15 + contributorType Other 16 + authorIdentifierScheme ORCID 0 + authorIdentifierScheme ISNI 1 + authorIdentifierScheme LCNA 2 + authorIdentifierScheme VIAF 3 + authorIdentifierScheme GND 4 + authorIdentifierScheme DAI 5 + authorIdentifierScheme ResearcherID 6 + authorIdentifierScheme ScopusID 7 + language Abkhaz 0 + language Afar 1 aar aa + language Afrikaans 2 afr af + language Akan 3 aka ak + language Albanian 4 sqi alb sq + language Amharic 5 amh am + language Arabic 6 ara ar + language Aragonese 7 arg an + language Armenian 8 hye arm hy + language Assamese 9 asm as + language Avaric 10 ava av + language Avestan 11 ave ae + language Aymara 12 aym ay + language Azerbaijani 13 aze az + language Bambara 14 bam bm + language Bashkir 15 bak ba + language Basque 16 eus baq eu + language Belarusian 17 bel be + language Bengali, Bangla 18 ben bn + language Bihari 19 bih bh + language Bislama 20 bis bi + language Bosnian 21 bos bs + language Breton 22 bre br + language Bulgarian 23 bul bg + language Burmese 24 mya bur my + language Catalan,Valencian 25 cat ca + language Chamorro 26 cha ch + language Chechen 27 che ce + language Chichewa, Chewa, Nyanja 28 nya ny + language Chinese 29 zho chi zh + language Chuvash 30 chv cv + language Cornish 31 cor kw + language Corsican 32 cos co + language Cree 33 cre 
cr + language Croatian 34 hrv src hr + language Czech 35 ces cze cs + language Danish 36 dan da + language Divehi, Dhivehi, Maldivian 37 div dv + language Dutch 38 nld dut nl + language Dzongkha 39 dzo dz + language English 40 eng en + language Esperanto 41 epo eo + language Estonian 42 est et + language Ewe 43 ewe ee + language Faroese 44 fao fo + language Fijian 45 fij fj + language Finnish 46 fin fi + language French 47 fra fre fr + language Fula, Fulah, Pulaar, Pular 48 ful ff + language Galician 49 glg gl + language Georgian 50 kat geo ka + language German 51 deu ger de + language Greek (modern) 52 gre ell el + language Guaraní 53 grn gn + language Gujarati 54 guj gu + language Haitian, Haitian Creole 55 hat ht + language Hausa 56 hau ha + language Hebrew (modern) 57 heb he + language Herero 58 her hz + language Hindi 59 hin hi + language Hiri Motu 60 hmo ho + language Hungarian 61 hun hu + language Interlingua 62 ina ia + language Indonesian 63 ind id + language Interlingue 64 ile ie + language Irish 65 gle ga + language Igbo 66 ibo ig + language Inupiaq 67 ipk ik + language Ido 68 ido io + language Icelandic 69 isl ice is + language Italian 70 ita it + language Inuktitut 71 iku iu + language Japanese 72 jpn ja + language Javanese 73 jav jv + language Kalaallisut, Greenlandic 74 kal kl + language Kannada 75 kan kn + language Kanuri 76 kau kr + language Kashmiri 77 kas ks + language Kazakh 78 kaz kk + language Khmer 79 khm km + language Kikuyu, Gikuyu 80 kik ki + language Kinyarwanda 81 kin rw + language Kyrgyz 82 + language Komi 83 kom kv + language Kongo 84 kon kg + language Korean 85 kor ko + language Kurdish 86 kur ku + language Kwanyama, Kuanyama 87 kua kj + language Latin 88 lat la + language Luxembourgish, Letzeburgesch 89 ltz lb + language Ganda 90 lug lg + language Limburgish, Limburgan, Limburger 91 lim li + language Lingala 92 lin ln + language Lao 93 lao lo + language Lithuanian 94 lit lt + language Luba-Katanga 95 lub lu + language Latvian 96 lav 
lv + language Manx 97 glv gv + language Macedonian 98 mkd mac mk + language Malagasy 99 mlg mg + language Malay 100 may msa ms + language Malayalam 101 mal ml + language Maltese 102 mlt mt + language Māori 103 mao mri mi + language Marathi (Marāṭhī) 104 mar mr + language Marshallese 105 mah mh + language Mixtepec Mixtec 106 mix + language Mongolian 107 mon mn + language Nauru 108 nau na + language Navajo, Navaho 109 nav nv + language Northern Ndebele 110 nde nd + language Nepali 111 nep ne + language Ndonga 112 ndo ng + language Norwegian Bokmål 113 nob nb + language Norwegian Nynorsk 114 nno nn + language Norwegian 115 nor no + language Nuosu 116 + language Southern Ndebele 117 nbl nr + language Occitan 118 oci oc + language Ojibwe, Ojibwa 119 oji oj + language Old Church Slavonic,Church Slavonic,Old Bulgarian 120 chu cu + language Oromo 121 orm om + language Oriya 122 ori or + language Ossetian, Ossetic 123 oss os + language Panjabi, Punjabi 124 pan pa + language Pāli 125 pli pi + language Persian (Farsi) 126 per fas fa + language Polish 127 pol pl + language Pashto, Pushto 128 pus ps + language Portuguese 129 por pt + language Quechua 130 que qu + language Romansh 131 roh rm + language Kirundi 132 run rn + language Romanian 133 ron rum ro + language Russian 134 rus ru + language Sanskrit (Saṁskṛta) 135 san sa + language Sardinian 136 srd sc + language Sindhi 137 snd sd + language Northern Sami 138 sme se + language Samoan 139 smo sm + language Sango 140 sag sg + language Serbian 141 srp scc sr + language Scottish Gaelic, Gaelic 142 gla gd + language Shona 143 sna sn + language Sinhala, Sinhalese 144 sin si + language Slovak 145 slk slo sk + language Slovene 146 slv sl + language Somali 147 som so + language Southern Sotho 148 sot st + language Spanish, Castilian 149 spa es + language Sundanese 150 sun su + language Swahili 151 swa sw + language Swati 152 ssw ss + language Swedish 153 swe sv + language Tamil 154 tam ta + language Telugu 155 tel te + language 
Tajik 156 tgk tg + language Thai 157 tha th + language Tigrinya 158 tir ti + language Tibetan Standard, Tibetan, Central 159 tib bod bo + language Turkmen 160 tuk tk + language Tagalog 161 tgl tl + language Tswana 162 tsn tn + language Tonga (Tonga Islands) 163 ton to + language Turkish 164 tur tr + language Tsonga 165 tso ts + language Tatar 166 tat tt + language Twi 167 twi tw + language Tahitian 168 tah ty + language Uyghur, Uighur 169 uig ug + language Ukrainian 170 ukr uk + language Urdu 171 urd ur + language Uzbek 172 uzb uz + language Venda 173 ven ve + language Vietnamese 174 vie vi + language Volapük 175 vol vo + language Walloon 176 wln wa + language Welsh 177 cym wel cy + language Wolof 178 wol wo + language Western Frisian 179 fry fy + language Xhosa 180 xho xh + language Yiddish 181 yid yi + language Yoruba 182 yor yo + language Zhuang, Chuang 183 zha za + language Zulu 184 zul zu + language Not applicable 185 From e5a356a5132cb7296ca1878f0120f0a52faebbdb Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 13 Mar 2023 13:35:42 -0400 Subject: [PATCH 03/25] release notes --- doc/release-notes/9428-alternative-title.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 doc/release-notes/9428-alternative-title.md diff --git a/doc/release-notes/9428-alternative-title.md b/doc/release-notes/9428-alternative-title.md new file mode 100644 index 00000000000..d6eaa680612 --- /dev/null +++ b/doc/release-notes/9428-alternative-title.md @@ -0,0 +1,6 @@ +Alternative Title is made repeatable. +- One will need to update database with updated citation block. 
+`curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` +- One will also need to update solr schema: +Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` +Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` From 5d81fc7042e81b152cc89773a3e67afd47c17e5a Mon Sep 17 00:00:00 2001 From: lubitchv Date: Tue, 14 Mar 2023 16:15:48 -0400 Subject: [PATCH 04/25] import --- .../iq/dataverse/api/imports/ImportDDIServiceBean.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index d9433832309..f7f0e30ea6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1396,6 +1396,7 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws XMLStreamException, ImportException { MetadataBlockDTO citation = datasetDTO.getDatasetVersion().getMetadataBlocks().get("citation"); List> otherIds = new ArrayList<>(); + List altTitles = new ArrayList<>(); for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { @@ -1406,8 +1407,7 @@ private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws FieldDTO field = FieldDTO.createPrimitiveFieldDTO("subtitle", parseText(xmlr)); citation.getFields().add(field); } else if (xmlr.getLocalName().equals("altTitl")) { - FieldDTO field = FieldDTO.createPrimitiveFieldDTO("alternativeTitle", parseText(xmlr)); - 
citation.getFields().add(field); + altTitles.add(parseText(xmlr)); } else if (xmlr.getLocalName().equals("IDNo")) { if ( AGENCY_HANDLE.equals( xmlr.getAttributeValue(null, "agency") ) || AGENCY_DOI.equals( xmlr.getAttributeValue(null, "agency") ) ) { importGenericService.reassignIdentifierAsGlobalId(parseText(xmlr), datasetDTO); @@ -1435,6 +1435,9 @@ private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws if (otherIds.size()>0) { citation.addField(FieldDTO.createMultipleCompoundFieldDTO("otherId", otherIds)); } + if (altTitles.size()>0) { + citation.addField(FieldDTO.createMultiplePrimitiveFieldDTO("alternativeTitle", altTitles)); + } return; } } From 991c5f9faf5a378c0c9e21848788d24cb918fbbc Mon Sep 17 00:00:00 2001 From: lubitchv Date: Wed, 15 Mar 2023 17:20:01 -0400 Subject: [PATCH 05/25] prodPlac --- scripts/api/data/metadatablocks/citation.tsv | 507 +++++++++--------- .../api/imports/ImportDDIServiceBean.java | 6 +- .../dataverse/export/ddi/DdiExportUtil.java | 9 +- .../edu/harvard/iq/dataverse/api/AdminIT.java | 2 +- .../dataset-create-new-all-ddi-fields.json | 4 +- 5 files changed, 268 insertions(+), 260 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 20d858ddb4b..3fbc38438b8 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -4,7 +4,7 @@ title Title The main title of the Dataset text 0 TRUE FALSE FALSE FALSE TRUE TRUE citation http://purl.org/dc/terms/title subtitle Subtitle A secondary title that amplifies or states certain limitations on the main title text 1 FALSE FALSE FALSE FALSE FALSE FALSE citation alternativeTitle Alternative Title Either 1) a title commonly used to refer to the Dataset or 2) an abbreviation of the main title text 2 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/alternative - alternativeURL Alternative URL Another URL where one can view or access the 
data in the Dataset, e.g. a project or personal webpage https:// url 3 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution + alternativeURL Alternative URL Another URL where one can view or access the data in the Dataset, e.g. a project or personal webpage https:// url 3 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE citation https://schema.org/distribution otherId Other Identifier Another unique identifier for the Dataset (e.g. producer's or another repository's identifier) none 4 : FALSE FALSE TRUE FALSE FALSE FALSE citation otherIdAgency Agency The name of the agency that generated the other identifier text 5 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation otherIdValue Identifier Another identifier uniquely identifies the Dataset text 6 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE otherId citation @@ -24,26 +24,26 @@ keyword Keyword A key term that describes an important aspect of the Dataset and information about any controlled vocabulary used none 20 FALSE FALSE TRUE FALSE TRUE FALSE citation keywordValue Term A key term that describes important aspects of the Dataset text 21 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE keyword citation keywordVocabulary Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. 
LCSH, MeSH) text 22 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation - keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 23 "#VALUE" FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 23 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation topicClassification Topic Classification Indicates a broad, important topic or subject that the Dataset covers and information about any controlled vocabulary used none 24 FALSE FALSE TRUE FALSE FALSE FALSE citation topicClassValue Term A topic or subject term text 25 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 26 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation - topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 27 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 27 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation publication Related Publication The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab none 28 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy publicationCitation Citation The full bibliographic citation for the related publication textbox 29 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme publicationIDNumber Identifier The identifier for a related publication text 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 32 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE publication citation https://schema.org/distribution + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. 
a journal article webpage https:// url 32 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE publication citation https://schema.org/distribution notesText Notes Additional information about the Dataset textbox 33 FALSE FALSE FALSE FALSE TRUE FALSE citation language Language A language that the Dataset's files is written in text 34 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 35 FALSE FALSE TRUE FALSE FALSE FALSE citation producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 36 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 37 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 39 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 40 "
" FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 42 TRUE FALSE TRUE TRUE FALSE FALSE citation contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation @@ -54,8 +54,8 @@ distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 51 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. 
IQSS, ICPSR) text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 53 "#VALUE" FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 54 "
" FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 53 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 54
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 55 TRUE FALSE FALSE TRUE FALSE FALSE citation depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 56 FALSE FALSE FALSE FALSE FALSE FALSE citation dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 57 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted @@ -79,247 +79,248 @@ originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 75 FALSE FALSE FALSE FALSE FALSE FALSE citation characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation -#controlledVocabulary DatasetField Value identifier displayOrder - subject Agricultural Sciences D01 0 - subject Arts and Humanities D0 1 - subject Astronomy and Astrophysics D1 2 - subject Business and Management D2 3 - subject Chemistry D3 4 - subject Computer and Information Science D7 5 - subject Earth and Environmental Sciences D4 6 - subject Engineering D5 7 - subject Law D8 8 - subject Mathematical Sciences D9 9 - subject Medicine, Health and Life Sciences D6 10 - subject Physics D10 11 - subject Social Sciences D11 12 - subject Other D12 13 - publicationIDType ark 0 - publicationIDType arXiv 1 - publicationIDType bibcode 2 - publicationIDType doi 3 - publicationIDType ean13 4 - publicationIDType eissn 5 - publicationIDType handle 6 - publicationIDType isbn 7 - publicationIDType 
issn 8 - publicationIDType istc 9 - publicationIDType lissn 10 - publicationIDType lsid 11 - publicationIDType pmid 12 - publicationIDType purl 13 - publicationIDType upc 14 - publicationIDType url 15 - publicationIDType urn 16 - publicationIDType DASH-NRS 17 - contributorType Data Collector 0 - contributorType Data Curator 1 - contributorType Data Manager 2 - contributorType Editor 3 - contributorType Funder 4 - contributorType Hosting Institution 5 - contributorType Project Leader 6 - contributorType Project Manager 7 - contributorType Project Member 8 - contributorType Related Person 9 - contributorType Researcher 10 - contributorType Research Group 11 - contributorType Rights Holder 12 - contributorType Sponsor 13 - contributorType Supervisor 14 - contributorType Work Package Leader 15 - contributorType Other 16 - authorIdentifierScheme ORCID 0 - authorIdentifierScheme ISNI 1 - authorIdentifierScheme LCNA 2 - authorIdentifierScheme VIAF 3 - authorIdentifierScheme GND 4 - authorIdentifierScheme DAI 5 - authorIdentifierScheme ResearcherID 6 - authorIdentifierScheme ScopusID 7 - language Abkhaz 0 - language Afar 1 aar aa - language Afrikaans 2 afr af - language Akan 3 aka ak - language Albanian 4 sqi alb sq - language Amharic 5 amh am - language Arabic 6 ara ar - language Aragonese 7 arg an - language Armenian 8 hye arm hy - language Assamese 9 asm as - language Avaric 10 ava av - language Avestan 11 ave ae - language Aymara 12 aym ay - language Azerbaijani 13 aze az - language Bambara 14 bam bm - language Bashkir 15 bak ba - language Basque 16 eus baq eu - language Belarusian 17 bel be - language Bengali, Bangla 18 ben bn - language Bihari 19 bih bh - language Bislama 20 bis bi - language Bosnian 21 bos bs - language Breton 22 bre br - language Bulgarian 23 bul bg - language Burmese 24 mya bur my - language Catalan,Valencian 25 cat ca - language Chamorro 26 cha ch - language Chechen 27 che ce - language Chichewa, Chewa, Nyanja 28 nya ny - language Chinese 29 zho 
chi zh - language Chuvash 30 chv cv - language Cornish 31 cor kw - language Corsican 32 cos co - language Cree 33 cre cr - language Croatian 34 hrv src hr - language Czech 35 ces cze cs - language Danish 36 dan da - language Divehi, Dhivehi, Maldivian 37 div dv - language Dutch 38 nld dut nl - language Dzongkha 39 dzo dz - language English 40 eng en - language Esperanto 41 epo eo - language Estonian 42 est et - language Ewe 43 ewe ee - language Faroese 44 fao fo - language Fijian 45 fij fj - language Finnish 46 fin fi - language French 47 fra fre fr - language Fula, Fulah, Pulaar, Pular 48 ful ff - language Galician 49 glg gl - language Georgian 50 kat geo ka - language German 51 deu ger de - language Greek (modern) 52 gre ell el - language Guaraní 53 grn gn - language Gujarati 54 guj gu - language Haitian, Haitian Creole 55 hat ht - language Hausa 56 hau ha - language Hebrew (modern) 57 heb he - language Herero 58 her hz - language Hindi 59 hin hi - language Hiri Motu 60 hmo ho - language Hungarian 61 hun hu - language Interlingua 62 ina ia - language Indonesian 63 ind id - language Interlingue 64 ile ie - language Irish 65 gle ga - language Igbo 66 ibo ig - language Inupiaq 67 ipk ik - language Ido 68 ido io - language Icelandic 69 isl ice is - language Italian 70 ita it - language Inuktitut 71 iku iu - language Japanese 72 jpn ja - language Javanese 73 jav jv - language Kalaallisut, Greenlandic 74 kal kl - language Kannada 75 kan kn - language Kanuri 76 kau kr - language Kashmiri 77 kas ks - language Kazakh 78 kaz kk - language Khmer 79 khm km - language Kikuyu, Gikuyu 80 kik ki - language Kinyarwanda 81 kin rw - language Kyrgyz 82 - language Komi 83 kom kv - language Kongo 84 kon kg - language Korean 85 kor ko - language Kurdish 86 kur ku - language Kwanyama, Kuanyama 87 kua kj - language Latin 88 lat la - language Luxembourgish, Letzeburgesch 89 ltz lb - language Ganda 90 lug lg - language Limburgish, Limburgan, Limburger 91 lim li - language Lingala 92 lin ln 
- language Lao 93 lao lo - language Lithuanian 94 lit lt - language Luba-Katanga 95 lub lu - language Latvian 96 lav lv - language Manx 97 glv gv - language Macedonian 98 mkd mac mk - language Malagasy 99 mlg mg - language Malay 100 may msa ms - language Malayalam 101 mal ml - language Maltese 102 mlt mt - language Māori 103 mao mri mi - language Marathi (Marāṭhī) 104 mar mr - language Marshallese 105 mah mh - language Mixtepec Mixtec 106 mix - language Mongolian 107 mon mn - language Nauru 108 nau na - language Navajo, Navaho 109 nav nv - language Northern Ndebele 110 nde nd - language Nepali 111 nep ne - language Ndonga 112 ndo ng - language Norwegian Bokmål 113 nob nb - language Norwegian Nynorsk 114 nno nn - language Norwegian 115 nor no - language Nuosu 116 - language Southern Ndebele 117 nbl nr - language Occitan 118 oci oc - language Ojibwe, Ojibwa 119 oji oj - language Old Church Slavonic,Church Slavonic,Old Bulgarian 120 chu cu - language Oromo 121 orm om - language Oriya 122 ori or - language Ossetian, Ossetic 123 oss os - language Panjabi, Punjabi 124 pan pa - language Pāli 125 pli pi - language Persian (Farsi) 126 per fas fa - language Polish 127 pol pl - language Pashto, Pushto 128 pus ps - language Portuguese 129 por pt - language Quechua 130 que qu - language Romansh 131 roh rm - language Kirundi 132 run rn - language Romanian 133 ron rum ro - language Russian 134 rus ru - language Sanskrit (Saṁskṛta) 135 san sa - language Sardinian 136 srd sc - language Sindhi 137 snd sd - language Northern Sami 138 sme se - language Samoan 139 smo sm - language Sango 140 sag sg - language Serbian 141 srp scc sr - language Scottish Gaelic, Gaelic 142 gla gd - language Shona 143 sna sn - language Sinhala, Sinhalese 144 sin si - language Slovak 145 slk slo sk - language Slovene 146 slv sl - language Somali 147 som so - language Southern Sotho 148 sot st - language Spanish, Castilian 149 spa es - language Sundanese 150 sun su - language Swahili 151 swa sw - language 
Swati 152 ssw ss - language Swedish 153 swe sv - language Tamil 154 tam ta - language Telugu 155 tel te - language Tajik 156 tgk tg - language Thai 157 tha th - language Tigrinya 158 tir ti - language Tibetan Standard, Tibetan, Central 159 tib bod bo - language Turkmen 160 tuk tk - language Tagalog 161 tgl tl - language Tswana 162 tsn tn - language Tonga (Tonga Islands) 163 ton to - language Turkish 164 tur tr - language Tsonga 165 tso ts - language Tatar 166 tat tt - language Twi 167 twi tw - language Tahitian 168 tah ty - language Uyghur, Uighur 169 uig ug - language Ukrainian 170 ukr uk - language Urdu 171 urd ur - language Uzbek 172 uzb uz - language Venda 173 ven ve - language Vietnamese 174 vie vi - language Volapük 175 vol vo - language Walloon 176 wln wa - language Welsh 177 cym wel cy - language Wolof 178 wol wo - language Western Frisian 179 fry fy - language Xhosa 180 xho xh - language Yiddish 181 yid yi - language Yoruba 182 yor yo - language Zhuang, Chuang 183 zha za - language Zulu 184 zul zu - language Not applicable 185 +#controlledVocabulary DatasetField Value identifier displayOrder + subject Agricultural Sciences D01 0 + subject Arts and Humanities D0 1 + subject Astronomy and Astrophysics D1 2 + subject Business and Management D2 3 + subject Chemistry D3 4 + subject Computer and Information Science D7 5 + subject Earth and Environmental Sciences D4 6 + subject Engineering D5 7 + subject Law D8 8 + subject Mathematical Sciences D9 9 + subject Medicine, Health and Life Sciences D6 10 + subject Physics D10 11 + subject Social Sciences D11 12 + subject Other D12 13 + publicationIDType ark 0 + publicationIDType arXiv 1 + publicationIDType bibcode 2 + publicationIDType cstr 3 + publicationIDType doi 4 + publicationIDType ean13 5 + publicationIDType eissn 6 + publicationIDType handle 7 + publicationIDType isbn 8 + publicationIDType issn 9 + publicationIDType istc 10 + publicationIDType lissn 11 + publicationIDType lsid 12 + publicationIDType pmid 13 + 
publicationIDType purl 14 + publicationIDType upc 15 + publicationIDType url 16 + publicationIDType urn 17 + publicationIDType DASH-NRS 18 + contributorType Data Collector 0 + contributorType Data Curator 1 + contributorType Data Manager 2 + contributorType Editor 3 + contributorType Funder 4 + contributorType Hosting Institution 5 + contributorType Project Leader 6 + contributorType Project Manager 7 + contributorType Project Member 8 + contributorType Related Person 9 + contributorType Researcher 10 + contributorType Research Group 11 + contributorType Rights Holder 12 + contributorType Sponsor 13 + contributorType Supervisor 14 + contributorType Work Package Leader 15 + contributorType Other 16 + authorIdentifierScheme ORCID 0 + authorIdentifierScheme ISNI 1 + authorIdentifierScheme LCNA 2 + authorIdentifierScheme VIAF 3 + authorIdentifierScheme GND 4 + authorIdentifierScheme DAI 5 + authorIdentifierScheme ResearcherID 6 + authorIdentifierScheme ScopusID 7 + language Abkhaz 0 + language Afar 1 aar aa + language Afrikaans 2 afr af + language Akan 3 aka ak + language Albanian 4 sqi alb sq + language Amharic 5 amh am + language Arabic 6 ara ar + language Aragonese 7 arg an + language Armenian 8 hye arm hy + language Assamese 9 asm as + language Avaric 10 ava av + language Avestan 11 ave ae + language Aymara 12 aym ay + language Azerbaijani 13 aze az + language Bambara 14 bam bm + language Bashkir 15 bak ba + language Basque 16 eus baq eu + language Belarusian 17 bel be + language Bengali, Bangla 18 ben bn + language Bihari 19 bih bh + language Bislama 20 bis bi + language Bosnian 21 bos bs + language Breton 22 bre br + language Bulgarian 23 bul bg + language Burmese 24 mya bur my + language Catalan,Valencian 25 cat ca + language Chamorro 26 cha ch + language Chechen 27 che ce + language Chichewa, Chewa, Nyanja 28 nya ny + language Chinese 29 zho chi zh + language Chuvash 30 chv cv + language Cornish 31 cor kw + language Corsican 32 cos co + language Cree 33 cre cr 
+ language Croatian 34 hrv src hr + language Czech 35 ces cze cs + language Danish 36 dan da + language Divehi, Dhivehi, Maldivian 37 div dv + language Dutch 38 nld dut nl + language Dzongkha 39 dzo dz + language English 40 eng en + language Esperanto 41 epo eo + language Estonian 42 est et + language Ewe 43 ewe ee + language Faroese 44 fao fo + language Fijian 45 fij fj + language Finnish 46 fin fi + language French 47 fra fre fr + language Fula, Fulah, Pulaar, Pular 48 ful ff + language Galician 49 glg gl + language Georgian 50 kat geo ka + language German 51 deu ger de + language Greek (modern) 52 gre ell el + language Guaraní 53 grn gn + language Gujarati 54 guj gu + language Haitian, Haitian Creole 55 hat ht + language Hausa 56 hau ha + language Hebrew (modern) 57 heb he + language Herero 58 her hz + language Hindi 59 hin hi + language Hiri Motu 60 hmo ho + language Hungarian 61 hun hu + language Interlingua 62 ina ia + language Indonesian 63 ind id + language Interlingue 64 ile ie + language Irish 65 gle ga + language Igbo 66 ibo ig + language Inupiaq 67 ipk ik + language Ido 68 ido io + language Icelandic 69 isl ice is + language Italian 70 ita it + language Inuktitut 71 iku iu + language Japanese 72 jpn ja + language Javanese 73 jav jv + language Kalaallisut, Greenlandic 74 kal kl + language Kannada 75 kan kn + language Kanuri 76 kau kr + language Kashmiri 77 kas ks + language Kazakh 78 kaz kk + language Khmer 79 khm km + language Kikuyu, Gikuyu 80 kik ki + language Kinyarwanda 81 kin rw + language Kyrgyz 82 + language Komi 83 kom kv + language Kongo 84 kon kg + language Korean 85 kor ko + language Kurdish 86 kur ku + language Kwanyama, Kuanyama 87 kua kj + language Latin 88 lat la + language Luxembourgish, Letzeburgesch 89 ltz lb + language Ganda 90 lug lg + language Limburgish, Limburgan, Limburger 91 lim li + language Lingala 92 lin ln + language Lao 93 lao lo + language Lithuanian 94 lit lt + language Luba-Katanga 95 lub lu + language Latvian 96 lav lv 
+ language Manx 97 glv gv + language Macedonian 98 mkd mac mk + language Malagasy 99 mlg mg + language Malay 100 may msa ms + language Malayalam 101 mal ml + language Maltese 102 mlt mt + language Māori 103 mao mri mi + language Marathi (Marāṭhī) 104 mar mr + language Marshallese 105 mah mh + language Mixtepec Mixtec 106 mix + language Mongolian 107 mon mn + language Nauru 108 nau na + language Navajo, Navaho 109 nav nv + language Northern Ndebele 110 nde nd + language Nepali 111 nep ne + language Ndonga 112 ndo ng + language Norwegian Bokmål 113 nob nb + language Norwegian Nynorsk 114 nno nn + language Norwegian 115 nor no + language Nuosu 116 + language Southern Ndebele 117 nbl nr + language Occitan 118 oci oc + language Ojibwe, Ojibwa 119 oji oj + language Old Church Slavonic,Church Slavonic,Old Bulgarian 120 chu cu + language Oromo 121 orm om + language Oriya 122 ori or + language Ossetian, Ossetic 123 oss os + language Panjabi, Punjabi 124 pan pa + language Pāli 125 pli pi + language Persian (Farsi) 126 per fas fa + language Polish 127 pol pl + language Pashto, Pushto 128 pus ps + language Portuguese 129 por pt + language Quechua 130 que qu + language Romansh 131 roh rm + language Kirundi 132 run rn + language Romanian 133 ron rum ro + language Russian 134 rus ru + language Sanskrit (Saṁskṛta) 135 san sa + language Sardinian 136 srd sc + language Sindhi 137 snd sd + language Northern Sami 138 sme se + language Samoan 139 smo sm + language Sango 140 sag sg + language Serbian 141 srp scc sr + language Scottish Gaelic, Gaelic 142 gla gd + language Shona 143 sna sn + language Sinhala, Sinhalese 144 sin si + language Slovak 145 slk slo sk + language Slovene 146 slv sl + language Somali 147 som so + language Southern Sotho 148 sot st + language Spanish, Castilian 149 spa es + language Sundanese 150 sun su + language Swahili 151 swa sw + language Swati 152 ssw ss + language Swedish 153 swe sv + language Tamil 154 tam ta + language Telugu 155 tel te + language Tajik 
156 tgk tg + language Thai 157 tha th + language Tigrinya 158 tir ti + language Tibetan Standard, Tibetan, Central 159 tib bod bo + language Turkmen 160 tuk tk + language Tagalog 161 tgl tl + language Tswana 162 tsn tn + language Tonga (Tonga Islands) 163 ton to + language Turkish 164 tur tr + language Tsonga 165 tso ts + language Tatar 166 tat tt + language Twi 167 twi tw + language Tahitian 168 tah ty + language Uyghur, Uighur 169 uig ug + language Ukrainian 170 ukr uk + language Urdu 171 urd ur + language Uzbek 172 uzb uz + language Venda 173 ven ve + language Vietnamese 174 vie vi + language Volapük 175 vol vo + language Walloon 176 wln wa + language Welsh 177 cym wel cy + language Wolof 178 wol wo + language Western Frisian 179 fry fy + language Xhosa 180 xho xh + language Yiddish 181 yid yi + language Yoruba 182 yor yo + language Zhuang, Chuang 183 zha za + language Zulu 184 zul zu + language Not applicable 185 diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index f7f0e30ea6e..458803e0c92 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1337,6 +1337,7 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th List> producers = new ArrayList<>(); List> grants = new ArrayList<>(); List> software = new ArrayList<>(); + List prodPlac = new ArrayList<>(); for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { @@ -1352,9 +1353,7 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th } else if (xmlr.getLocalName().equals("prodDate")) { citation.getFields().add(FieldDTO.createPrimitiveFieldDTO("productionDate", parseDate(xmlr, "prodDate"))); } else if 
(xmlr.getLocalName().equals("prodPlac")) { - List prodPlac = new ArrayList<>(); prodPlac.add(parseText(xmlr, "prodPlac")); - citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); } else if (xmlr.getLocalName().equals("software")) { HashSet set = new HashSet<>(); addToSet(set,"softwareVersion", xmlr.getAttributeValue(null, "version")); @@ -1387,6 +1386,9 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th if (producers.size()>0) { citation.getFields().add(FieldDTO.createMultipleCompoundFieldDTO("producer", producers)); } + if (prodPlac.size() > 0) { + citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); + } return; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index eb53473d4d9..4ea90ea6199 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -895,8 +895,13 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } } } - writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); - writeFullElement(xmlw, "prodPlac", dto2Primitive(version, DatasetFieldConstant.productionPlace)); + writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); + + FieldDTO prodPlac = dto2FieldDTO( version, DatasetFieldConstant.productionPlace, "citation" ); + if (prodPlac != null) { + writeMultipleElement(xmlw, "prodPlac", prodPlac, null); + } + writeSoftwareElement(xmlw, version); writeGrantElement(xmlw, version); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 2ba06314ddb..14185b97e9e 100644 --- 
a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -758,7 +758,7 @@ public void testLoadMetadataBlock_NoErrorPath() { assertEquals(1, data.size()); List> addedElements = data.get("added"); //Note -test depends on the number of elements in the production citation block, so any changes to the # of elements there can break this test - assertEquals(323, addedElements.size()); + assertEquals(322, addedElements.size()); Map statistics = new HashMap<>(); for (Map unit : addedElements) { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json index 96f058b1b02..822623f721a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -409,9 +409,9 @@ }, { "typeName": "productionPlace", - "multiple": false, + "multiple": true, "typeClass": "primitive", - "value": "ProductionPlace" + "value": ["ProductionPlace"] }, { "typeName": "contributor", From 89dbc7a87407a095670fa66dda87616b8500959d Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 16 Mar 2023 13:19:33 -0400 Subject: [PATCH 06/25] restore back --- src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 14185b97e9e..2ba06314ddb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -758,7 +758,7 @@ public void testLoadMetadataBlock_NoErrorPath() { assertEquals(1, data.size()); List> addedElements = data.get("added"); //Note -test depends on the number of elements in the production citation block, so any changes to the 
# of elements there can break this test - assertEquals(322, addedElements.size()); + assertEquals(323, addedElements.size()); Map statistics = new HashMap<>(); for (Map unit : addedElements) { From dbd97ff1c9c7c19ec56270fdfcef7e11056a7778 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 11 May 2023 16:38:49 -0400 Subject: [PATCH 07/25] develop + title --- .../api/imports/ImportDDIServiceBean.java | 33 +- .../dataverse/export/ddi/DdiExportUtil.java | 481 +++++++++++------- .../export/openaire/OpenAireExportUtil.java | 243 +++------ 3 files changed, 393 insertions(+), 364 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index 458803e0c92..bafd7267acb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1266,24 +1266,26 @@ private void parseVersionNumber(DatasetVersionDTO dvDTO, String versionNumber) { } - private void processSerStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) throws XMLStreamException { - FieldDTO seriesName=null; - FieldDTO seriesInformation=null; - for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { + private void processSerStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) throws XMLStreamException { + FieldDTO seriesInformation = null; + FieldDTO seriesName = null; + for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { + if (xmlr.getLocalName().equals("serInfo")) { + seriesInformation = FieldDTO.createPrimitiveFieldDTO("seriesInformation", parseText(xmlr)); + } if (xmlr.getLocalName().equals("serName")) { - seriesName = FieldDTO.createPrimitiveFieldDTO("seriesName", parseText(xmlr)); - - } else if (xmlr.getLocalName().equals("serInfo")) { - 
seriesInformation=FieldDTO.createPrimitiveFieldDTO("seriesInformation", parseText(xmlr) ); + seriesName = FieldDTO.createPrimitiveFieldDTO("seriesName", parseText(xmlr)); } } else if (event == XMLStreamConstants.END_ELEMENT) { if (xmlr.getLocalName().equals("serStmt")) { - citation.getFields().add(FieldDTO.createCompoundFieldDTO("series",seriesName,seriesInformation )); + if (seriesInformation != null || seriesName != null) { + citation.addField(FieldDTO.createMultipleCompoundFieldDTO("series", seriesName, seriesInformation )); + } return; } } - } + } } private void processDistStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) throws XMLStreamException { @@ -1337,7 +1339,6 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th List> producers = new ArrayList<>(); List> grants = new ArrayList<>(); List> software = new ArrayList<>(); - List prodPlac = new ArrayList<>(); for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { @@ -1353,7 +1354,9 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th } else if (xmlr.getLocalName().equals("prodDate")) { citation.getFields().add(FieldDTO.createPrimitiveFieldDTO("productionDate", parseDate(xmlr, "prodDate"))); } else if (xmlr.getLocalName().equals("prodPlac")) { + List prodPlac = new ArrayList<>(); prodPlac.add(parseText(xmlr, "prodPlac")); + citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); } else if (xmlr.getLocalName().equals("software")) { HashSet set = new HashSet<>(); addToSet(set,"softwareVersion", xmlr.getAttributeValue(null, "version")); @@ -1386,9 +1389,6 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th if (producers.size()>0) { citation.getFields().add(FieldDTO.createMultipleCompoundFieldDTO("producer", producers)); } - if (prodPlac.size() > 0) { - 
citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); - } return; } } @@ -1437,8 +1437,9 @@ private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws if (otherIds.size()>0) { citation.addField(FieldDTO.createMultipleCompoundFieldDTO("otherId", otherIds)); } - if (altTitles.size()>0) { - citation.addField(FieldDTO.createMultiplePrimitiveFieldDTO("alternativeTitle", altTitles)); + if (!altTitles.isEmpty()) { + FieldDTO field = FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.alternativeTitle, altTitles); + citation.getFields().add(field); } return; } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 4ea90ea6199..a647c2a6f2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -30,6 +30,7 @@ import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_TAG; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_UNF; import edu.harvard.iq.dataverse.export.DDIExporter; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -93,7 +94,6 @@ public class DdiExportUtil { public static final String CITATION_BLOCK_NAME = "citation"; public static String datasetDtoAsJson2ddi(String datasetDtoAsJson) { - logger.fine(JsonUtil.prettyPrint(datasetDtoAsJson)); Gson gson = new Gson(); DatasetDTO datasetDto = gson.fromJson(datasetDtoAsJson, DatasetDTO.class); try { @@ -181,7 +181,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) String pidUri = pid; //Some tests don't send real PIDs - don't try to get their URL form if(!pidUri.equals("null:null/null")) { - pidUri= new GlobalId(persistentProtocol + ":" + persistentAuthority + "/" + 
persistentId).toURL().toString(); + pidUri= PidUtil.parseAsGlobalID(persistentProtocol, persistentAuthority, persistentId).asURL(); } // The "persistentAgency" tag is used for the "agency" attribute of the // ddi section; back in the DVN3 days we used "handle" and "DOI" @@ -202,12 +202,12 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); - + //writeFullElement(xmlw, "altTitl", dto2Primitive(version, DatasetFieldConstant.alternativeTitle)); FieldDTO altField = dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); if (altField != null) { writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); } - + xmlw.writeStartElement("IDNo"); writeAttribute(xmlw, "agency", persistentAgency); @@ -239,9 +239,11 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) } writeDistributorsElement(xmlw, version, datasetDto.getMetadataLanguage()); writeContactsElement(xmlw, version); - writeFullElement(xmlw, "distDate", dto2Primitive(version, DatasetFieldConstant.distributionDate)); + /* per SCHEMA, depositr comes before depDate! - L.A. */ writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); + /* ... and depDate comes before distDate - L.A. 
*/ writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); + writeFullElement(xmlw, "distDate", dto2Primitive(version, DatasetFieldConstant.distributionDate)); xmlw.writeEndElement(); // diststmt @@ -294,23 +296,16 @@ private static void writeOtherStudyMaterial(XMLStreamWriter xmlw , DatasetVersio xmlw.writeEndElement(); //othrStdyMat } + /* + + + + + + */ private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO version) throws XMLStreamException { xmlw.writeStartElement("dataAccs"); - if (version.getTermsOfUse() != null && !version.getTermsOfUse().trim().equals("")) { - xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_USE); - writeAttribute(xmlw, "level", LEVEL_DV); - xmlw.writeCharacters(version.getTermsOfUse()); - xmlw.writeEndElement(); //notes - } - if (version.getTermsOfAccess() != null && !version.getTermsOfAccess().trim().equals("")) { - xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_ACCESS); - writeAttribute(xmlw, "level", LEVEL_DV); - xmlw.writeCharacters(version.getTermsOfAccess()); - xmlw.writeEndElement(); //notes - } - + xmlw.writeStartElement("setAvail"); writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); writeFullElement(xmlw, "origArch", version.getOriginalArchive()); @@ -318,6 +313,7 @@ private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO ver writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); writeFullElement(xmlw, "complete", version.getStudyCompletion()); xmlw.writeEndElement(); //setAvail + xmlw.writeStartElement("useStmt"); writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); @@ -328,6 +324,15 @@ private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO ver writeFullElement(xmlw, "conditions", version.getConditions()); writeFullElement(xmlw, 
"disclaimer", version.getDisclaimer()); xmlw.writeEndElement(); //useStmt + + /* any s: */ + if (version.getTermsOfAccess() != null && !version.getTermsOfAccess().trim().equals("")) { + xmlw.writeStartElement("notes"); + writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_ACCESS); + writeAttribute(xmlw, "level", LEVEL_DV); + xmlw.writeCharacters(version.getTermsOfAccess()); + xmlw.writeEndElement(); //notes + } xmlw.writeEndElement(); //dataAccs } @@ -388,141 +393,222 @@ private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDT xmlw.writeEndElement(); // verStmt } + /* From the DDI 2.5 schema: + + + + + + + + + + + + + */ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO, String lang) throws XMLStreamException { xmlw.writeStartElement("sumDscr"); + FieldDTO timePeriodCoveredDTO = null; + FieldDTO dateOfCollectionDTO = null; + FieldDTO geographicCoverageDTO = null; + FieldDTO geographicBoundingBoxDTO = null; + FieldDTO unitOfAnalysisDTO = null; + FieldDTO universeDTO = null; + FieldDTO kindOfDataDTO = null; + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); + if ("citation".equals(key)) { - Integer per = 0; - Integer coll = 0; for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.timePeriodCovered.equals(fieldDTO.getTypeName())) { - String dateValStart = ""; - String dateValEnd = ""; - for (HashSet foo : fieldDTO.getMultipleCompound()) { - per++; - for (Iterator iterator = foo.iterator(); iterator.hasNext();) { - FieldDTO next = iterator.next(); - if (DatasetFieldConstant.timePeriodCoveredStart.equals(next.getTypeName())) { - dateValStart = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.timePeriodCoveredEnd.equals(next.getTypeName())) { - dateValEnd = next.getSinglePrimitive(); - } - } - if (!dateValStart.isEmpty()) { - writeDateElement(xmlw, "timePrd", "P"+ 
per.toString(), "start", dateValStart ); - } - if (!dateValEnd.isEmpty()) { - writeDateElement(xmlw, "timePrd", "P"+ per.toString(), "end", dateValEnd ); - } - } + timePeriodCoveredDTO = fieldDTO; } + if (DatasetFieldConstant.dateOfCollection.equals(fieldDTO.getTypeName())) { - String dateValStart = ""; - String dateValEnd = ""; - for (HashSet foo : fieldDTO.getMultipleCompound()) { - coll++; - for (Iterator iterator = foo.iterator(); iterator.hasNext();) { - FieldDTO next = iterator.next(); - if (DatasetFieldConstant.dateOfCollectionStart.equals(next.getTypeName())) { - dateValStart = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.dateOfCollectionEnd.equals(next.getTypeName())) { - dateValEnd = next.getSinglePrimitive(); - } - } - if (!dateValStart.isEmpty()) { - writeDateElement(xmlw, "collDate", "P"+ coll.toString(), "start", dateValStart ); - } - if (!dateValEnd.isEmpty()) { - writeDateElement(xmlw, "collDate", "P"+ coll.toString(), "end", dateValEnd ); - } - } + dateOfCollectionDTO = fieldDTO; } + if (DatasetFieldConstant.kindOfData.equals(fieldDTO.getTypeName())) { - writeMultipleElement(xmlw, "dataKind", fieldDTO, lang); + kindOfDataDTO = fieldDTO; } } } - - if("geospatial".equals(key)){ + + if ("geospatial".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.geographicCoverage.equals(fieldDTO.getTypeName())) { - - for (HashSet foo : fieldDTO.getMultipleCompound()) { - HashMap geoMap = new HashMap<>(); - for (Iterator iterator = foo.iterator(); iterator.hasNext();) { - FieldDTO next = iterator.next(); - if (DatasetFieldConstant.country.equals(next.getTypeName())) { - geoMap.put("country", next.getSinglePrimitive()); - } - if (DatasetFieldConstant.city.equals(next.getTypeName())) { - geoMap.put("city", next.getSinglePrimitive()); - } - if (DatasetFieldConstant.state.equals(next.getTypeName())) { - geoMap.put("state", next.getSinglePrimitive()); - } - if 
(DatasetFieldConstant.otherGeographicCoverage.equals(next.getTypeName())) { - geoMap.put("otherGeographicCoverage", next.getSinglePrimitive()); - } - } - - if (geoMap.get("country") != null) { - writeFullElement(xmlw, "nation", geoMap.get("country")); - } - if (geoMap.get("city") != null) { - writeFullElement(xmlw, "geogCover", geoMap.get("city")); - } - if (geoMap.get("state") != null) { - writeFullElement(xmlw, "geogCover", geoMap.get("state")); - } - if (geoMap.get("otherGeographicCoverage") != null) { - writeFullElement(xmlw, "geogCover", geoMap.get("otherGeographicCoverage")); - } - - } + geographicCoverageDTO = fieldDTO; } if (DatasetFieldConstant.geographicBoundingBox.equals(fieldDTO.getTypeName())) { - for (HashSet foo : fieldDTO.getMultipleCompound()) { - xmlw.writeStartElement("geoBndBox"); - for (Iterator iterator = foo.iterator(); iterator.hasNext();) { - FieldDTO next = iterator.next(); - if (DatasetFieldConstant.westLongitude.equals(next.getTypeName())) { - writeFullElement(xmlw, "westBL", next.getSinglePrimitive()); - } - if (DatasetFieldConstant.eastLongitude.equals(next.getTypeName())) { - writeFullElement(xmlw, "eastBL", next.getSinglePrimitive()); - } - if (DatasetFieldConstant.northLatitude.equals(next.getTypeName())) { - writeFullElement(xmlw, "northBL", next.getSinglePrimitive()); - } - if (DatasetFieldConstant.southLatitude.equals(next.getTypeName())) { - writeFullElement(xmlw, "southBL", next.getSinglePrimitive()); - } - - } - xmlw.writeEndElement(); - } + geographicBoundingBoxDTO = fieldDTO; } } - writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); } - if("socialscience".equals(key)){ + if ("socialscience".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.universe.equals(fieldDTO.getTypeName())) { - writeMultipleElement(xmlw, "universe", fieldDTO, lang); + universeDTO = fieldDTO; } if 
(DatasetFieldConstant.unitOfAnalysis.equals(fieldDTO.getTypeName())) { - writeI18NElementList(xmlw, "anlyUnit", fieldDTO.getMultipleVocab(), "unitOfAnalysis", fieldDTO.getTypeClass(), "socialscience", lang); + unitOfAnalysisDTO = fieldDTO; + } + } + } + } + /* Finally, we can write the fields we have collected, in the correct order: -L.A.*/ + + if (timePeriodCoveredDTO != null) { + String dateValStart = ""; + String dateValEnd = ""; + Integer per = 0; + for (HashSet foo : timePeriodCoveredDTO.getMultipleCompound()) { + per++; + for (Iterator iterator = foo.iterator(); iterator.hasNext();) { + FieldDTO next = iterator.next(); + if (DatasetFieldConstant.timePeriodCoveredStart.equals(next.getTypeName())) { + dateValStart = next.getSinglePrimitive(); + } + if (DatasetFieldConstant.timePeriodCoveredEnd.equals(next.getTypeName())) { + dateValEnd = next.getSinglePrimitive(); + } + } + if (!dateValStart.isEmpty()) { + writeDateElement(xmlw, "timePrd", "P" + per.toString(), "start", dateValStart); + } + if (!dateValEnd.isEmpty()) { + writeDateElement(xmlw, "timePrd", "P" + per.toString(), "end", dateValEnd); + } + } + } + + if (dateOfCollectionDTO != null) { + String dateValStart = ""; + String dateValEnd = ""; + Integer coll = 0; + for (HashSet foo : dateOfCollectionDTO.getMultipleCompound()) { + coll++; + for (Iterator iterator = foo.iterator(); iterator.hasNext();) { + FieldDTO next = iterator.next(); + if (DatasetFieldConstant.dateOfCollectionStart.equals(next.getTypeName())) { + dateValStart = next.getSinglePrimitive(); + } + if (DatasetFieldConstant.dateOfCollectionEnd.equals(next.getTypeName())) { + dateValEnd = next.getSinglePrimitive(); } } + if (!dateValStart.isEmpty()) { + writeDateElement(xmlw, "collDate", "P" + coll.toString(), "start", dateValStart); + } + if (!dateValEnd.isEmpty()) { + writeDateElement(xmlw, "collDate", "P" + coll.toString(), "end", dateValEnd); + } } } + + /* and come next, in that order. -L.A. 
*/ + if (geographicCoverageDTO != null) { + + List nationList = new ArrayList<>(); + List geogCoverList = new ArrayList<>(); + + for (HashSet foo : geographicCoverageDTO.getMultipleCompound()) { + for (Iterator iterator = foo.iterator(); iterator.hasNext();) { + FieldDTO next = iterator.next(); + /* our "country" field maps 1:1 to the DDI "": */ + if (DatasetFieldConstant.country.equals(next.getTypeName())) { + nationList.add(next.getSinglePrimitive()); + } + /* city, state and otherGeographicCoverage all exported as "": */ + if (DatasetFieldConstant.city.equals(next.getTypeName()) + || DatasetFieldConstant.state.equals(next.getTypeName()) + || DatasetFieldConstant.otherGeographicCoverage.equals(next.getTypeName())) { + geogCoverList.add(next.getSinglePrimitive()); + } + } + } + + /** + * And now we can write all the fields encountered, first the + * "" entries, then all the "" ones: + */ + for (String nationEntry : nationList) { + writeFullElement(xmlw, "nation", nationEntry); + } + for (String geogCoverEntry : geogCoverList) { + writeFullElement(xmlw, "geogCover", geogCoverEntry); + } + } + + writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); + + /* Only 1 geoBndBox is allowed in the DDI. + So, I'm just going to arbitrarily use the first one, and ignore the rest! -L.A. 
*/ + if (geographicBoundingBoxDTO != null) { + HashSet bndBoxSet = geographicBoundingBoxDTO.getMultipleCompound().get(0); + xmlw.writeStartElement("geoBndBox"); + HashMap geoBndBoxMap = new HashMap<>(); + for (FieldDTO next : bndBoxSet) { + if (DatasetFieldConstant.westLongitude.equals(next.getTypeName())) { + geoBndBoxMap.put("westBL", next.getSinglePrimitive()); + } + if (DatasetFieldConstant.eastLongitude.equals(next.getTypeName())) { + geoBndBoxMap.put("eastBL", next.getSinglePrimitive()); + } + if (DatasetFieldConstant.northLatitude.equals(next.getTypeName())) { + geoBndBoxMap.put("northBL", next.getSinglePrimitive()); + } + if (DatasetFieldConstant.southLatitude.equals(next.getTypeName())) { + geoBndBoxMap.put("southBL", next.getSinglePrimitive()); + } + } + + /* Once again, order is important! */ + /* + + + + + + + */ + if (geoBndBoxMap.get("westBL") != null) { + writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); + } + if (geoBndBoxMap.get("eastBL") != null) { + writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); + } + if (geoBndBoxMap.get("southBL") != null) { + writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); + } + if (geoBndBoxMap.get("northBL") != null) { + writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); + } + + xmlw.writeEndElement(); + } + + /* analyUnit: */ + if (unitOfAnalysisDTO != null) { + writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); + + } + + /* universe: */ + if (universeDTO != null) { + writeMultipleElement(xmlw, "universe", universeDTO, lang); + } + + /* finally, any "kind of data" entries: */ + if (kindOfDataDTO != null) { + writeMultipleElement(xmlw, "dataKind", kindOfDataDTO, lang); + } + xmlw.writeEndElement(); //sumDscr } @@ -544,6 +630,29 @@ private static void writeDateElement(XMLStreamWriter xmlw, String element, Strin } + /** + * Again, is an xs:sequence - order is important 
and must follow + * the schema. -L.A. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + */ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO version, String lang) throws XMLStreamException{ xmlw.writeStartElement("method"); xmlw.writeStartElement("dataColl"); @@ -557,13 +666,7 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); - xmlw.writeStartElement("sources"); - writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); - writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); - writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); - writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); - xmlw.writeEndElement(); //sources - + /* comes before : */ FieldDTO collModeFieldDTO = dto2FieldDTO(version, DatasetFieldConstant.collectionMode, "socialscience"); if (collModeFieldDTO != null) { // This field was made multiple as of 5.10 @@ -575,21 +678,33 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); } } + /* and so does : */ writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); + xmlw.writeStartElement("sources"); + writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); + writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); + writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); + writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); + xmlw.writeEndElement(); //sources + + writeI18NElement(xmlw, "collSitu", version, 
DatasetFieldConstant.dataCollectionSituation, lang); writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); - writeI18NElement(xmlw, "conOps", version, DatasetFieldConstant.controlOperations, lang); + /* "" has the uppercase C: */ + writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); xmlw.writeEndElement(); //dataColl + /* before : */ + writeNotesElement(xmlw, version); + xmlw.writeStartElement("anlyInfo"); //writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); xmlw.writeEndElement(); //anlyInfo - writeNotesElement(xmlw, version); xmlw.writeEndElement();//method } @@ -852,7 +967,6 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT String producerAffiliation = ""; String producerAbbreviation = ""; String producerLogo = ""; - String producerURL = ""; for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.producerName.equals(next.getTypeName())) { @@ -867,10 +981,6 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT if (DatasetFieldConstant.producerLogo.equals(next.getTypeName())) { producerLogo = next.getSinglePrimitive(); } - if (DatasetFieldConstant.producerURL.equals(next.getTypeName())) { - producerURL = next.getSinglePrimitive(); - - } } if (!producerName.isEmpty()) { xmlw.writeStartElement("producer"); @@ -880,12 +990,9 @@ private static void 
writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT if (!producerAbbreviation.isEmpty()) { writeAttribute(xmlw, "abbr", producerAbbreviation); } - if (!producerLogo.isEmpty()) { + /*if (!producerLogo.isEmpty()) { writeAttribute(xmlw, "role", producerLogo); - } - if (!producerURL.isEmpty()) { - writeAttribute(xmlw, "URI", producerURL); - } + }*/ xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty } @@ -896,12 +1003,10 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } } writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); - - FieldDTO prodPlac = dto2FieldDTO( version, DatasetFieldConstant.productionPlace, "citation" ); - if (prodPlac != null) { - writeMultipleElement(xmlw, "prodPlac", prodPlac, null); - } - + // productionPlace was made multiple as of 5.14: + // (a quick backward compatibility check was added to dto2PrimitiveList(), + // see the method for details) + writeFullElementList(xmlw, "prodPlac", dto2PrimitiveList(version, DatasetFieldConstant.productionPlace)); writeSoftwareElement(xmlw, version); writeGrantElement(xmlw, version); @@ -921,7 +1026,6 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio String distributorAffiliation = ""; String distributorAbbreviation = ""; String distributorURL = ""; - String distributorLogoURL = ""; for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.distributorName.equals(next.getTypeName())) { @@ -936,9 +1040,6 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio if (DatasetFieldConstant.distributorURL.equals(next.getTypeName())) { distributorURL = next.getSinglePrimitive(); } - if (DatasetFieldConstant.distributorLogo.equals(next.getTypeName())) { - distributorLogoURL = next.getSinglePrimitive(); - } } if (!distributorName.isEmpty()) { xmlw.writeStartElement("distrbtr"); @@ -954,9 
+1055,6 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio if (!distributorURL.isEmpty()) { writeAttribute(xmlw, "URI", distributorURL); } - if (!distributorLogoURL.isEmpty()) { - writeAttribute(xmlw, "role", distributorLogoURL); - } xmlw.writeCharacters(distributorName); xmlw.writeEndElement(); //AuthEnty } @@ -1000,16 +1098,33 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO if (citation != null && !citation.trim().equals("")) { xmlw.writeStartElement("relPubl"); xmlw.writeStartElement("citation"); + /* + + + + + + + + + + + + (In other words - titlStmt is mandatory! -L.A.) + */ + xmlw.writeStartElement("titlStmt"); + writeFullElement(xmlw, "titl", citation); if (IDNo != null && !IDNo.trim().equals("")) { - xmlw.writeStartElement("titlStmt"); + xmlw.writeStartElement("IDNo"); if (IDType != null && !IDType.trim().equals("")) { - xmlw.writeAttribute("agency", IDType ); + xmlw.writeAttribute("agency", IDType); } xmlw.writeCharacters(IDNo); xmlw.writeEndElement(); //IDNo - xmlw.writeEndElement(); // titlStmt } + xmlw.writeEndElement(); // titlStmt + writeFullElement(xmlw,"biblCit",citation); xmlw.writeEndElement(); //citation @@ -1181,33 +1296,34 @@ private static void writeSeriesElement(XMLStreamWriter xmlw, DatasetVersionDTO d for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); - if ("citation".equals(key)) { + if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.series.equals(fieldDTO.getTypeName())) { - xmlw.writeStartElement("serStmt"); String seriesName = ""; String seriesInformation = ""; - Set foo = fieldDTO.getSingleCompound(); + for (HashSet foo : fieldDTO.getMultipleCompound()) { + xmlw.writeStartElement("serStmt"); for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if 
(DatasetFieldConstant.seriesName.equals(next.getTypeName())) { - seriesName = next.getSinglePrimitive(); + seriesName = next.getSinglePrimitive(); } if (DatasetFieldConstant.seriesInformation.equals(next.getTypeName())) { - seriesInformation = next.getSinglePrimitive(); + seriesInformation = next.getSinglePrimitive(); } } - if (!seriesName.isEmpty()){ - xmlw.writeStartElement("serName"); + if (!seriesName.isEmpty()) { + xmlw.writeStartElement("serName"); xmlw.writeCharacters(seriesName); - xmlw.writeEndElement(); //grantno + xmlw.writeEndElement(); //serName } - if (!seriesInformation.isEmpty()){ - xmlw.writeStartElement("serInfo"); + if (!seriesInformation.isEmpty()) { + xmlw.writeStartElement("serInfo"); xmlw.writeCharacters(seriesInformation); - xmlw.writeEndElement(); //grantno + xmlw.writeEndElement(); //serInfo } - xmlw.writeEndElement(); //serStmt + xmlw.writeEndElement(); //serStmt + } } } } @@ -1234,17 +1350,18 @@ private static void writeTargetSampleElement(XMLStreamWriter xmlw, DatasetVersio actualSize = next.getSinglePrimitive(); } } - - if (!sizeFormula.isEmpty()) { - xmlw.writeStartElement("sampleSizeFormula"); - xmlw.writeCharacters(sizeFormula); - xmlw.writeEndElement(); //sampleSizeFormula - } + /* must come before ! -L.A. 
*/ if (!actualSize.isEmpty()) { xmlw.writeStartElement("sampleSize"); xmlw.writeCharacters(actualSize); xmlw.writeEndElement(); //sampleSize } + if (!sizeFormula.isEmpty()) { + xmlw.writeStartElement("sampleSizeFormula"); + xmlw.writeCharacters(sizeFormula); + xmlw.writeEndElement(); //sampleSizeFormula + } + xmlw.writeEndElement(); // targetSampleSize } } @@ -1356,8 +1473,8 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, List< writeAttribute(xmlw, "ID", "f" + fileMetadata.getDataFile().getId()); String dfIdentifier = fileMetadata.getDataFile().getIdentifier(); if (dfIdentifier != null && !dfIdentifier.isEmpty()){ - GlobalId globalId = new GlobalId(fileMetadata.getDataFile()); - writeAttribute(xmlw, "URI", globalId.toURL().toString()); + GlobalId globalId = fileMetadata.getDataFile().getGlobalId(); + writeAttribute(xmlw, "URI", globalId.asURL()); } else { writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileMetadata.getDataFile().getId()); } @@ -1432,7 +1549,15 @@ private static List dto2PrimitiveList(DatasetVersionDTO datasetVersionDT MetadataBlockDTO value = entry.getValue(); for (FieldDTO fieldDTO : value.getFields()) { if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - return fieldDTO.getMultiplePrimitive(); + // This hack is here to make sure the export does not blow + // up on an instance that upgraded to a Dataverse version + // where a certain primitive has been made multiple, but has + // not yet update the block. 
+ if (fieldDTO.getMultiple() != null && fieldDTO.getMultiple()) { + return fieldDTO.getMultiplePrimitive(); + } else { + return Arrays.asList(fieldDTO.getSinglePrimitive()); + } } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 34cb7a4e138..334b18f4601 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -1,13 +1,8 @@ package edu.harvard.iq.dataverse.export.openaire; import java.io.OutputStream; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.logging.Logger; -import java.util.List; import javax.json.JsonObject; import javax.xml.stream.XMLOutputFactory; @@ -18,13 +13,17 @@ import com.google.gson.Gson; +import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -71,7 +70,7 @@ private static void createOpenAire(XMLStreamWriter xmlw, DatasetDTO datasetDto) String persistentAgency = datasetDto.getProtocol(); String persistentAuthority = datasetDto.getAuthority(); String persistentId = datasetDto.getIdentifier(); - GlobalId globalId = new GlobalId(persistentAgency, 
persistentAuthority, persistentId); + GlobalId globalId = PidUtil.parseAsGlobalID(persistentAgency, persistentAuthority, persistentId); // The sequence is revied using sample: // https://schema.datacite.org/meta/kernel-4.0/example/datacite-example-full-v4.0.xml @@ -83,7 +82,7 @@ private static void createOpenAire(XMLStreamWriter xmlw, DatasetDTO datasetDto) String language = null; // 1, Identifier (with mandatory type sub-property) (M) - writeIdentifierElement(xmlw, globalId.toURL().toString(), language); + writeIdentifierElement(xmlw, globalId.asURL(), language); // 2, Creator (with optional given name, family name, // name identifier and affiliation sub-properties) (M) @@ -191,10 +190,10 @@ public static void writeIdentifierElement(XMLStreamWriter xmlw, String identifie if (StringUtils.isNotBlank(identifier)) { Map identifier_map = new HashMap(); - if (StringUtils.containsIgnoreCase(identifier, GlobalId.DOI_RESOLVER_URL)) { + if (StringUtils.containsIgnoreCase(identifier, DOIServiceBean.DOI_RESOLVER_URL)) { identifier_map.put("identifierType", "DOI"); identifier = StringUtils.substring(identifier, identifier.indexOf("10.")); - } else if (StringUtils.containsIgnoreCase(identifier, GlobalId.HDL_RESOLVER_URL)) { + } else if (StringUtils.containsIgnoreCase(identifier, HandlenetServiceBean.HDL_RESOLVER_URL)) { identifier_map.put("identifierType", "Handle"); if (StringUtils.contains(identifier, "http")) { identifier = identifier.replace(identifier.substring(0, identifier.indexOf("/") + 2), ""); @@ -250,72 +249,26 @@ public static void writeCreatorsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (StringUtils.isNotBlank(creatorName)) { creator_check = writeOpenTag(xmlw, "creators", creator_check); xmlw.writeStartElement("creator"); // - - boolean nameType_check = false; + Map creator_map = new HashMap(); - if ((StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid"))) { + JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, + 
StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + + // creatorName=, + if (creatorObj.getBoolean("isPerson")) { creator_map.put("nameType", "Personal"); - nameType_check = true; - } - // ToDo - the algorithm to determine if this is a Person or Organization here - // has been abstracted into a separate - // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here - // to avoid duplication/variants of the algorithm - creatorName = Cleanup.normalize(creatorName); - // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 - if (creatorName.contains(",")) { - String givenName = FirstNames.getInstance().getFirstName(creatorName); - boolean isOrganization = Organizations.getInstance().isOrganization(creatorName); - - // creatorName=, - if (givenName != null && !isOrganization) { - // givenName ok - creator_map.put("nameType", "Personal"); - nameType_check = true; - } else if (isOrganization) { - creator_map.put("nameType", "Organizational"); - nameType_check = false; - } - writeFullElement(xmlw, null, "creatorName", creator_map, creatorName, language); - - if ((nameType_check) && (!creatorName.replaceFirst(",", "").contains(","))) { - // creatorName=, - String[] fullName = creatorName.split(", "); - if (fullName.length == 2) { - givenName = fullName[1]; - String familyName = fullName[0]; - - writeFullElement(xmlw, null, "givenName", null, givenName, language); - writeFullElement(xmlw, null, "familyName", null, familyName, language); - } else { - // It's possible to get here if "Smith," is entered as an author name. 
- logger.info("Unable to write givenName and familyName based on creatorName '" + creatorName + "'."); - } - } } else { - String givenName = FirstNames.getInstance().getFirstName(creatorName); - boolean isOrganization = Organizations.getInstance().isOrganization(creatorName); - - if (givenName != null && !isOrganization) { - // givenName ok, creatorName= - creator_map.put("nameType", "Personal"); - nameType_check = true; - writeFullElement(xmlw, null, "creatorName", creator_map, creatorName, language); - - String familyName = ""; - if (givenName.length() + 1 < creatorName.length()) { - familyName = creatorName.substring(givenName.length() + 1); - } - - writeFullElement(xmlw, null, "givenName", null, givenName, language); - writeFullElement(xmlw, null, "familyName", null, familyName, language); - } else { - // default - if (isOrganization) { - creator_map.put("nameType", "Organizational"); - } - writeFullElement(xmlw, null, "creatorName", creator_map, creatorName, language); - } + creator_map.put("nameType", "Organizational"); + } + writeFullElement(xmlw, null, "creatorName", creator_map, + creatorObj.getString("fullName"), language); + if (creatorObj.containsKey("givenName")) { + writeFullElement(xmlw, null, "givenName", null, creatorObj.getString("givenName"), + language); + } + if (creatorObj.containsKey("familyName")) { + writeFullElement(xmlw, null, "familyName", null, creatorObj.getString("familyName"), + language); } if (StringUtils.isNotBlank(nameIdentifier)) { @@ -369,10 +322,34 @@ public static void writeTitlesElement(XMLStreamWriter xmlw, DatasetVersionDTO da String subtitle = dto2Primitive(datasetVersionDTO, DatasetFieldConstant.subTitle); title_check = writeTitleElement(xmlw, "Subtitle", subtitle, title_check, language); + //String alternativeTitle = dto2Primitive(datasetVersionDTO, DatasetFieldConstant.alternativeTitle); + //title_check = writeTitleElement(xmlw, "AlternativeTitle", alternativeTitle, title_check, language); title_check = 
writeMultipleTitleElement(xmlw, "AlternativeTitle", datasetVersionDTO, "citation", title_check, language); + writeEndTag(xmlw, title_check); + } + private static boolean writeMultipleTitleElement(XMLStreamWriter xmlw, String titleType, DatasetVersionDTO datasetVersionDTO, String metadataBlockName, boolean title_check, String language) throws XMLStreamException { + MetadataBlockDTO block = datasetVersionDTO.getMetadataBlocks().get(metadataBlockName); + if (block != null) { + logger.info("Block is not empty"); + List fieldsBlock = block.getFields(); + if (fieldsBlock != null) { + for (FieldDTO fieldDTO : fieldsBlock) { + logger.info(titleType + " " + fieldDTO.getTypeName()); + if (titleType.toLowerCase().equals(fieldDTO.getTypeName().toLowerCase())) { + logger.info("Found Alt title"); + List fields = fieldDTO.getMultiplePrimitive(); + for (String value : fields) { + if (!writeTitleElement(xmlw, titleType, value, title_check, language)) + title_check = false; + } + break; + } + } + } + } - writeEndTag(xmlw, title_check); + return title_check; } /** @@ -405,33 +382,6 @@ private static boolean writeTitleElement(XMLStreamWriter xmlw, String titleType, } return title_check; } - - private static boolean writeMultipleTitleElement(XMLStreamWriter xmlw, String titleType, DatasetVersionDTO datasetVersionDTO, String metadataBlockName, boolean title_check, String language) throws XMLStreamException { - MetadataBlockDTO block = datasetVersionDTO.getMetadataBlocks().get(metadataBlockName); - if (block != null) { - logger.info("Block is not empty"); - List fieldsBlock = block.getFields(); - if (fieldsBlock != null) { - for (FieldDTO fieldDTO : fieldsBlock) { - logger.info(titleType + " " + fieldDTO.getTypeName()); - if (titleType.toLowerCase().equals(fieldDTO.getTypeName().toLowerCase())) { - logger.info("Found Alt title"); - List fields = fieldDTO.getMultiplePrimitive(); - for (String value : fields) { - if (!writeTitleElement(xmlw, titleType, value, title_check, language)) - 
title_check = false; - } - break; - } - } - } - } - - return title_check; - } - - - /** * 5, PublicationYear (M) @@ -737,61 +687,23 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu boolean nameType_check = false; Map contributor_map = new HashMap(); - // ToDo - the algorithm to determine if this is a Person or Organization here - // has been abstracted into a separate - // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here - // to avoid duplication/variants of the algorithm + JsonObject contributorObj = PersonOrOrgUtil.getPersonOrOrganization(contributorName, + ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName)), false); - contributorName = Cleanup.normalize(contributorName); - // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 - if (contributorName.contains(",")) { - String givenName = FirstNames.getInstance().getFirstName(contributorName); - boolean isOrganization = Organizations.getInstance().isOrganization(contributorName); - - // contributorName=, - if (givenName != null && !isOrganization) { - // givenName ok + if (contributorObj.getBoolean("isPerson")) { + if(contributorObj.containsKey("givenName")) { contributor_map.put("nameType", "Personal"); - nameType_check = true; - // re: the above toDo - the ("ContactPerson".equals(contributorType) && - // !isValidEmailAddress(contributorName)) clause in the next line could/should - // be sent as the OrgIfTied boolean parameter - } else if (isOrganization || ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName))) { - contributor_map.put("nameType", "Organizational"); - } - writeFullElement(xmlw, null, "contributorName", contributor_map, contributorName, language); - - if ((nameType_check) && (!contributorName.replaceFirst(",", "").contains(","))) { - // contributorName=, - String[] fullName = contributorName.split(", "); - givenName = fullName[1]; - String 
familyName = fullName[0]; - - writeFullElement(xmlw, null, "givenName", null, givenName, language); - writeFullElement(xmlw, null, "familyName", null, familyName, language); } } else { - String givenName = FirstNames.getInstance().getFirstName(contributorName); - boolean isOrganization = Organizations.getInstance().isOrganization(contributorName); - - if (givenName != null && !isOrganization) { - contributor_map.put("nameType", "Personal"); - writeFullElement(xmlw, null, "contributorName", contributor_map, contributorName, language); - - String familyName = ""; - if (givenName.length() + 1 < contributorName.length()) { - familyName = contributorName.substring(givenName.length() + 1); - } + contributor_map.put("nameType", "Organizational"); + } + writeFullElement(xmlw, null, "contributorName", contributor_map, contributorName, language); - writeFullElement(xmlw, null, "givenName", null, givenName, language); - writeFullElement(xmlw, null, "familyName", null, familyName, language); - } else { - // default - if (isOrganization || ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName))) { - contributor_map.put("nameType", "Organizational"); - } - writeFullElement(xmlw, null, "contributorName", contributor_map, contributorName, language); - } + if (contributorObj.containsKey("givenName")) { + writeFullElement(xmlw, null, "givenName", null, contributorObj.getString("givenName"), language); + } + if (contributorObj.containsKey("familyName")) { + writeFullElement(xmlw, null, "familyName", null, contributorObj.getString("familyName"), language); } if (StringUtils.isNotBlank(contributorAffiliation)) { @@ -1291,26 +1203,17 @@ public static void writeDescriptionsElement(XMLStreamWriter xmlw, DatasetVersion if (DatasetFieldConstant.series.equals(fieldDTO.getTypeName())) { // String seriesName = null; String seriesInformation = null; - - Set fieldDTOs = fieldDTO.getSingleCompound(); - for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) 
{ - FieldDTO next = iterator.next(); - /*if (DatasetFieldConstant.seriesName.equals(next.getTypeName())) { - seriesName = next.getSinglePrimitive(); - }*/ - if (DatasetFieldConstant.seriesInformation.equals(next.getTypeName())) { - seriesInformation = next.getSinglePrimitive(); + for (HashSet fieldDTOs : fieldDTO.getMultipleCompound()) { + for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) { + FieldDTO next = iterator.next(); + if (DatasetFieldConstant.seriesInformation.equals(next.getTypeName())) { + seriesInformation = next.getSinglePrimitive(); + } + } + if (StringUtils.isNotBlank(seriesInformation)) { + description_check = writeOpenTag(xmlw, "descriptions", description_check); + writeDescriptionElement(xmlw, "SeriesInformation", seriesInformation, language); } - } - - /*if (StringUtils.isNotBlank(seriesName)){ - contributor_check = writeOpenTag(xmlw, "descriptions", description_check); - - writeDescriptionElement(xmlw, "SeriesInformation", seriesName); - }*/ - if (StringUtils.isNotBlank(seriesInformation)) { - description_check = writeOpenTag(xmlw, "descriptions", description_check); - writeDescriptionElement(xmlw, "SeriesInformation", seriesInformation, language); } } } From 3bb7cbfae2072f7bb4f5b11567cd0b02c0b4bb02 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 11 May 2023 17:27:05 -0400 Subject: [PATCH 08/25] add prodPlac --- .../iq/dataverse/api/imports/ImportDDIServiceBean.java | 8 +++++--- .../harvard/iq/dataverse/export/ddi/DdiExportUtil.java | 6 +++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index bafd7267acb..ae98e7e76ea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1339,6 +1339,7 @@ private void processProdStmt(XMLStreamReader 
xmlr, MetadataBlockDTO citation) th List> producers = new ArrayList<>(); List> grants = new ArrayList<>(); List> software = new ArrayList<>(); + List prodPlac = new ArrayList<>(); for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { @@ -1354,9 +1355,7 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th } else if (xmlr.getLocalName().equals("prodDate")) { citation.getFields().add(FieldDTO.createPrimitiveFieldDTO("productionDate", parseDate(xmlr, "prodDate"))); } else if (xmlr.getLocalName().equals("prodPlac")) { - List prodPlac = new ArrayList<>(); - prodPlac.add(parseText(xmlr, "prodPlac")); - citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); + prodPlac.add(parseText(xmlr)); } else if (xmlr.getLocalName().equals("software")) { HashSet set = new HashSet<>(); addToSet(set,"softwareVersion", xmlr.getAttributeValue(null, "version")); @@ -1389,6 +1388,9 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th if (producers.size()>0) { citation.getFields().add(FieldDTO.createMultipleCompoundFieldDTO("producer", producers)); } + if (prodPlac.size() > 0) { + citation.getFields().add(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.productionPlace, prodPlac)); + } return; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index a647c2a6f2a..cd9311ec518 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -1006,7 +1006,11 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT // productionPlace was made multiple as of 5.14: // (a quick backward compatibility check was added to dto2PrimitiveList(), // see the 
method for details) - writeFullElementList(xmlw, "prodPlac", dto2PrimitiveList(version, DatasetFieldConstant.productionPlace)); + + FieldDTO prodPlac = dto2FieldDTO( version, DatasetFieldConstant.productionPlace, "citation" ); + if (prodPlac != null) { + writeMultipleElement(xmlw, "prodPlac", prodPlac, null); + } writeSoftwareElement(xmlw, version); writeGrantElement(xmlw, version); From b9720c868b89e9db191b1425fb31574771bc1cee Mon Sep 17 00:00:00 2001 From: Victoria Lubitch <43550154+lubitchv@users.noreply.github.com> Date: Mon, 15 May 2023 12:35:42 -0400 Subject: [PATCH 09/25] Update src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java Co-authored-by: Philip Durbin --- .../java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index cd9311ec518..819a14c6c68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -202,7 +202,6 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); - //writeFullElement(xmlw, "altTitl", dto2Primitive(version, DatasetFieldConstant.alternativeTitle)); FieldDTO altField = dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); if (altField != null) { writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); From 5684140dff737f092f195ffeefddbf5074a409e5 Mon Sep 17 00:00:00 2001 From: Victoria Lubitch <43550154+lubitchv@users.noreply.github.com> Date: Mon, 15 May 2023 12:37:18 -0400 Subject: [PATCH 10/25] Update 
src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java Co-authored-by: Philip Durbin --- .../iq/dataverse/export/openaire/OpenAireExportUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 334b18f4601..e858dee6d2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -331,7 +331,7 @@ public static void writeTitlesElement(XMLStreamWriter xmlw, DatasetVersionDTO da private static boolean writeMultipleTitleElement(XMLStreamWriter xmlw, String titleType, DatasetVersionDTO datasetVersionDTO, String metadataBlockName, boolean title_check, String language) throws XMLStreamException { MetadataBlockDTO block = datasetVersionDTO.getMetadataBlocks().get(metadataBlockName); if (block != null) { - logger.info("Block is not empty"); + logger.fine("Block is not empty"); List fieldsBlock = block.getFields(); if (fieldsBlock != null) { for (FieldDTO fieldDTO : fieldsBlock) { From 64f4f1f0af83384b8157f2b13d29d941cb2aac77 Mon Sep 17 00:00:00 2001 From: Victoria Lubitch <43550154+lubitchv@users.noreply.github.com> Date: Mon, 15 May 2023 12:37:31 -0400 Subject: [PATCH 11/25] Update src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java Co-authored-by: Philip Durbin --- .../iq/dataverse/export/openaire/OpenAireExportUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index e858dee6d2b..146c442526a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -335,7 +335,7 @@ private static boolean writeMultipleTitleElement(XMLStreamWriter xmlw, String ti List fieldsBlock = block.getFields(); if (fieldsBlock != null) { for (FieldDTO fieldDTO : fieldsBlock) { - logger.info(titleType + " " + fieldDTO.getTypeName()); + logger.fine(titleType + " " + fieldDTO.getTypeName()); if (titleType.toLowerCase().equals(fieldDTO.getTypeName().toLowerCase())) { logger.info("Found Alt title"); List fields = fieldDTO.getMultiplePrimitive(); From 37a372a23c92d4eb2c946a50bf00b91319e141de Mon Sep 17 00:00:00 2001 From: Victoria Lubitch <43550154+lubitchv@users.noreply.github.com> Date: Mon, 15 May 2023 12:37:44 -0400 Subject: [PATCH 12/25] Update src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java Co-authored-by: Philip Durbin --- .../iq/dataverse/export/openaire/OpenAireExportUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 146c442526a..037428d0ea1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -337,7 +337,7 @@ private static boolean writeMultipleTitleElement(XMLStreamWriter xmlw, String ti for (FieldDTO fieldDTO : fieldsBlock) { logger.fine(titleType + " " + fieldDTO.getTypeName()); if (titleType.toLowerCase().equals(fieldDTO.getTypeName().toLowerCase())) { - logger.info("Found Alt title"); + logger.fine("Found Alt title"); List fields = fieldDTO.getMultiplePrimitive(); for (String value : fields) { if (!writeTitleElement(xmlw, titleType, value, title_check, language)) From 2ef0e5f0231367b90e962e73a638aab4c84a9ada Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 15 May 2023 12:39:32 -0400 Subject: [PATCH 
13/25] test --- .../iq/dataverse/export/openaire/OpenAireExportUtil.java | 2 -- .../edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json | 6 ++++++ .../edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 334b18f4601..6dca1ac348a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -322,8 +322,6 @@ public static void writeTitlesElement(XMLStreamWriter xmlw, DatasetVersionDTO da String subtitle = dto2Primitive(datasetVersionDTO, DatasetFieldConstant.subTitle); title_check = writeTitleElement(xmlw, "Subtitle", subtitle, title_check, language); - //String alternativeTitle = dto2Primitive(datasetVersionDTO, DatasetFieldConstant.alternativeTitle); - //title_check = writeTitleElement(xmlw, "AlternativeTitle", alternativeTitle, title_check, language); title_check = writeMultipleTitleElement(xmlw, "AlternativeTitle", datasetVersionDTO, "citation", title_check, language); writeEndTag(xmlw, title_check); } diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json index 7845f77d33f..9bdc7e45349 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json @@ -29,6 +29,12 @@ "typeClass": "primitive", "value": "Darwin's Finches" }, + { + "typeName": "alternativeTitle", + "multiple": true, + "typeClass": "primitive", + "value": ["Darwin's Finches Alternative Title1", "Darwin's Finches Alternative Title2"] + }, { "typeName": "author", "multiple": true, diff --git 
a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml index 5bbfdae09ac..6730c44603a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml @@ -17,6 +17,8 @@ Darwin's Finches + Darwin's Finches Alternative Title1 + Darwin's Finches Alternative Title2 doi:10.5072/FK2/PCA2E3 From 4b4c9155048f0ee074f6ba9d01a12e02ea4abd00 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 15 May 2023 12:54:37 -0400 Subject: [PATCH 14/25] docs --- doc/release-notes/9428-alternative-title.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/9428-alternative-title.md b/doc/release-notes/9428-alternative-title.md index d6eaa680612..3bc74f218b5 100644 --- a/doc/release-notes/9428-alternative-title.md +++ b/doc/release-notes/9428-alternative-title.md @@ -3,4 +3,7 @@ Alternative Title is made repeatable. `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` - One will also need to update solr schema: Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` -Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` + +Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. 
+For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] From 58964db4102be36b5455a284cfa235d799196964 Mon Sep 17 00:00:00 2001 From: Victoria Lubitch Date: Mon, 21 Aug 2023 12:48:45 -0400 Subject: [PATCH 15/25] Alternative titles --- .../export/ddi/dataset-create-new-all-ddi-fields.json | 2 +- .../java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json index 1cc2d9f761f..bdff949bb36 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -53,7 +53,7 @@ "typeName": "alternativeTitle", "multiple": true, "typeClass": "primitive", - "value": ["Alternative Title"] + "value": ["Alternative Title1", "Alternative Title2"] }, { "typeName": "otherId", diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml b/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml index 6e2ccfd5545..507d752192d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml @@ -21,7 +21,8 @@ Replication Data for: Title Subtitle - Alternative Title + Alternative Title1 + Alternative Title2 doi:10.5072/FK2/WKUKGV OtherIDIdentifier1 OtherIDIdentifier2 From a90aa22095b41a07cc9f9f11d51425dc940c5438 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 25 Aug 2023 11:50:38 -0400 Subject: [PATCH 16/25] alt title multiple --- scripts/api/data/dataset-create-new-all-default-fields.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json 
b/scripts/api/data/dataset-create-new-all-default-fields.json index 4af128955c9..1118ed98a03 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -22,9 +22,9 @@ }, { "typeName": "alternativeTitle", - "multiple": false, + "multiple": true, "typeClass": "primitive", - "value": "Alternative Title" + "value": ["Alternative Title"] }, { "typeName": "alternativeURL", From 87270aadbd71e18ce52cf18d32a4c7bfaf8f7257 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 30 Aug 2023 11:43:00 -0400 Subject: [PATCH 17/25] define/add ORE format version, include Dataverse software info --- .../iq/dataverse/util/bagit/OREMap.java | 44 +++++++++++++++++-- .../iq/dataverse/util/bagit/OREMapHelper.java | 7 ++- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index b3995b5957e..8582184d210 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -38,14 +38,33 @@ import jakarta.json.JsonValue; import org.apache.commons.lang3.exception.ExceptionUtils; - +import org.bouncycastle.math.ec.ScaleYPointMap; + +/** + * This class is used to generate a JSON-LD representation of a Dataverse object leveraging the OAI_ORE and other community vocabularies. As of v1.0.0, + * the format is being versioned and ANY CHANGES TO THE OUTPUT of this class must be reflected in a version increment (see DATAVERSE_ORE_FORMAT_VERSION). + * + * The OREMap class is intended to record ALL the information needed to recreate an existing Dataverse dataset. As of v1.0.0, this is true with the + * exception that auxiliary files are not referenced in the OREMap. 
While many types of auxiliary files will be regenerated automatically based on datafile + * contents, Dataverse now allows manually uploaded auxiliary files and these cannot be reproduced solely from the dataset/datafile contents. + */ public class OREMap { + //Required Services static SettingsServiceBean settingsService; static DatasetFieldServiceBean datasetFieldService; + static SystemConfig systemConfig; + private static final Logger logger = Logger.getLogger(OREMap.class.getCanonicalName()); public static final String NAME = "OREMap"; + + //NOTE: Update this value whenever the output of this class is changed + private static final String DATAVERSE_ORE_FORMAT_VERSION = "Dataverse OREMap Format v1.0.0"; + private static final String DATAVERSE_SOFTWARE_NAME = "Dataverse"; + private static final String DATAVERSE_SOFTWARE_URL = "https://github.com/iqss/dataverse"; + + private Map localContext = new TreeMap(); private DatasetVersion version; private Boolean excludeEmail = null; @@ -269,10 +288,23 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) { return aggBuilder.add("@context", contextBuilder.build()); } else { // Now create the overall map object with it's metadata + + //Start with a reference to the Dataverse software + JsonObjectBuilder dvSoftwareBuilder = Json.createObjectBuilder() + .add("@type", JsonLDTerm.ore("SoftwareApplication").getLabel()) + .add(JsonLDTerm.schemaOrg("name").getLabel(), DATAVERSE_SOFTWARE_NAME) + .add(JsonLDTerm.schemaOrg("version").getLabel(), systemConfig.getVersion(true)) + .add(JsonLDTerm.schemaOrg("url").getLabel(), DATAVERSE_SOFTWARE_URL); + + //Now the OREMAP object itself JsonObjectBuilder oremapBuilder = Json.createObjectBuilder() .add(JsonLDTerm.dcTerms("modified").getLabel(), LocalDate.now().toString()) .add(JsonLDTerm.dcTerms("creator").getLabel(), BrandingUtil.getInstallationBrandName()) .add("@type", JsonLDTerm.ore("ResourceMap").getLabel()) + //Add the version of our ORE format used + 
.add(JsonLDTerm.schemaOrg("additionalType").getLabel(), DATAVERSE_ORE_FORMAT_VERSION) + //Indicate which Dataverse version created it + .add(JsonLDTerm.DVCore("generatedBy").getLabel(), dvSoftwareBuilder) // Define an id for the map itself (separate from the @id of the dataset being // described .add("@id", @@ -283,7 +315,11 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) { aggBuilder.add(JsonLDTerm.ore("aggregates").getLabel(), aggResArrayBuilder.build()) .add(JsonLDTerm.schemaOrg("hasPart").getLabel(), fileArray.build()).build()) // and finally add the context - .add("@context", contextBuilder.build()); + .add("@context", contextBuilder.build()) + ; + + + return oremapBuilder; } } @@ -467,8 +503,10 @@ private static void addCvocValue(String val, JsonArrayBuilder vals, JsonObject c } } - public static void injectSettingsService(SettingsServiceBean settingsSvc, DatasetFieldServiceBean datasetFieldSvc) { + //These are used to pick up various settings/constants from the application + public static void injectServices(SettingsServiceBean settingsSvc, DatasetFieldServiceBean datasetFieldSvc, SystemConfig systemCfg) { settingsService = settingsSvc; datasetFieldService = datasetFieldSvc; + systemConfig = systemCfg; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMapHelper.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMapHelper.java index 4d63edac268..cca1e16b4f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMapHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMapHelper.java @@ -2,7 +2,7 @@ import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; - +import edu.harvard.iq.dataverse.util.SystemConfig; import jakarta.annotation.PostConstruct; import jakarta.ejb.EJB; import jakarta.ejb.Singleton; @@ -22,8 +22,11 @@ public class OREMapHelper { @EJB DatasetFieldServiceBean datasetFieldSvc; + @EJB + SystemConfig 
systemConfig; + @PostConstruct public void injectService() { - OREMap.injectSettingsService(settingsSvc, datasetFieldSvc); + OREMap.injectServices(settingsSvc, datasetFieldSvc, systemConfig); } } From 4f4355a69e909a9c6733767edfb78a660d7e59f2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 30 Aug 2023 12:46:35 -0400 Subject: [PATCH 18/25] add version state info --- .../edu/harvard/iq/dataverse/util/bagit/OREMap.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 8582184d210..fbceb98f8b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.Embargo; @@ -133,6 +134,18 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) { .add(JsonLDTerm.schemaOrg("name").getLabel(), version.getTitle()) .add(JsonLDTerm.schemaOrg("dateModified").getLabel(), version.getLastUpdateTime().toString()); addIfNotNull(aggBuilder, JsonLDTerm.schemaOrg("datePublished"), dataset.getPublicationDateFormattedYYYYMMDD()); + //Add version state info - DRAFT, RELEASED, DEACCESSIONED, ARCHIVED with extra info for DEACCESSIONED + VersionState vs = version.getVersionState(); + if(vs.equals(VersionState.DEACCESSIONED)) { + JsonObjectBuilder deaccBuilder = Json.createObjectBuilder(); + deaccBuilder.add(JsonLDTerm.schemaOrg("name").getLabel(), vs.name()); + deaccBuilder.add(JsonLDTerm.DVCore("reason").getLabel(), version.getVersionNote()); + addIfNotNull(deaccBuilder, JsonLDTerm.DVCore("forwardUrl"),
version.getArchiveNote()); + aggBuilder.add(JsonLDTerm.schemaOrg("creativeWorkStatus").getLabel(), deaccBuilder); + + } else { + aggBuilder.add(JsonLDTerm.schemaOrg("creativeWorkStatus").getLabel(), vs.name()); + } TermsOfUseAndAccess terms = version.getTermsOfUseAndAccess(); if (terms.getLicense() != null) { From e69b4a517bb4e8e89dfe867f796d0c4efe56865a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 30 Aug 2023 13:03:20 -0400 Subject: [PATCH 19/25] remove unused import --- src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index fbceb98f8b1..fd219bf9d93 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -39,7 +39,6 @@ import jakarta.json.JsonValue; import org.apache.commons.lang3.exception.ExceptionUtils; -import org.bouncycastle.math.ec.ScaleYPointMap; /** * This class is used to generate a JSON-LD representation of a Dataverse object leveraging the OAI_ORE and other community vocabularies. 
As of v1.0.0, From 573bed941cfebae0282c53785a3847cddaf6b809 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 15 Sep 2023 09:39:46 -0400 Subject: [PATCH 20/25] typo - using schema.org SoftwareApplication --- src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index fd219bf9d93..1ea1a5411fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -303,7 +303,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) { //Start with a reference to the Dataverse software JsonObjectBuilder dvSoftwareBuilder = Json.createObjectBuilder() - .add("@type", JsonLDTerm.ore("SoftwareApplication").getLabel()) + .add("@type", JsonLDTerm.schemaOrg("SoftwareApplication").getLabel()) .add(JsonLDTerm.schemaOrg("name").getLabel(), DATAVERSE_SOFTWARE_NAME) .add(JsonLDTerm.schemaOrg("version").getLabel(), systemConfig.getVersion(true)) .add(JsonLDTerm.schemaOrg("url").getLabel(), DATAVERSE_SOFTWARE_URL); From 5a7568afc9804dc5e9159dc5f395eebfc963d2c1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 15 Sep 2023 11:12:12 -0400 Subject: [PATCH 21/25] update docs, add release note --- doc/release-notes/9859-ORE and Bag updates.md | 14 ++++++++++++++ doc/sphinx-guides/source/admin/integrations.rst | 9 ++++++++- doc/sphinx-guides/source/api/native-api.rst | 4 +++- doc/sphinx-guides/source/installation/config.rst | 14 +++++++------- 4 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 doc/release-notes/9859-ORE and Bag updates.md diff --git a/doc/release-notes/9859-ORE and Bag updates.md b/doc/release-notes/9859-ORE and Bag updates.md new file mode 100644 index 00000000000..dd3ae3bbbe1 --- /dev/null +++ b/doc/release-notes/9859-ORE and Bag updates.md @@ -0,0 +1,14 @@ +Dataverse's 
OAI_ORE Metadata Export format and archival BagIT exports +(which include the OAI-ORE metadata export file) have been updated to include +information about the dataset version state, e.g. RELEASED or DEACCESSIONED +and to indicate which version of Dataverse was used to create the archival Bag. +As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 +version designation and it is expected that any future changes to the OAI_ORE export +format will result in a version change and that tools such as DVUploader that can +recreate datasets from archival Bags will start indicating which version(s) of the +OAI_ORE format they can read. + +Dataverse installations that have been using archival Bags may wish to update any +existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse +[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +to generate updated versions. \ No newline at end of file diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index 21adf8338d9..9a24cf0715c 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -217,7 +217,14 @@ Sponsored by the `Ontario Council of University Libraries (OCUL) `_ zipped `BagIt `_ bags to the `Chronopolis `_ via `DuraCloud `_, to a local file system, or to `Google Cloud Storage `_. +A Dataverse installation can be configured to submit a copy of published Dataset versions, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ bags to `Chronopolis `_ via `DuraCloud `_, a local file system, any S3 store, or to `Google Cloud Storage `_. +Submission can be automated to occur upon publication, or can be done periodically (via external scripting). +The archival status of each Dataset version can be seen in the Dataset page version table and queried via API. 
+ +The archival Bags include all of the files and metadata in a given dataset version and are sufficient to recreate the dataset, e.g. in a new Dataverse instance, or potentially in another RDA-conformant repository. +Specifically, the archival Bags include an OAI-ORE Map serialized as JSON-LD that describes the dataset and its files, as well as information about the version of Dataverse used to export the archival Bag. + +The `DVUploader `_ includes functionality to recreate a Dataset from an archival Bag produced by Dataverse (using the Dataverse API to do so). For details on how to configure this integration, see :ref:`BagIt Export` in the :doc:`/installation/config` section of the Installation Guide. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 4d9466703e4..e87842ab1c7 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2088,10 +2088,12 @@ The API call requires a Json body that includes the list of the fileIds that the curl -H "X-Dataverse-key: $API_TOKEN" -H "Content-Type:application/json" "$SERVER_URL/api/datasets/:persistentId/files/actions/:unset-embargo?persistentId=$PERSISTENT_IDENTIFIER" -d "$JSON" +.. _Archival Status API: + Get the Archival Status of a Dataset By Version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Archiving is an optional feature that may be configured for a Dataverse installation. When that is enabled, this API call be used to retrieve the status. Note that this requires "superuser" credentials. +Archival :ref:`BagIt Export` is an optional feature that may be configured for a Dataverse installation. When that is enabled, this API call can be used to retrieve the status. Note that this requires "superuser" credentials. ``GET /api/datasets/$dataset-id/$version/archivalStatus`` returns the archival status of the specified dataset version.
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f9fe74afc7c..cd841e22f6c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1,4 +1,3 @@ -============= Configuration ============= @@ -1425,24 +1424,25 @@ BagIt file handler configuration settings: BagIt Export ------------ -Your Dataverse installation may be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ archival Bags (sometimes called BagPacks) to `Chronopolis `_ via `DuraCloud `_ or alternately to any folder on the local filesystem. +Your Dataverse installation may be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ archival Bags (sometimes called BagPacks) to one of several supported storage services. +Supported services include `Chronopolis `_ via `DuraCloud `_, Google's Cloud, and any service that can provide an S3 interface or handle files transferred to a folder on the local filesystem. -These archival Bags include all of the files and metadata in a given dataset version and are sufficient to recreate the dataset, e.g. in a new Dataverse instance, or postentially in another RDA-conformant repository. +These archival Bags include all of the files and metadata in a given dataset version and are sufficient to recreate the dataset, e.g. in a new Dataverse instance, or potentially in another RDA-conformant repository. The `DVUploader `_ includes functionality to recreate a Dataset from an archival Bag produced by Dataverse. (Note that this functionality is distinct from the :ref:`BagIt File Handler` upload files to an existing Dataset via the Dataverse user interface.) 
The Dataverse Software offers an internal archive workflow which may be configured as a PostPublication workflow via an admin API call to manually submit previously published Datasets and prior versions to a configured archive such as Chronopolis. The workflow creates a `JSON-LD `_ serialized `OAI-ORE `_ map file, which is also available as a metadata export format in the Dataverse Software web interface. At present, archiving classes include the DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, GoogleCloudSubmitToArchive, and S3SubmitToArchiveCommand , which all extend the AbstractSubmitToArchiveCommand and use the configurable mechanisms discussed below. (A DRSSubmitToArchiveCommand, which works with Harvard's DRS also exists and, while specific to DRS, is a useful example of how Archivers can support single-version-only semantics and support archiving only from specified collections (with collection specific parameters)). -All current options support the archival status APIs and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). +All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). .. _Duracloud Configuration: Duracloud Configuration +++++++++++++++++++++++ -Also note that while the current Chronopolis implementation generates the archival Bag and submits it to the archive's DuraCloud interface, the step to make a 'snapshot' of the space containing the archival Bag (and verify it's successful submission) are actions a curator must take in the DuraCloud interface. +The current Chronopolis implementation generates the archival Bag and submits it to the archive's DuraCloud interface. 
The steps to make a 'snapshot' of the space containing the archival Bag (and verify its successful submission) are actions a curator must take in the DuraCloud interface. -The minimal configuration to support an archiver integration involves adding a minimum of two Dataverse Software Keys and any required Payara jvm options. The example instructions here are specific to the DuraCloud Archiver\: +The minimal configuration to support archiver integration involves adding a minimum of two Dataverse Software settings. Individual archivers may require additional settings and/or Payara jvm options and micro-profile settings. The example instructions here are specific to the DuraCloud Archiver\: \:ArchiverClassName - the fully qualified class to be used for archiving. For example: @@ -1452,7 +1452,7 @@ The minimal configuration to support an archiver integration involves adding a m ``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":DuraCloudHost, :DuraCloudPort, :DuraCloudContext, :BagGeneratorThreads"`` -The DPN archiver defines three custom settings, one of which is required (the others have defaults): +The DuraCloud archiver defines three custom settings, one of which is required (the others have defaults): \:DuraCloudHost - the URL for your organization's Duracloud site.
For example: From 2ae1a9f847c256236bec8874ba307eb7e1631967 Mon Sep 17 00:00:00 2001 From: Ben Companjen Date: Fri, 15 Sep 2023 17:55:26 +0200 Subject: [PATCH 22/25] Get JSON object from JsonUtil --- .../java/edu/harvard/iq/dataverse/util/json/JsonParser.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index febb785cd95..984c607aac7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -34,7 +34,6 @@ import edu.harvard.iq.dataverse.workflow.step.WorkflowStepData; import org.apache.commons.validator.routines.DomainValidator; -import java.io.StringReader; import java.sql.Timestamp; import java.text.ParseException; import java.util.ArrayList; @@ -53,7 +52,6 @@ import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; -import jakarta.json.JsonReader; import jakarta.json.JsonString; import jakarta.json.JsonValue; import jakarta.json.JsonValue.ValueType; @@ -682,8 +680,7 @@ private DatasetField remapGeographicCoverage(CompoundVocabularyException ex) thr // convert DTO to datasetField so we can back valid values. 
Gson gson = new Gson(); String jsonString = gson.toJson(geoCoverageDTO); - JsonReader jsonReader = Json.createReader(new StringReader(jsonString)); - JsonObject obj = jsonReader.readObject(); + JsonObject obj = JsonUtil.getJsonObject(jsonString); DatasetField geoCoverageField = parseField(obj); // add back valid values From f5aa17a635fb085f8c42ac31f3c73660ea5c1e5e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 18 Sep 2023 10:15:44 -0400 Subject: [PATCH 23/25] fix formatting --- .../java/edu/harvard/iq/dataverse/util/bagit/OREMap.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 1ea1a5411fa..aa653a6e360 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -327,11 +327,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) { aggBuilder.add(JsonLDTerm.ore("aggregates").getLabel(), aggResArrayBuilder.build()) .add(JsonLDTerm.schemaOrg("hasPart").getLabel(), fileArray.build()).build()) // and finally add the context - .add("@context", contextBuilder.build()) - ; - - - + .add("@context", contextBuilder.build()); return oremapBuilder; } } From bbed57df0f2624f6a9300af509906e902541c82b Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Fri, 22 Sep 2023 10:26:02 -0400 Subject: [PATCH 24/25] #9944 add python-dvuploader to client libraries page --- doc/sphinx-guides/source/api/client-libraries.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/api/client-libraries.rst b/doc/sphinx-guides/source/api/client-libraries.rst index a25efe3a5f8..d53b9b2d776 100755 --- a/doc/sphinx-guides/source/api/client-libraries.rst +++ b/doc/sphinx-guides/source/api/client-libraries.rst @@ -52,6 +52,8 @@ There are multiple Python modules for interacting with Dataverse APIs. 
`EasyDataverse `_ is a Python library designed to simplify the management of Dataverse datasets in an object-oriented way, giving users the ability to upload, download, and update datasets with ease. By utilizing metadata block configurations, EasyDataverse automatically generates Python objects that contain all the necessary details required to create the native Dataverse JSON format used to create or edit datasets. Adding files and directories is also possible with EasyDataverse and requires no additional API calls. This library is particularly well-suited for client applications such as workflows and scripts as it minimizes technical complexities and facilitates swift development. +`python-dvuploader `_ implements Jim Myers' excellent `dv-uploader `_ as a Python module. It offers parallel direct uploads to Dataverse backend storage, streams files directly instead of buffering them in memory, and supports multi-part uploads, chunking data accordingly. + `pyDataverse `_ primarily allows developers to manage Dataverse collections, datasets and datafiles. Its intention is to help with data migrations and DevOps activities such as testing and configuration management. The module is developed by `Stefan Kasberger `_ from `AUSSDA - The Austrian Social Science Data Archive `_. `UBC's Dataverse Utilities `_ are a set of Python console utilities which allow one to upload datasets from a tab-separated-value spreadsheet, bulk release multiple datasets, bulk delete unpublished datasets, quickly duplicate records. replace licenses, and more. For additional information see their `PyPi page `_. 
From c82c47e0c08d9a0d507ee980fec78fd7345d4f4a Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Fri, 22 Sep 2023 13:33:57 -0400 Subject: [PATCH 25/25] #9944 python-dvuploader moved to gdcc org --- doc/sphinx-guides/source/api/client-libraries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/client-libraries.rst b/doc/sphinx-guides/source/api/client-libraries.rst index d53b9b2d776..4aa5b935e27 100755 --- a/doc/sphinx-guides/source/api/client-libraries.rst +++ b/doc/sphinx-guides/source/api/client-libraries.rst @@ -52,7 +52,7 @@ There are multiple Python modules for interacting with Dataverse APIs. `EasyDataverse `_ is a Python library designed to simplify the management of Dataverse datasets in an object-oriented way, giving users the ability to upload, download, and update datasets with ease. By utilizing metadata block configurations, EasyDataverse automatically generates Python objects that contain all the necessary details required to create the native Dataverse JSON format used to create or edit datasets. Adding files and directories is also possible with EasyDataverse and requires no additional API calls. This library is particularly well-suited for client applications such as workflows and scripts as it minimizes technical complexities and facilitates swift development. -`python-dvuploader `_ implements Jim Myers' excellent `dv-uploader `_ as a Python module. It offers parallel direct uploads to Dataverse backend storage, streams files directly instead of buffering them in memory, and supports multi-part uploads, chunking data accordingly. +`python-dvuploader `_ implements Jim Myers' excellent `dv-uploader `_ as a Python module. It offers parallel direct uploads to Dataverse backend storage, streams files directly instead of buffering them in memory, and supports multi-part uploads, chunking data accordingly. `pyDataverse `_ primarily allows developers to manage Dataverse collections, datasets and datafiles. 
Its intention is to help with data migrations and DevOps activities such as testing and configuration management. The module is developed by `Stefan Kasberger `_ from `AUSSDA - The Austrian Social Science Data Archive `_.