diff --git a/dspace-api/src/main/java/org/dspace/app/sitemap/GenerateSitemaps.java b/dspace-api/src/main/java/org/dspace/app/sitemap/GenerateSitemaps.java index 5e9a61556083..90962d12aa75 100644 --- a/dspace-api/src/main/java/org/dspace/app/sitemap/GenerateSitemaps.java +++ b/dspace-api/src/main/java/org/dspace/app/sitemap/GenerateSitemaps.java @@ -11,7 +11,6 @@ import java.io.IOException; import java.sql.SQLException; import java.util.Date; -import java.util.Iterator; import java.util.List; import org.apache.commons.cli.CommandLine; @@ -24,9 +23,6 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; -import org.dspace.content.Collection; -import org.dspace.content.Community; -import org.dspace.content.Item; import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.CollectionService; import org.dspace.content.service.CommunityService; @@ -35,6 +31,7 @@ import org.dspace.core.LogHelper; import org.dspace.discovery.DiscoverQuery; import org.dspace.discovery.DiscoverResult; +import org.dspace.discovery.IndexableObject; import org.dspace.discovery.SearchService; import org.dspace.discovery.SearchServiceException; import org.dspace.discovery.SearchUtils; @@ -60,6 +57,7 @@ public class GenerateSitemaps { private static final ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); private static final SearchService searchService = SearchUtils.getSearchService(); + private static final int PAGE_SIZE = 100; /** * Default constructor @@ -183,96 +181,113 @@ public static void generateSitemaps(boolean makeHTMLMap, boolean makeSitemapOrg) } Context c = new Context(Context.Mode.READ_ONLY); + int offset = 0; + long commsCount = 0; + long collsCount = 0; + long itemsCount = 0; - List comms = communityService.findAll(c); - - for (Community comm : comms) { - String url = uiURLStem + "communities/" + comm.getID(); + try { + DiscoverQuery discoveryQuery = new DiscoverQuery(); + discoveryQuery.setMaxResults(PAGE_SIZE); + discoveryQuery.setQuery("search.resourcetype:Community"); + do { + discoveryQuery.setStart(offset); + DiscoverResult discoverResult = searchService.search(c, discoveryQuery); + List docs = discoverResult.getIndexableObjects(); + commsCount = discoverResult.getTotalSearchResults(); + + for (IndexableObject doc : docs) { + String url = uiURLStem + "communities/" + doc.getID(); + c.uncacheEntity(doc.getIndexedObject()); + + if (makeHTMLMap) { + html.addURL(url, null); + } + if (makeSitemapOrg) { + sitemapsOrg.addURL(url, null); + } + } + offset += PAGE_SIZE; + } while (offset < commsCount); + + offset = 0; + discoveryQuery = new DiscoverQuery(); + discoveryQuery.setMaxResults(PAGE_SIZE); + discoveryQuery.setQuery("search.resourcetype:Collection"); + do { + discoveryQuery.setStart(offset); + DiscoverResult discoverResult = searchService.search(c, discoveryQuery); + List docs = discoverResult.getIndexableObjects(); + collsCount = discoverResult.getTotalSearchResults(); + + for (IndexableObject doc : docs) { + String url = uiURLStem + "collections/" + doc.getID(); + c.uncacheEntity(doc.getIndexedObject()); + + if (makeHTMLMap) { + html.addURL(url, null); + } + if (makeSitemapOrg) { + sitemapsOrg.addURL(url, null); + } + } + offset += PAGE_SIZE; + } while (offset < collsCount); + + offset = 0; + discoveryQuery = new DiscoverQuery(); + discoveryQuery.setMaxResults(PAGE_SIZE); + discoveryQuery.setQuery("search.resourcetype:Item"); + discoveryQuery.addSearchField("search.entitytype"); + do { + + discoveryQuery.setStart(offset); + DiscoverResult discoverResult = searchService.search(c, discoveryQuery); + List docs = discoverResult.getIndexableObjects(); + itemsCount = discoverResult.getTotalSearchResults(); + + for (IndexableObject doc : docs) { + String url; + List entityTypeFieldValues = discoverResult.getSearchDocument(doc).get(0) + .getSearchFieldValues("search.entitytype"); + if (CollectionUtils.isNotEmpty(entityTypeFieldValues)) { + url = uiURLStem + "entities/" + StringUtils.lowerCase(entityTypeFieldValues.get(0)) + "/" + + doc.getID(); + } else { + url = uiURLStem + "items/" + doc.getID(); + } + Date lastMod = doc.getLastModified(); + c.uncacheEntity(doc.getIndexedObject()); + + if (makeHTMLMap) { + html.addURL(url, null); + } + if (makeSitemapOrg) { + sitemapsOrg.addURL(url, null); + } + } + offset += PAGE_SIZE; + } while (offset < itemsCount); if (makeHTMLMap) { - html.addURL(url, null); - } - if (makeSitemapOrg) { - sitemapsOrg.addURL(url, null); + int files = html.finish(); + log.info(LogHelper.getHeader(c, "write_sitemap", + "type=html,num_files=" + files + ",communities=" + + commsCount + ",collections=" + collsCount + + ",items=" + itemsCount)); } - c.uncacheEntity(comm); - } - - List colls = collectionService.findAll(c); - - for (Collection coll : colls) { - String url = uiURLStem + "collections/" + coll.getID(); - - if (makeHTMLMap) { - html.addURL(url, null); - } if (makeSitemapOrg) { - sitemapsOrg.addURL(url, null); - } - - c.uncacheEntity(coll); - } - - Iterator allItems = itemService.findAll(c); - int itemCount = 0; - - while (allItems.hasNext()) { - Item i = allItems.next(); - - DiscoverQuery entityQuery = new DiscoverQuery(); - entityQuery.setQuery("search.uniqueid:\"Item-" + i.getID() + "\" and entityType:*"); - entityQuery.addSearchField("entityType"); - - try { - DiscoverResult discoverResult = searchService.search(c, entityQuery); - - String url; - if (CollectionUtils.isNotEmpty(discoverResult.getIndexableObjects()) - && CollectionUtils.isNotEmpty(discoverResult.getSearchDocument( - discoverResult.getIndexableObjects().get(0)).get(0).getSearchFieldValues("entityType")) - && StringUtils.isNotBlank(discoverResult.getSearchDocument( - discoverResult.getIndexableObjects().get(0)).get(0).getSearchFieldValues("entityType").get(0)) - ) { - url = uiURLStem + "entities/" + StringUtils.lowerCase(discoverResult.getSearchDocument( - discoverResult.getIndexableObjects().get(0)) - .get(0).getSearchFieldValues("entityType").get(0)) + "/" + i.getID(); - } else { - url = uiURLStem + "items/" + i.getID(); - } - Date lastMod = i.getLastModified(); - - if (makeHTMLMap) { - html.addURL(url, lastMod); - } - if (makeSitemapOrg) { - sitemapsOrg.addURL(url, lastMod); - } - } catch (SearchServiceException e) { - log.error("Failed getting entitytype through solr for item " + i.getID() + ": " + e.getMessage()); + int files = sitemapsOrg.finish(); + log.info(LogHelper.getHeader(c, "write_sitemap", + "type=html,num_files=" + files + ",communities=" + + commsCount + ",collections=" + collsCount + + ",items=" + itemsCount)); } - - c.uncacheEntity(i); - - itemCount++; + } catch (SearchServiceException e) { + throw new RuntimeException(e); + } finally { + c.abort(); } - - if (makeHTMLMap) { - int files = html.finish(); - log.info(LogHelper.getHeader(c, "write_sitemap", - "type=html,num_files=" + files + ",communities=" - + comms.size() + ",collections=" + colls.size() - + ",items=" + itemCount)); - } - - if (makeSitemapOrg) { - int files = sitemapsOrg.finish(); - log.info(LogHelper.getHeader(c, "write_sitemap", - "type=html,num_files=" + files + ",communities=" - + comms.size() + ",collections=" + colls.size() - + ",items=" + itemCount)); - } - - c.abort(); } } diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java index 0cf2aa50af67..cd3797e3e34e 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceImpl.java @@ -1031,9 +1031,8 @@ protected DiscoverResult retrieveResult(Context context, DiscoverQuery query) // Add information about our search fields for (String field : searchFields) { List valuesAsString = new ArrayList<>(); - for (Object o : doc.getFieldValues(field)) { - valuesAsString.add(String.valueOf(o)); - } + Optional.ofNullable(doc.getFieldValues(field)) + .ifPresent(l -> l.forEach(o -> valuesAsString.add(String.valueOf(o)))); resultDoc.addSearchField(field, valuesAsString.toArray(new String[valuesAsString.size()])); } result.addSearchDocument(indexableObject, resultDoc); diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SitemapRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SitemapRestControllerIT.java index cbcf970547f7..175fb34e6cac 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SitemapRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SitemapRestControllerIT.java @@ -8,6 +8,7 @@ package org.dspace.app.rest; import static org.dspace.builder.ItemBuilder.createItem; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; @@ -16,6 +17,7 @@ import javax.servlet.ServletException; import org.dspace.app.rest.test.AbstractControllerIntegrationTest; +import org.dspace.authorize.service.ResourcePolicyService; import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CommunityBuilder; import org.dspace.content.Collection; @@ -38,10 +40,22 @@ public class SitemapRestControllerIT extends AbstractControllerIntegrationTest { @Autowired ConfigurationService configurationService; + @Autowired + ResourcePolicyService policyService; + private final static String SITEMAPS_ENDPOINT = "sitemaps"; private Item item1; private Item item2; + private Item itemRestricted; + private Item itemUndiscoverable; + private Item entityPublication; + private Item entityPublicationRestricted; + private Item entityPublicationUndiscoverable; + private Community community; + private Community communityRestricted; + private Collection collection; + private Collection collectionRestricted; @Before @Override @@ -52,8 +66,16 @@ public void setUp() throws Exception { context.turnOffAuthorisationSystem(); - Community community = CommunityBuilder.createCommunity(context).build(); - Collection collection = CollectionBuilder.createCollection(context, community).build(); + community = CommunityBuilder.createCommunity(context).build(); + communityRestricted = CommunityBuilder.createCommunity(context).build(); + policyService.removeAllPolicies(context, communityRestricted); + collection = CollectionBuilder.createCollection(context, community).build(); + collectionRestricted = CollectionBuilder.createCollection(context, community).build(); + Collection publicationCollection = CollectionBuilder.createCollection(context, community) + .withEntityType("Publication") + .withName("Publication Collection").build(); + policyService.removeAllPolicies(context, collectionRestricted); + this.item1 = createItem(context, collection) .withTitle("Test 1") .withIssueDate("2010-10-17") @@ -62,6 +84,30 @@ public void setUp() throws Exception { .withTitle("Test 2") .withIssueDate("2015-8-3") .build(); + this.itemRestricted = createItem(context, collection) + .withTitle("Test 3") + .withIssueDate("2015-8-3") + .build(); + policyService.removeAllPolicies(context, itemRestricted); + this.itemUndiscoverable = createItem(context, collection) + .withTitle("Test 4") + .withIssueDate("2015-8-3") + .makeUnDiscoverable() + .build(); + this.entityPublication = createItem(context, publicationCollection) + .withTitle("Item Publication") + .withIssueDate("2015-8-3") + .build(); + this.entityPublicationRestricted = createItem(context, publicationCollection) + .withTitle("Item Publication Restricted") + .withIssueDate("2015-8-3") + .build(); + policyService.removeAllPolicies(context, entityPublicationRestricted); + this.entityPublicationUndiscoverable = createItem(context, publicationCollection) + .withTitle("Item Publication") + .withIssueDate("2015-8-3") + .makeUnDiscoverable() + .build(); runDSpaceScript("generate-sitemaps"); @@ -127,9 +173,39 @@ public void testSitemap_sitemap0Html() throws Exception { .andReturn(); String response = result.getResponse().getContentAsString(); + // contains a link to communities: [dspace.ui.url]/communities/ + assertTrue(response + .contains(configurationService.getProperty("dspace.ui.url") + "/communities/" + community.getID())); + // contains a link to collections: [dspace.ui.url]/collections/ + assertTrue(response + .contains(configurationService.getProperty("dspace.ui.url") + "/collections/" + collection.getID())); // contains a link to items: [dspace.ui.url]/items/ assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + item1.getID())); assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + item2.getID())); + // contains proper link to entities items + assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/" + + entityPublication.getID())); + assertFalse(response + .contains(configurationService.getProperty("dspace.ui.url") + "/items/" + entityPublication.getID())); + // does not contain links to restricted content + assertFalse(response.contains( + configurationService.getProperty("dspace.ui.url") + "/communities/" + communityRestricted.getID())); + assertFalse(response.contains( + configurationService.getProperty("dspace.ui.url") + "/collections/" + collectionRestricted.getID())); + assertFalse(response + .contains(configurationService.getProperty("dspace.ui.url") + "/items/" + itemRestricted.getID())); + assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/" + + entityPublicationRestricted.getID())); + assertFalse(response.contains( + configurationService.getProperty("dspace.ui.url") + "/items/" + entityPublicationRestricted.getID())); + // does not contain links to undiscoverable content + assertFalse(response + .contains(configurationService.getProperty("dspace.ui.url") + "/items/" + itemUndiscoverable.getID())); + assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/" + + entityPublicationUndiscoverable.getID())); + assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + + entityPublicationUndiscoverable.getID())); + } @Test @@ -160,8 +236,37 @@ public void testSitemap_sitemap0Xml() throws Exception { .andReturn(); String response = result.getResponse().getContentAsString(); + // contains a link to communities: [dspace.ui.url]/communities/ + assertTrue(response + .contains(configurationService.getProperty("dspace.ui.url") + "/communities/" + community.getID())); + // contains a link to collections: [dspace.ui.url]/collections/ + assertTrue(response + .contains(configurationService.getProperty("dspace.ui.url") + "/collections/" + collection.getID())); // contains a link to items: [dspace.ui.url]/items/ assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + item1.getID())); assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + item2.getID())); + // contains proper link to entities items + assertTrue(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/" + + entityPublication.getID())); + assertFalse(response + .contains(configurationService.getProperty("dspace.ui.url") + "/items/" + entityPublication.getID())); + // does not contain links to restricted content + assertFalse(response.contains( + configurationService.getProperty("dspace.ui.url") + "/communities/" + communityRestricted.getID())); + assertFalse(response.contains( + configurationService.getProperty("dspace.ui.url") + "/collections/" + collectionRestricted.getID())); + assertFalse(response + .contains(configurationService.getProperty("dspace.ui.url") + "/items/" + itemRestricted.getID())); + assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/" + + entityPublicationRestricted.getID())); + assertFalse(response.contains( + configurationService.getProperty("dspace.ui.url") + "/items/" + entityPublicationRestricted.getID())); + // does not contain links to undiscoverable content + assertFalse(response + .contains(configurationService.getProperty("dspace.ui.url") + "/items/" + itemUndiscoverable.getID())); + assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/entities/publication/" + + entityPublicationUndiscoverable.getID())); + assertFalse(response.contains(configurationService.getProperty("dspace.ui.url") + "/items/" + + entityPublicationUndiscoverable.getID())); } }