From 257c76c5b832fe870aca9160767a880e78a2b326 Mon Sep 17 00:00:00 2001 From: khituras Date: Mon, 9 Sep 2024 10:23:57 +0200 Subject: [PATCH 1/4] Add input-list-specific taxonomy filters. --- .../core/retrieval/data/GepiRequestData.java | 14 +++- .../core/retrieval/services/EventQueries.java | 66 ++++++++++++------- .../services/EventRetrievalService.java | 11 +++- .../gepi/core/services/GePiDataService.java | 1 - .../gepi/webapp/components/GepiInput.java | 23 ++++++- .../gepi/webapp/components/StatsWidget.java | 8 +-- .../webapp/data/FilteredGepiRequestData.java | 2 +- .../gepi/webapp/data/GepiQueryParameters.java | 14 ++++ .../gepi/webapp/services/AppModule.java | 2 +- .../modules/gepi/components/gepiinput.js | 18 +++++ .../gepi/webapp/components/GepiInput.tml | 10 ++- .../de/julielab/gepi/webapp/pages/Help.tml | 12 ++-- 12 files changed, 136 insertions(+), 45 deletions(-) diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/data/GepiRequestData.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/data/GepiRequestData.java index c419d6b4..0b64b9c2 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/data/GepiRequestData.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/data/GepiRequestData.java @@ -22,13 +22,17 @@ public class GepiRequestData implements Cloneable { private boolean includeUnary; private int eventLikelihood; private String[] taxId; + private String[] taxIdsA; + private String[] taxIdsB; private String sectionNameFilterString; private int pageSize = 10; - public GepiRequestData(List eventTypes, boolean includeUnary, int eventLikelihood, Future listAGePiIds, Future listBGePiIds, String[] taxId, String sentenceFilterString, String paragraphFilterString, String filterFieldsConnectionOperator, String sectionNameFilterString, EnumSet inputMode, String docId, long dataSessionId) { + public GepiRequestData(List eventTypes, boolean includeUnary, int eventLikelihood, Future 
listAGePiIds, Future listBGePiIds, String[] taxId, String[] taxIdA, String[] taxIdB, String sentenceFilterString, String paragraphFilterString, String filterFieldsConnectionOperator, String sectionNameFilterString, EnumSet inputMode, String docId, long dataSessionId) { this.includeUnary = includeUnary; this.eventLikelihood = eventLikelihood; this.taxId = taxId; + this.taxIdsA = taxIdA; + this.taxIdsB = taxIdB; this.sectionNameFilterString = sectionNameFilterString; this.eventTypes = eventTypes; this.listAGePiIds = listAGePiIds; @@ -203,4 +207,12 @@ public String toString() { ", dataSessionId=" + dataSessionId + '}'; } + + public String[] getTaxIdsB() { + return taxIdsB; + } + + public String[] getTaxIdsA() { + return taxIdsA; + } } diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventQueries.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventQueries.java index 71b37444..e10c782d 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventQueries.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventQueries.java @@ -27,11 +27,19 @@ public static BoolQuery getClosedQuery(GepiRequestData requestData, Set a1b2Clause.addQuery(listA1Query); a1b2Clause.addQuery(listB2Query); a1b2Clause.occur = MUST; + // List B can be empty if there is a taxId filter on the B-side. This is then still a form of a closed search. + // However, this results in a B-query that has an empty list of terms which causes + // ElasticSearch to not retrieve anything. + if (listB2Query.terms.isEmpty()) + a1b2Clause.queries.remove(listB2Query); BoolClause a2b1Clause = new BoolClause(); a2b1Clause.addQuery(listA2Query); a2b1Clause.addQuery(listB1Query); a2b1Clause.occur = MUST; + // Analogous to the a1b2Clause above. 
+ if (listB1Query.terms.isEmpty()) + a2b1Clause.queries.remove(listB1Query); BoolQuery a1b2Query = new BoolQuery(); a1b2Query.addClause(a1b2Clause); @@ -82,13 +90,22 @@ public static BoolQuery getClosedQuery(GepiRequestData requestData, Set } if (requestData.getEventLikelihood() > 1) addEventLikelihoodFilter(eventQuery, requestData.getEventLikelihood()); - if (requestData.getTaxId() != null && requestData.getTaxId().length > 0) { - final TermsQuery taxQuery = new TermsQuery(Arrays.stream(requestData.getTaxId()).collect(Collectors.toList())); - taxQuery.field = FIELD_EVENT_TAX_IDS; - BoolClause taxIdFilterClause = new BoolClause(); - taxIdFilterClause.occur = FILTER; - taxIdFilterClause.addQuery(taxQuery); - eventQuery.addClause(taxIdFilterClause); + addTaxIdFilter(eventQuery, requestData.getTaxId(), FIELD_EVENT_TAX_IDS); + if (requestData.getTaxIdsA() != null && requestData.getTaxIdsA().length > 0) { + final TermsQuery taxQuery = new TermsQuery(Arrays.stream(requestData.getTaxIdsA()).collect(Collectors.toList())); + taxQuery.field = FIELD_EVENT_ARG1_TAX_ID; + a1b2Clause.addQuery(taxQuery); + final TermsQuery taxQuery2 = new TermsQuery(Arrays.stream(requestData.getTaxIdsA()).collect(Collectors.toList())); + taxQuery2.field = FIELD_EVENT_ARG2_TAX_ID; + a2b1Clause.addQuery(taxQuery2); + } + if (requestData.getTaxIdsB() != null && requestData.getTaxIdsB().length > 0) { + final TermsQuery taxQuery = new TermsQuery(Arrays.stream(requestData.getTaxIdsB()).collect(Collectors.toList())); + taxQuery.field = FIELD_EVENT_ARG2_TAX_ID; + a1b2Clause.addQuery(taxQuery); + final TermsQuery taxQuery2 = new TermsQuery(Arrays.stream(requestData.getTaxIdsB()).collect(Collectors.toList())); + taxQuery2.field = FIELD_EVENT_ARG1_TAX_ID; + a2b1Clause.addQuery(taxQuery2); } if (requestData.getDocId() != null && !requestData.getDocId().isBlank()) { final MultiMatchQuery docIdQuery = new MultiMatchQuery(); @@ -102,6 +119,17 @@ public static BoolQuery getClosedQuery(GepiRequestData 
requestData, Set return eventQuery; } + private static void addTaxIdFilter(BoolQuery eventQuery, String[] taxId, String indexSearchField) { + if (taxId != null && taxId.length > 0) { + final TermsQuery taxQuery = new TermsQuery(Arrays.stream(taxId).collect(Collectors.toList())); + taxQuery.field = indexSearchField; + BoolClause taxIdFilterClause = new BoolClause(); + taxIdFilterClause.occur = FILTER; + taxIdFilterClause.addQuery(taxQuery); + eventQuery.addClause(taxIdFilterClause); + } + } + public static BoolQuery getOpenQuery(GepiRequestData requestData) throws InterruptedException, ExecutionException { List eventTypes = requestData.getEventTypes(); String sentenceFilter = requestData.getSentenceFilterString(); @@ -156,14 +184,8 @@ public static BoolQuery getOpenQuery(GepiRequestData requestData) throws Interru } if (requestData.getEventLikelihood() > 1) addEventLikelihoodFilter(eventQuery, requestData.getEventLikelihood()); - if (requestData.getTaxId() != null && requestData.getTaxId().length > 0) { - final TermsQuery taxQuery = new TermsQuery(Arrays.stream(requestData.getTaxId()).collect(Collectors.toList())); - taxQuery.field = FIELD_EVENT_TAX_IDS; - BoolClause taxIdFilterClause = new BoolClause(); - taxIdFilterClause.occur = FILTER; - taxIdFilterClause.addQuery(taxQuery); - eventQuery.addClause(taxIdFilterClause); - } + addTaxIdFilter(eventQuery, requestData.getTaxId(), FIELD_EVENT_TAX_IDS); + addTaxIdFilter(eventQuery, requestData.getTaxIdsA(), FIELD_EVENT_ARG1_TAX_ID); if (requestData.getDocId() != null && !requestData.getDocId().isBlank()) { final MultiMatchQuery docIdQuery = new MultiMatchQuery(); docIdQuery.query = requestData.getDocId(); @@ -197,7 +219,6 @@ public static BoolQuery getFulltextQuery(GepiRequestData requestData) { final String paragraphFilter = requestData.getParagraphFilterString(); final String sectionNameFilter = requestData.getSectionNameFilterString(); final int eventLikelihood = requestData.getEventLikelihood(); - final String[] 
taxIds = requestData.getTaxId(); final String docId = requestData.getDocId(); if (eventTypes != null && !eventTypes.isEmpty()) { @@ -236,14 +257,9 @@ public static BoolQuery getFulltextQuery(GepiRequestData requestData) { if (eventLikelihood > 1) { addEventLikelihoodFilter(eventQuery, eventLikelihood); } - if (taxIds != null && taxIds.length > 0) { - final TermsQuery taxQuery = new TermsQuery(Arrays.stream(taxIds).collect(Collectors.toList())); - taxQuery.field = FIELD_EVENT_TAX_IDS; - BoolClause taxIdFilterClause = new BoolClause(); - taxIdFilterClause.occur = FILTER; - taxIdFilterClause.addQuery(taxQuery); - eventQuery.addClause(taxIdFilterClause); - } + addTaxIdFilter(eventQuery, requestData.getTaxId(), FIELD_EVENT_TAX_IDS); + addTaxIdFilter(eventQuery, requestData.getTaxIdsA(), FIELD_EVENT_ARG1_TAX_ID); + addTaxIdFilter(eventQuery, requestData.getTaxIdsB(), FIELD_EVENT_ARG2_TAX_ID); if (docId != null && docId.isBlank()) { final MultiMatchQuery docIdQuery = new MultiMatchQuery(); docIdQuery.query = docId; @@ -269,7 +285,7 @@ private static void addFulltextSearchQuery(String filterQuery, String field, Boo final SimpleQueryStringQuery textFilterQuery = new SimpleQueryStringQuery(); textFilterQuery.flags = List.of(SimpleQueryStringQuery.Flag.ALL); textFilterQuery.query = filterQuery; - textFilterQuery.fields = Arrays.asList(field); + textFilterQuery.fields = List.of(field); final BoolClause textFilterClause = new BoolClause(); textFilterClause.addQuery(textFilterQuery); textFilterClause.occur = occur; diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventRetrievalService.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventRetrievalService.java index 8aa50fe1..2ebceb0d 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventRetrievalService.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/retrieval/services/EventRetrievalService.java @@ -55,6 +55,8 @@ public 
class EventRetrievalService implements IEventRetrievalService { public static final String FIELD_EVENT_ARG1_GENE_ID = "argument1geneid"; + public static final String FIELD_EVENT_ARG1_TAX_ID = "argument1taxid"; + public static final String FIELD_EVENT_ARG1_CONCEPT_ID = "argument1conceptid"; public static final String FIELD_EVENT_ARG1_TOP_HOMOLOGY_ID = "argument1tophomoid"; @@ -71,6 +73,8 @@ public class EventRetrievalService implements IEventRetrievalService { public static final String FIELD_EVENT_ARG2_GENE_ID = "argument2geneid"; + public static final String FIELD_EVENT_ARG2_TAX_ID = "argument2taxid"; + public static final String FIELD_EVENT_ARG2_CONCEPT_ID = "argument2conceptid"; public static final String FIELD_EVENT_ARG2_TOP_HOMOLOGY_ID = "argument2tophomoid"; @@ -208,10 +212,10 @@ public Future closedSearch(GepiRequestData requestData, in return CompletableFuture.supplyAsync(() -> { try { - log.debug("Retrieving closed events for {} A IDs and {} B IDs", requestData.getListAGePiIds().get().getConvertedItems().size(), requestData.getListBGePiIds().get().getConvertedItems().size()); + log.debug("Retrieving closed events for {} A IDs and {} B IDs", requestData.getListAGePiIds().get().getConvertedItems().size(), requestData.getListBGePiIds() != null ? 
requestData.getListBGePiIds().get().getConvertedItems().size() : 0); if (log.isDebugEnabled()) log.debug("Some A target IDs are: {}", requestData.getListAGePiIds().get().getTargetIds().stream().limit(10).collect(Collectors.joining(", "))); - if (log.isDebugEnabled()) + if (requestData.getListBGePiIds() != null && log.isDebugEnabled()) log.debug("Some B target IDs are: {}", requestData.getListBGePiIds().get().getTargetIds().stream().limit(10).collect(Collectors.joining(", "))); SearchServerRequest serverRqst = getClosedSearchRequest(requestData, from, numRows, forCharts); @@ -242,7 +246,8 @@ public Future closedSearch(GepiRequestData requestData, in } private SearchServerRequest getClosedSearchRequest(GepiRequestData requestData, int from, int numRows, boolean forCharts) throws ExecutionException, InterruptedException { - BoolQuery eventQuery = EventQueries.getClosedQuery(requestData, requestData.getAListIdsAsSet(), requestData.getBListIdsAsSet()); + // List B may be absent (null); this is still valid if there is a tax ID filter on the B-side + BoolQuery eventQuery = EventQueries.getClosedQuery(requestData, requestData.getAListIdsAsSet(), requestData.getListBGePiIds() != null ? 
requestData.getBListIdsAsSet() : Collections.emptySet()); boolean downloadAll = forCharts || numRows == Integer.MAX_VALUE; diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GePiDataService.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GePiDataService.java index 4c037339..9faceac0 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GePiDataService.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GePiDataService.java @@ -85,7 +85,6 @@ public long newSession() { return id; } - @Log @Override public GePiData getData(long sessionId) { GePiData data = dataCache.getIfPresent(sessionId); diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java index fca34a50..07fd7880 100644 --- a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java @@ -35,6 +35,7 @@ import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; +import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -91,6 +92,14 @@ public class GepiInput { @Persist(TabPersistentField.TAB) private String taxId; + @Property + @Persist(TabPersistentField.TAB) + private String taxIdA; + + @Property + @Persist(TabPersistentField.TAB) + private String taxIdB; + @Inject private ComponentResources resources; @@ -198,6 +207,8 @@ public void reset() { listATextAreaValue = ""; listBTextAreaValue = ""; taxId = ""; + taxIdA = ""; + taxIdB = ""; selectedEventTypes = new ArrayList<>(EnumSet.allOf(EventTypes.class)); includeUnary = false; eventLikelihood = 1; @@ -251,6 +262,8 @@ public void executeSearch(GepiQueryParameters queryParameters, long dataSessionI this.listATextAreaValue = 
queryParameters.getListATextAreaValue(); this.listBTextAreaValue = queryParameters.getListBTextAreaValue(); this.taxId = queryParameters.getTaxId(); + this.taxIdA = queryParameters.getTaxIdA(); + this.taxIdB = queryParameters.getTaxIdB(); this.eventLikelihood = queryParameters.getEventLikelihood(); this.sentenceFilterString = queryParameters.getSentenceFilterString(); this.paragraphFilterString = queryParameters.getParagraphFilterString(); @@ -269,8 +282,9 @@ public void executeSearch() { if (selectedEventTypeNames.isEmpty()) selectedEventTypeNames = EnumSet.allOf(EventTypes.class).stream().map(Enum::name).distinct().collect(Collectors.toList()); boolean isAListPresent = listATextAreaValue != null && listATextAreaValue.trim().length() > 0; - boolean isABSearchRequest = listATextAreaValue != null && listATextAreaValue.trim().length() > 0 && listBTextAreaValue != null - && listBTextAreaValue.trim().length() > 0; + // when there are taxId filters for A and B, then this is also a closed search + boolean isABSearchRequest = (listATextAreaValue != null && listATextAreaValue.trim().length() > 0) && ((listBTextAreaValue != null + && listBTextAreaValue.trim().length() > 0) || (taxIdB != null && !taxIdB.isEmpty())); boolean isSentenceFilterPresent = sentenceFilterString != null && !sentenceFilterString.isBlank(); boolean isParagraphFilterPresent = paragraphFilterString != null && !paragraphFilterString.isBlank(); boolean isSectionNameFilterPresent = sectionNameFilterString != null && !sectionNameFilterString.isBlank(); @@ -289,7 +303,8 @@ public void executeSearch() { else inputMode = EnumSet.of(InputMode.FULLTEXT_QUERY); } - requestData = new GepiRequestData(selectedEventTypeNames, includeUnary, eventLikelihood, listAGePiIds, listBGePiIds, taxId != null ? taxId.split("\\s*,\\s*") : null, sentenceFilterString, paragraphFilterString, filterFieldsConnectionOperator, sectionNameFilterString, inputMode, docId, dataSessionId); + Function taxIdSplit = taxId -> taxId != null ? 
taxId.split("\\s*,\\s*") : null; + requestData = new GepiRequestData(selectedEventTypeNames, includeUnary, eventLikelihood, listAGePiIds, listBGePiIds, taxIdSplit.apply(taxId), taxIdSplit.apply(taxIdA),taxIdSplit.apply(taxIdB),sentenceFilterString, paragraphFilterString, filterFieldsConnectionOperator, sectionNameFilterString, inputMode, docId, dataSessionId); log.debug("Fetching events from ElasticSearch"); Future pagedEsResult = eventRetrievalService.getEvents(requestData, 0, TableResultWidget.ROWS_PER_PAGE, false); Future unrolledResult4Charts = null; @@ -299,6 +314,8 @@ public void executeSearch() { log.info("[Session {}] A input, first elements (out of {}): {}", dataSessionId, Arrays.asList(aLines).subList(0, Math.min(5, aLines.length)), aLines.length); log.info("[Session {}] B input, first elements (out of {}): {}", dataSessionId, Arrays.asList(bLines).subList(0, Math.min(5, bLines.length)), bLines.length); log.info("[Session {}] taxIds: {}", dataSessionId, taxId); + log.info("[Session {}] taxIdsA: {}", dataSessionId, taxIdA); + log.info("[Session {}] taxIdsB: {}", dataSessionId, taxIdB); log.info("[Session {}] sentence filter: {}", dataSessionId, sentenceFilterString); log.info("[Session {}] paragraph filter: {}", dataSessionId, paragraphFilterString); log.info("[Session {}] section filter: {}", dataSessionId, sectionNameFilterString); diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/StatsWidget.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/StatsWidget.java index bfda1e84..7fba8f8d 100644 --- a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/StatsWidget.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/StatsWidget.java @@ -94,12 +94,12 @@ public List> getTopInteractions() { public int getInputSize(String list, String type) { try { - final IdConversionResult idConversionResult = list.equalsIgnoreCase("a") ? 
requestData.getListAGePiIds().get() : requestData.getListBGePiIds().get(); - if (type.equalsIgnoreCase("converted")) { - final Multimap convertedItems = idConversionResult.getConvertedItems(); + final Future idConversionResult = list.equalsIgnoreCase("a") ? requestData.getListAGePiIds() : requestData.getListBGePiIds(); + if (idConversionResult != null && type.equalsIgnoreCase("converted")) { + final Multimap convertedItems = idConversionResult.get().getConvertedItems(); return convertedItems.keySet().size(); } - return (int) idConversionResult.getUnconvertedItems().count(); + return (int) (idConversionResult != null ? idConversionResult.get().getUnconvertedItems().count() : 0); } catch (InterruptedException e) { e.printStackTrace(); } catch (ExecutionException e) { diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/data/FilteredGepiRequestData.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/data/FilteredGepiRequestData.java index 36eddbc4..3606d232 100644 --- a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/data/FilteredGepiRequestData.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/data/FilteredGepiRequestData.java @@ -70,7 +70,7 @@ public void setArg2IdFilter(String arg2IdFilter) { private String arg2IdFilter; public FilteredGepiRequestData(GepiRequestData requestData) { - super(requestData.getEventTypes(), requestData.isIncludeUnary(), requestData.getEventLikelihood(), requestData.getListAGePiIds(),requestData.getListBGePiIds(), requestData.getTaxId(), requestData.getSentenceFilterString(), requestData.getParagraphFilterString(), requestData.getFilterFieldsConnectionOperator(), requestData.getSectionNameFilterString(), requestData.getInputMode(), requestData.getDocId(), requestData.getDataSessionId()); + super(requestData.getEventTypes(), requestData.isIncludeUnary(), requestData.getEventLikelihood(), requestData.getListAGePiIds(),requestData.getListBGePiIds(), requestData.getTaxId(), 
requestData.getTaxIdsA(), requestData.getTaxIdsB(), requestData.getSentenceFilterString(), requestData.getParagraphFilterString(), requestData.getFilterFieldsConnectionOperator(), requestData.getSectionNameFilterString(), requestData.getInputMode(), requestData.getDocId(), requestData.getDataSessionId()); } } diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/data/GepiQueryParameters.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/data/GepiQueryParameters.java index c6048ebc..22b566a6 100644 --- a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/data/GepiQueryParameters.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/data/GepiQueryParameters.java @@ -17,6 +17,8 @@ public class GepiQueryParameters { public static final String ALIST = "alist"; public static final String BLIST = "blist"; public static final String TAXID = "taxids"; + public static final String TAXIDA = "taxidsA"; + public static final String TAXIDB = "taxidsB"; public static final String EVENTTYPES = "eventtypes"; public static final String FACTUALITY = "factuality"; public static final String FILTERFIELDSCONNECTIONOPERATOR = "filterconnector"; @@ -35,6 +37,8 @@ public class GepiQueryParameters { private String listATextAreaValue; private String listBTextAreaValue; private String taxId; + private String taxIdA; + private String taxIdB; private List selectedEventTypes; private int eventLikelihood; private String filterFieldsConnectionOperator; @@ -124,6 +128,8 @@ private void readParameters(Request request) { if (listBTextAreaValue != null) listBTextAreaValue = Arrays.stream(listBTextAreaValue.split("[\n,]")).map(this::decodeUrlEncoding).collect(Collectors.joining("\n")); taxId = request.getParameter(TAXID); + taxIdA = request.getParameter(TAXIDA); + taxIdB = request.getParameter(TAXIDB); selectedEventTypes = new ArrayList<>(EnumSet.allOf(EventTypes.class)); final String eventTypesString = request.getParameter(EVENTTYPES); if 
(!StringUtils.isBlank(eventTypesString)) { @@ -187,4 +193,12 @@ public int getInteractionRetrievalLimitForAggregations() { public void setInteractionRetrievalLimitForAggregations(int interactionRetrievalLimitForAggregations) { this.interactionRetrievalLimitForAggregations = interactionRetrievalLimitForAggregations; } + + public String getTaxIdA() { + return taxIdA; + } + + public String getTaxIdB() { + return taxIdB; + } } diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/services/AppModule.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/services/AppModule.java index 13c7f887..5fba3975 100644 --- a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/services/AppModule.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/services/AppModule.java @@ -75,7 +75,7 @@ public static void contributeFactoryDefaults( // This is something that should be removed when going to production, but is useful // in the early stages of development. - configuration.override(SymbolConstants.PRODUCTION_MODE, true); + configuration.override(SymbolConstants.PRODUCTION_MODE, false); // This symbol is meant to be controlled through a configuration properties file (loaded with the contributed // symbol source at the top). 
This is here to give a default value when the property is not included in the diff --git a/gepi/gepi-webapp/src/main/resources/META-INF/modules/gepi/components/gepiinput.js b/gepi/gepi-webapp/src/main/resources/META-INF/modules/gepi/components/gepiinput.js index 9770f751..c1dd5def 100644 --- a/gepi/gepi-webapp/src/main/resources/META-INF/modules/gepi/components/gepiinput.js +++ b/gepi/gepi-webapp/src/main/resources/META-INF/modules/gepi/components/gepiinput.js @@ -10,6 +10,7 @@ define(["jquery", "gepi/pages/index", "gepi/charts/data", "bootstrap5/tooltip"], observelistbchange(); togglelistb(); + toggleSidedTaxFilters(); observelistachange(); setuplistfileselectors(); setuplistfiledragndrop(listaId); @@ -52,6 +53,7 @@ define(["jquery", "gepi/pages/index", "gepi/charts/data", "bootstrap5/tooltip"], function observelistachange() { $(lista).on("input change", function() { togglelistb(); + toggleSidedTaxFilters(); }); } @@ -176,6 +178,22 @@ define(["jquery", "gepi/pages/index", "gepi/charts/data", "bootstrap5/tooltip"], } } + /* + * Deactivates or activates the List A- and List B-specific tax ID filters. 
+ */ + function toggleSidedTaxFilters() { + let islistaempty = $(lista).val().length == 0; + let taxidElements = $("#organismInputA, #organismInputB"); + let sidedTaxIdFilterDiv = $("#sidedTaxIdFilterDiv") + if (islistaempty) { + taxidElements.attr("disabled", true); + sidedTaxIdFilterDiv.tooltip("enable"); + } else { + taxidElements.attr("disabled", false); + sidedTaxIdFilterDiv.tooltip("disable"); + } + } + function setupShowInputPanel() { if (resultExists) { hideInput(); diff --git a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/components/GepiInput.tml b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/components/GepiInput.tml index 33ef05a3..c89a4c91 100644 --- a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/components/GepiInput.tml +++ b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/components/GepiInput.tml @@ -74,12 +74,18 @@
Filter the results by organism
Add multiple IDs separated by commas
- NCBI Taxonomy IDs + NCBI Taxonomy IDs (List A or B) +
+ NCBI Taxonomy IDs (List A) + + NCBI Taxonomy IDs (List B) + +
diff --git a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml index 36bd44b2..5c77c238 100644 --- a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml +++ b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml @@ -138,6 +138,10 @@

+

+ Regardless of the specific source database, GePI will map the input to all of its orthologs according to the NCBI Gene gene_orthologs.gz file. Thus, interaction retrieval will be performed across all species with that gene. + Species can be restricted using the NCBI Taxonomy filters described further below. +

Search types

@@ -220,10 +224,10 @@

Taxonomy filter
A comma-separated list of NCBI Taxonomy IDs may be provided to filter on the interaction partners. - NCBI Gene entries are always assigned a exactly one taxonomy ID. The GePI text processing pipeline also assigns species to protein families - based on the species discussed in the respective document. The species are stored as taxonomy IDs in the interaction database and - also used for species filter purposes. In case no species is mentioned at all - which commonly happens in PubMed abstracts - the human - taxonomy ID 9606 is assigned as this the most frequent organism discussed in PubMed. + NCBI Gene entries are always assigned exactly one taxonomy ID. The GePI text processing pipeline also assigns species to protein families + based on the species discussed in the respective document. In case no species is mentioned at all in a document - which commonly happens in PubMed abstracts - the human + taxonomy ID 9606 is assigned as this the most frequent organism discussed in PubMed. The species are stored as taxonomy IDs in the interaction database and + can be used for species filter purposes. GePI offers taxonomy filters for 1) List A or B, 2) only List A and 3) only List B. The semantics is that in case 1) an interaction will be retrieved if either of its arguments matches any of the specified species. In case 2),
From 9696ebdbaab1465ce5a0cb5cc637433a726cdc7d Mon Sep 17 00:00:00 2001 From: khituras Date: Mon, 9 Sep 2024 14:21:16 +0200 Subject: [PATCH 2/4] Add help-docs for new tax ID filters, clear them for examples. --- .../de/julielab/gepi/webapp/components/StatsWidget.java | 5 ++--- .../resources/META-INF/modules/gepi/components/gepiinput.js | 6 ++++++ .../de/julielab/gepi/webapp/components/GepiInput.tml | 2 +- .../main/resources/de/julielab/gepi/webapp/pages/Help.tml | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/StatsWidget.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/StatsWidget.java index 7fba8f8d..27f24546 100644 --- a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/StatsWidget.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/StatsWidget.java @@ -4,7 +4,6 @@ import de.julielab.gepi.core.retrieval.data.EsAggregatedResult; import de.julielab.gepi.core.retrieval.data.Event; import de.julielab.gepi.core.retrieval.data.IdConversionResult; -import de.julielab.gepi.core.retrieval.data.InputMode; import de.julielab.gepi.core.services.IGePiDataService; import de.julielab.gepi.webapp.data.InputMapping; import org.apache.commons.lang3.tuple.ImmutableTriple; @@ -109,11 +108,11 @@ public int getInputSize(String list, String type) { } public boolean isAList() { - return requestData.getInputMode().contains(InputMode.A) || isBList(); + return requestData.getListAGePiIds() != null; } public boolean isBList() { - return requestData.getInputMode().contains(InputMode.AB); + return requestData.getListBGePiIds() != null; } } diff --git a/gepi/gepi-webapp/src/main/resources/META-INF/modules/gepi/components/gepiinput.js b/gepi/gepi-webapp/src/main/resources/META-INF/modules/gepi/components/gepiinput.js index c1dd5def..ec8c3f93 100644 --- a/gepi/gepi-webapp/src/main/resources/META-INF/modules/gepi/components/gepiinput.js +++ 
b/gepi/gepi-webapp/src/main/resources/META-INF/modules/gepi/components/gepiinput.js @@ -251,6 +251,8 @@ define(["jquery", "gepi/pages/index", "gepi/charts/data", "bootstrap5/tooltip"], listaTextAreaId : "lista", listbTextAreaId : "listb", orgTextFieldId : "organismInput", + orgTextFieldIdA : "organismInputA", + orgTextFieldIdB : "organismInputB", eventTypeChecklistId : "eventtypes", negRegulationCheckboxSelector : "#eventtypes input[value='Negative_regulation']", radioLikelihoodNegRadioClientId: "radio_likelihood_negation", @@ -318,6 +320,8 @@ define(["jquery", "gepi/pages/index", "gepi/charts/data", "bootstrap5/tooltip"], const listaTextArea = document.getElementById(formElementIds.listaTextAreaId); const listbTextArea = document.getElementById(formElementIds.listbTextAreaId); const orgTextField = document.getElementById(formElementIds.orgTextFieldId); + const orgTextFieldA = document.getElementById(formElementIds.orgTextFieldIdA); + const orgTextFieldB = document.getElementById(formElementIds.orgTextFieldIdB); const eventTypeCheckboxes = document.querySelectorAll(`#${formElementIds.eventTypeChecklistId} input`); const radioLikelihoodNegRadio = document.querySelector(`input[clientid='${formElementIds.radioLikelihoodNegRadioClientId}'`); const includeUnaryCheckbox = document.getElementById(formElementIds.includeUnaryId); @@ -329,6 +333,8 @@ define(["jquery", "gepi/pages/index", "gepi/charts/data", "bootstrap5/tooltip"], listaTextArea.value = ""; listbTextArea.value = ""; orgTextField.value = ""; + orgTextFieldA.value = ""; + orgTextFieldB.value = ""; eventTypeCheckboxes.forEach(box => box.checked=true); includeUnaryCheckbox.checked = false; radioLikelihoodNegRadio.checked = true; diff --git a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/components/GepiInput.tml b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/components/GepiInput.tml index c89a4c91..7cc88c71 100644 --- 
a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/components/GepiInput.tml +++ b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/components/GepiInput.tml @@ -74,7 +74,7 @@
Filter the results by organism
Add multiple IDs separated by commas
diff --git a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml index 5c77c238..dbc3bc6c 100644 --- a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml +++ b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml @@ -227,7 +227,7 @@ NCBI Gene entries are always assigned exactly one taxonomy ID. The GePI text processing pipeline also assigns species to protein families based on the species discussed in the respective document. In case no species is mentioned at all in a document - which commonly happens in PubMed abstracts - the human taxonomy ID 9606 is assigned as this the most frequent organism discussed in PubMed. The species are stored as taxonomy IDs in the interaction database and - can be used for species filter purposes. GePI offers taxonomy filters for 1) List A or B, 2) only List A and 3) only List B. The semantics is that in case 1) an interaction will be retrieved if either of its arguments matches any of the specified species. In case 2), + can be used for species filter purposes. GePI offers taxonomy filters for 1) List A or B, 2) only List A and 3) only List B. The semantics is that in case 1) an interaction will be retrieved if either of its arguments matches any of the specified species. In case 2), the genes of List A will be restricted to the given organisms. Case 3) has two sub-cases: 3a) is when there are items on List B. Then, those items are restricted to the given organisms analogous to case 2). If List B is empty but an organism filter for List B is specified, then List B is implicitly all genes, groups, families etc. that belong to the given taxonomy IDs for List B.
From a942610f6597de64ab505f2e2bf82a34ed401191 Mon Sep 17 00:00:00 2001 From: khituras Date: Mon, 9 Sep 2024 17:26:25 +0200 Subject: [PATCH 3/4] Resolves #276, resolves #277. --- .../gepi/core/services/GePiDataService.java | 35 +++++++- .../gepi/core/services/IGePiDataService.java | 8 +- .../src/main/resources/ExcelResultCreation.py | 84 ++++++++++++------- .../core/services/GePiDataServiceTest.java | 8 +- .../webapp/components/TableResultWidget.java | 9 +- 5 files changed, 97 insertions(+), 47 deletions(-) diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GePiDataService.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GePiDataService.java index 9faceac0..1b910ecc 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GePiDataService.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GePiDataService.java @@ -26,6 +26,7 @@ import java.util.concurrent.Future; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.nio.charset.StandardCharsets.UTF_8; @@ -322,7 +323,8 @@ public JSONArray convertToJson(List eventList) { } @Override - public Path getOverviewExcel(Future eventRetrievalResult, long dataSessionId, EnumSet inputMode, String sentenceFilterString, String paragraphFilterString, String sectionNameFilterString) throws IOException, ExecutionException, InterruptedException { + public Path getOverviewExcel(Future eventRetrievalResult, GepiRequestData requestData) throws IOException, ExecutionException, InterruptedException { + final long dataSessionId = requestData.getDataSessionId(); long time = System.currentTimeMillis(); log.info("Creating event statistics Excel file for dataSessionId {}", dataSessionId); final Path tempStatusFile = getTempStatusFile(dataSessionId); @@ -336,7 +338,7 @@ public Path getOverviewExcel(Future eventRetrievalResult, Path xlsFile = getTempXlsDataFile(dataSessionId); 
writeOverviewTsvFile(eventRetrievalResult.get().getEventList(), tsvFile); updateDownloadFileCreationsStatus("Step 2 of 3: Retrieval of all interactions has finished. Creating Excel file.", dataSessionId); - createExcelSummaryFile(tsvFile, xlsFile, inputMode, sentenceFilterString, paragraphFilterString, sectionNameFilterString); + createExcelSummaryFile(tsvFile, xlsFile, requestData); updateDownloadFileCreationsStatus(EXCEL_FILE_SUCCESS_STATE + " The file is ready for download.", dataSessionId); time = System.currentTimeMillis() - time; log.info("Excel sheet creation took {} seconds", time / 1000); @@ -366,8 +368,33 @@ private void updateDownloadFileCreationsStatus(String status, long dataSessionId Files.writeString(tempStatusFile, status); } - private void createExcelSummaryFile(Path tsvFile, Path xlsFile, EnumSet inputMode, String sentenceFilterString, String paragraphFilterString, String sectionNameFilterString) throws IOException { - ProcessBuilder builder = new ProcessBuilder().command("python3", "-c", excelResultCreationScript, tsvFile.toAbsolutePath().toString(), xlsFile.toAbsolutePath().toString(), inputMode.stream().map(InputMode::name).collect(Collectors.joining(" ")), sentenceFilterString != null ? sentenceFilterString : "", paragraphFilterString != null ? paragraphFilterString : "", sectionNameFilterString != null ? 
sectionNameFilterString : ""); + private void createExcelSummaryFile(Path tsvFile, Path xlsFile, GepiRequestData requestData) throws IOException { + final EnumSet inputMode = requestData.getInputMode(); + final String sentenceFilterString = requestData.getSentenceFilterString(); + final String paragraphFilterString = requestData.getParagraphFilterString(); + final String sectionNameFilterString = requestData.getSectionNameFilterString(); + final List eventTypes = requestData.getEventTypes(); + final boolean includeUnary = requestData.isIncludeUnary(); + final String[] taxId = requestData.getTaxId(); + final String[] taxIdsA = requestData.getTaxIdsA(); + final String[] taxIdsB = requestData.getTaxIdsB(); + Map likelihood2string = Map.of(1, "negation", 2, "low", 3, "investigation", 4, "moderate", 5, "high", 6, "assertion"); + + ProcessBuilder builder = new ProcessBuilder().command("python3", "-c", + excelResultCreationScript, + tsvFile.toAbsolutePath().toString(), + xlsFile.toAbsolutePath().toString(), + inputMode.stream().map(InputMode::name).collect(Collectors.joining(" ")), + sentenceFilterString != null ? sentenceFilterString : "", + paragraphFilterString != null ? paragraphFilterString : "", + sectionNameFilterString != null ? sectionNameFilterString : "", + eventTypes.stream().collect(Collectors.joining(" ")), + Boolean.toString(includeUnary), + likelihood2string.get(requestData.getEventLikelihood()), + taxId != null ? Stream.of(taxId).collect(Collectors.joining(" ")) : "", + taxIdsA != null ? Stream.of(taxIdsA).collect(Collectors.joining(" ")) : "", + taxIdsB != null ? 
Stream.of(taxIdsB).collect(Collectors.joining(" ")) : "" + ); log.info("xls builder command: {}", builder.command()); Process process = builder.start(); InputStream processInput = process.getInputStream(); diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/IGePiDataService.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/IGePiDataService.java index 39be18b1..43d5a88f 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/IGePiDataService.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/IGePiDataService.java @@ -9,7 +9,6 @@ import java.io.IOException; import java.lang.ref.WeakReference; import java.nio.file.Path; -import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; @@ -75,13 +74,10 @@ public interface IGePiDataService { *

To do this, the event data is written to a temporary file, a Python-Pandas script is applied and the * path of the resulting Excel file is then returned.

* @param events The events to create the result workbook for. - * @param inputMode - * @param sentenceFilterString - * @param paragraphFilterString - * @param sectionNameFilterString + * @param requestData The request whose data session ID, input mode, filter strings, event types, likelihood and taxonomy IDs are reported in the Excel file. * @return The path of the created Excel file. */ - Path getOverviewExcel(Future events, long dataSessionId, EnumSet inputMode, String sentenceFilterString, String paragraphFilterString, String sectionNameFilterString) throws IOException, ExecutionException, InterruptedException; + Path getOverviewExcel(Future events, GepiRequestData requestData) throws IOException, ExecutionException, InterruptedException; String getDownloadFileCreationStatus(long dataSessionId) throws IOException; diff --git a/gepi/gepi-core/src/main/resources/ExcelResultCreation.py b/gepi/gepi-core/src/main/resources/ExcelResultCreation.py index f8bf62ee..d734af8f 100644 --- a/gepi/gepi-core/src/main/resources/ExcelResultCreation.py +++ b/gepi/gepi-core/src/main/resources/ExcelResultCreation.py @@ -9,6 +9,15 @@ from datetime import date from pandas import ExcelWriter +class Counter: + # Helper class to count rows + def __init__(self, start=0): + self.value = start + + def inc(self, amount=1): + current_value = self.value + self.value += amount + return current_value def makeNorm2MajorityMap(originalDf, rx): allsymbols = pd.concat([originalDf['arg1symbol'], originalDf['arg2symbol']]) @@ -38,7 +47,7 @@ def makeGeneHyperlink(value): url = "https://github.com/sorgerlab/famplex/{}" return f'=HYPERLINK("{url.format(id)}", "{value}")' -def writeresults(input,output,inputMode,sentenceFilterString,paragraphFilterString,sectionNameFilterString): +def writeresults(input,output,searchParameters): columndesc=[ 'Input gene symbol', 'Event partner gene symbol', 'the document text of the input gene in the found sentence', @@ -117,7 +126,7 @@ def writeresults(input,output,inputMode,sentenceFilterString,paragraphFilterStri with ExcelWriter(output, mode="w") as ew: pd.DataFrame().to_excel(ew,
sheet_name='Frontpage') df.to_excel(ew, sheet_name="Results", index=False) - if 'A' in inputMode or 'AB' in inputMode: + if 'A' in searchParameters['inputMode'] or 'AB' in searchParameters['inputMode']: givengenesfreq.to_excel(ew, sheet_name="Given Genes Statistics") othergenesfreq.to_excel(ew, sheet_name="Event Partner Statistics") relfreq.to_excel(ew, sheet_name="Event Statistics") @@ -130,41 +139,56 @@ def writeresults(input,output,inputMode,sentenceFilterString,paragraphFilterStri frontpage = ew.sheets['Frontpage'] #frontpage.hide_gridlines(2) bold = ew.book.add_format({'bold': True}) - frontpage.write(0,0, f'This is a GePi statistics file which contains results of event extraction. Creation date is {date.today()}.') - frontpage.write(1,0, 'The contained worksheets contain the actual text mining results as well as statistics extracted from them.') - frontpage.write(2,0, 'The result was obtained using the following filter terms:') - frontpage.write(3,0, f'Sentence level filter query: {sentenceFilterString}') - frontpage.write(4,0, f'Paragraph level filter query: {paragraphFilterString}') - frontpage.write(5,0, f'Section Heading filter query: {sectionNameFilterString}') - frontpage.write(6,0, 'Only molecular events that were described in a sentence or a paragraph containing the filter terms was returned for this result.') - frontpage.write(8,0, 'The "Results" sheet is a large table containing the gene event arguments, an indication of how well the text matched') - frontpage.write(9,0, 'a gene synonym, the recognized type of the event (such as "phosphorylation" or "regulation"),') - frontpage.write(10,0, 'the document ID (PubMed ID for PubMed results, PMC ID for PubMed Central results) and the sentence in which the') - frontpage.write(11,0, 'respective event was found.') - resultsdesc.to_excel(ew, startrow=7, index=False, sheet_name='Frontpage') + c = Counter() + frontpage.write(c.inc(),0, f'This is a GePi statistics file which contains results of event 
extraction. Creation date is {date.today()}.') + frontpage.write(c.inc(),0, 'The contained worksheets contain the actual text mining results as well as statistics extracted from them.') + frontpage.write(c.inc(),0, 'The result was obtained using the following filter terms:') + frontpage.write(c.inc(),0, f'Sentence level filter query: {searchParameters["sentenceFilterString"]}') + frontpage.write(c.inc(),0, f'Paragraph level filter query: {searchParameters["paragraphFilterString"]}') + frontpage.write(c.inc(),0, f'Section Heading filter query: {searchParameters["sectionNameFilterString"]}') + frontpage.write(c.inc(),0, f'Event types included: {searchParameters["eventTypes"]}') + frontpage.write(c.inc(),0, f'Unary events allowed: {searchParameters["includeUnary"]}') + frontpage.write(c.inc(),0, f'Minimum event likelihood: {searchParameters["likelihood"]}') + frontpage.write(c.inc(),0, f'Organism filter A or B: {searchParameters["taxId"]}') + frontpage.write(c.inc(),0, f'Organism filter A: {searchParameters["taxIdsA"]}') + frontpage.write(c.inc(),0, f'Organism filter B: {searchParameters["taxIdsB"]}') + frontpage.write(c.inc(),0, 'Only molecular events that were described in a sentence or a paragraph containing the filter terms was returned for this result.') + frontpage.write(c.inc(),0, 'The "Results" sheet is a large table containing the gene event arguments, an indication of how well the text matched') + frontpage.write(c.inc(),0, 'a gene synonym, the recognized type of the event (such as "phosphorylation" or "regulation"),') + frontpage.write(c.inc(),0, 'the document ID (PubMed ID for PubMed results, PMC ID for PubMed Central results) and the sentence in which the') + frontpage.write(c.inc(),0, 'respective event was found.') + c.inc() + resultsdesc.to_excel(ew, startrow=c.inc(14), index=False, sheet_name='Frontpage') #frontpage.write(24,0, 'Description of the sheets:', bold) - frontpage.write(26,0, 'Description of the sheets:') - if 'A' in inputMode or 'AB' in 
inputMode: - frontpage.write(27,0, '"Given Genes Statistics" shows how often the input gene symbols were found in relations with other genes.') - frontpage.write(28,0, '"Event Partner Statistics" shows the same but from the perspective of the interaction partners of the input genes.') - frontpage.write(29,0, '"Event Statistics" lists the extracted events grouped by their combination of input and event partner genes. In other words, it counts how often two genes interact with each other in the results. The value "none" indicates unary events without a second interaction partner.') - frontpage.write(30,0, '"Input Gene Event Diversity" shows for each input gene symbol how many different interaction partners it has in the results.') - frontpage.write(31,0, '"Gene Argument Event Diversity" shows for each gene that participated in an event the number of different interaction partners in the results.') + frontpage.write(c.inc(),0, 'Description of the sheets:') + if 'A' in searchParameters['inputMode'] or 'AB' in searchParameters['inputMode']: + frontpage.write(c.inc(),0, '"Given Genes Statistics" shows how often the input gene symbols were found in relations with other genes.') + frontpage.write(c.inc(),0, '"Event Partner Statistics" shows the same but from the perspective of the interaction partners of the input genes.') + frontpage.write(c.inc(),0, '"Event Statistics" lists the extracted events grouped by their combination of input and event partner genes. In other words, it counts how often two genes interact with each other in the results. 
The value "none" indicates unary events without a second interaction partner.') + frontpage.write(c.inc(),0, '"Input Gene Event Diversity" shows for each input gene symbol how many different interaction partners it has in the results.') + frontpage.write(c.inc(),0, '"Gene Argument Event Diversity" shows for each gene that participated in an event the number of different interaction partners in the results.') else: - frontpage.write(27,0, '"Gene Interaction Statistics" shows how often gene symbols were found in relations with other genes.') - frontpage.write(28,0, '"Event Statistics" lists the extracted events grouped by their combination of input and event partner genes. In other words, it counts how often two genes interact with each other in the results.') - frontpage.write(29,0, '"Gene Argument Event Diversity" shows for each gene that participated in an event the number of different interaction partners in the results. The value "none" indicates the number of unary events without a second interaction partner.') + frontpage.write(c.inc(),0, '"Gene Interaction Statistics" shows how often gene symbols were found in relations with other genes.') + frontpage.write(c.inc(),0, '"Event Statistics" lists the extracted events grouped by their combination of input and event partner genes. In other words, it counts how often two genes interact with each other in the results.') + frontpage.write(c.inc(),0, '"Gene Argument Event Diversity" shows for each gene that participated in an event the number of different interaction partners in the results. 
The value "none" indicates the number of unary events without a second interaction partner.') return df if __name__ == "__main__": input = sys.argv[1] output = sys.argv[2] - inputMode = sys.argv[3].split(' ') - sentenceFilterString = sys.argv[4] - paragraphFilterString = sys.argv[5] - sectionNameFilterString = sys.argv[6] - - writeresults(input,output,inputMode,sentenceFilterString,paragraphFilterString,sectionNameFilterString) + searchParameters = { + 'inputMode': sys.argv[3].split(' '), + 'sentenceFilterString': sys.argv[4], + 'paragraphFilterString': sys.argv[5], + 'sectionNameFilterString': sys.argv[6], + 'eventTypes': sys.argv[7].split(' '), + 'includeUnary': sys.argv[8], + 'likelihood': sys.argv[9], + 'taxId': sys.argv[10], + 'taxIdsA': sys.argv[11], + 'taxIdsB': sys.argv[12] + } + writeresults(input,output,searchParameters) diff --git a/gepi/gepi-core/src/test/java/de/julielab/gepi/core/services/GePiDataServiceTest.java b/gepi/gepi-core/src/test/java/de/julielab/gepi/core/services/GePiDataServiceTest.java index 967f5c65..19e7bbdd 100644 --- a/gepi/gepi-core/src/test/java/de/julielab/gepi/core/services/GePiDataServiceTest.java +++ b/gepi/gepi-core/src/test/java/de/julielab/gepi/core/services/GePiDataServiceTest.java @@ -1,9 +1,6 @@ package de.julielab.gepi.core.services; -import de.julielab.gepi.core.retrieval.data.Argument; -import de.julielab.gepi.core.retrieval.data.Event; -import de.julielab.gepi.core.retrieval.data.EventRetrievalResult; -import de.julielab.gepi.core.retrieval.data.InputMode; +import de.julielab.gepi.core.retrieval.data.*; import org.apache.tapestry5.json.JSONArray; import org.apache.tapestry5.json.JSONObject; import org.junit.BeforeClass; @@ -85,7 +82,8 @@ public void writeExcelSummary() throws Exception { final EventRetrievalResult eventRetrievalResult = new EventRetrievalResult(); eventRetrievalResult.setEvents(events); - Path outputFile = gePiDataService.getOverviewExcel(CompletableFuture.completedFuture(eventRetrievalResult), 1234, 
EnumSet.of(InputMode.A), null, null, null); + final GepiRequestData requestData = new GepiRequestData().withDataSessionId(1234).withInputMode(EnumSet.of(InputMode.A)); + Path outputFile = gePiDataService.getOverviewExcel(CompletableFuture.completedFuture(eventRetrievalResult), requestData); assertThat(outputFile).exists(); } diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/TableResultWidget.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/TableResultWidget.java index 0329b912..223043c3 100644 --- a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/TableResultWidget.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/TableResultWidget.java @@ -16,6 +16,7 @@ import de.julielab.java.utilities.FileUtilities; import org.apache.tapestry5.ComponentResources; import org.apache.tapestry5.StreamResponse; +import org.apache.tapestry5.SymbolConstants; import org.apache.tapestry5.annotations.*; import org.apache.tapestry5.beanmodel.BeanModel; import org.apache.tapestry5.beanmodel.services.BeanModelSource; @@ -24,6 +25,7 @@ import org.apache.tapestry5.http.services.Response; import org.apache.tapestry5.ioc.LoggerSource; import org.apache.tapestry5.ioc.annotations.Inject; +import org.apache.tapestry5.ioc.annotations.Symbol; import org.apache.tapestry5.services.javascript.JavaScriptSupport; import org.slf4j.Logger; @@ -93,6 +95,9 @@ public class TableResultWidget extends GepiWidget { private LoggerSource loggerSource; @Environmental private JavaScriptSupport javaScriptSupport; + @Inject + @Symbol(SymbolConstants.PRODUCTION_MODE) + private boolean productionMode; // @Property // private List selectedColumns; @@ -246,7 +251,7 @@ public void prepareResponse(Response response) { // log.info("[{}] Unrolled result retrieval for Excel sheet creation took {} seconds", requestData.getDataSessionId(), time / 1000); // } final Future unrolledResult4download = 
dataService.getUnrolledResult4download(requestData, eventRetrievalService); - statisticsFile = dataService.getOverviewExcel(unrolledResult4download, requestData.getDataSessionId(), requestData.getInputMode(), requestData.getSentenceFilterString(), requestData.getParagraphFilterString(), requestData.getSectionNameFilterString()); + statisticsFile = dataService.getOverviewExcel(unrolledResult4download, requestData); response.setHeader("Content-Length", "" + Files.size(statisticsFile)); // output into file response.setHeader("Content-disposition", "attachment; filename=" + statisticsFile.getFileName()); @@ -293,7 +298,7 @@ public String getArgumentLink(int argPosition) { public void afterRender() { final Link downloadEventLink = resources.createEventLink("download"); - javaScriptSupport.require("gepi/charts/tablewidget").invoke("download").with(downloadEventLink.toAbsoluteURI().replace(":80", "")); + javaScriptSupport.require("gepi/charts/tablewidget").invoke("download").with(downloadEventLink.toAbsoluteURI(productionMode)); javaScriptSupport.require("gepi/charts/tablewidget").invoke("setupHighlightTooltips"); javaScriptSupport.require("gepi/base").invoke("setuptooltips"); } From 9c429c3acf1dc8f1521926c4278970f0f2cbf737 Mon Sep 17 00:00:00 2001 From: khituras Date: Thu, 19 Sep 2024 15:41:38 +0200 Subject: [PATCH 4/4] Minor changes. 
--- .../java/de/julielab/gepi/core/services/GeneIdService.java | 1 + gepi/gepi-core/src/main/resources/ExcelResultCreation.py | 5 ++--- .../java/de/julielab/gepi/webapp/components/GepiInput.java | 2 +- .../main/resources/de/julielab/gepi/webapp/pages/Help.tml | 3 ++- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GeneIdService.java b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GeneIdService.java index 18fb02c0..4059fb10 100644 --- a/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GeneIdService.java +++ b/gepi/gepi-core/src/main/java/de/julielab/gepi/core/services/GeneIdService.java @@ -296,6 +296,7 @@ public Future convert(Stream stream, IdType to) { return CompletableFuture.supplyAsync(() -> { long time = System.currentTimeMillis(); final Multimap idsByType = determineIdTypes(stream); + log.debug("Starting to convert {} input IDs for GePI retrieval.", idsByType.size()); final List> convertedIds = new ArrayList<>(); for (IdType from : idsByType.keySet()) { final Collection sourceIds = idsByType.get(from); diff --git a/gepi/gepi-core/src/main/resources/ExcelResultCreation.py b/gepi/gepi-core/src/main/resources/ExcelResultCreation.py index d734af8f..1795b98d 100644 --- a/gepi/gepi-core/src/main/resources/ExcelResultCreation.py +++ b/gepi/gepi-core/src/main/resources/ExcelResultCreation.py @@ -140,7 +140,7 @@ def writeresults(input,output,searchParameters): #frontpage.hide_gridlines(2) bold = ew.book.add_format({'bold': True}) c = Counter() - frontpage.write(c.inc(),0, f'This is a GePi statistics file which contains results of event extraction. Creation date is {date.today()}.') + frontpage.write(c.inc(),0, f'This is a GePI statistics file which contains results of event extraction.
Creation date is {date.today()}.') frontpage.write(c.inc(),0, 'The contained worksheets contain the actual text mining results as well as statistics extracted from them.') frontpage.write(c.inc(),0, 'The result was obtained using the following filter terms:') frontpage.write(c.inc(),0, f'Sentence level filter query: {searchParameters["sentenceFilterString"]}') @@ -155,8 +155,7 @@ def writeresults(input,output,searchParameters): frontpage.write(c.inc(),0, 'Only molecular events that were described in a sentence or a paragraph containing the filter terms was returned for this result.') frontpage.write(c.inc(),0, 'The "Results" sheet is a large table containing the gene event arguments, an indication of how well the text matched') frontpage.write(c.inc(),0, 'a gene synonym, the recognized type of the event (such as "phosphorylation" or "regulation"),') - frontpage.write(c.inc(),0, 'the document ID (PubMed ID for PubMed results, PMC ID for PubMed Central results) and the sentence in which the') - frontpage.write(c.inc(),0, 'respective event was found.') + frontpage.write(c.inc(),0, 'the document ID (PubMed ID for PubMed results, PMC ID for PubMed Central results) and the sentence in which the respective event was found.') c.inc() resultsdesc.to_excel(ew, startrow=c.inc(14), index=False, sheet_name='Frontpage') #frontpage.write(24,0, 'Description of the sheets:', bold) diff --git a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java index 07fd7880..5f63eecd 100644 --- a/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java +++ b/gepi/gepi-webapp/src/main/java/de/julielab/gepi/webapp/components/GepiInput.java @@ -360,7 +360,7 @@ void afterRender() { private Future convertToAggregateIds(String input, String listName) { if (input != null) { List inputList = 
Stream.of(input.split("[\n]")).map(String::trim).filter(Predicate.not(String::isBlank)).collect(Collectors.toList()); - log.debug("Got {} input IDs from {}", inputList.size(), listName); + log.debug("Got {} input IDs from {} for conversion to GePI IDs", inputList.size(), listName); IdType toIdType = IdType.GEPI_AGGREGATE; return geneIdService.convert(inputList.stream(), toIdType); } diff --git a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml index dbc3bc6c..a9249015 100644 --- a/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml +++ b/gepi/gepi-webapp/src/main/resources/de/julielab/gepi/webapp/pages/Help.tml @@ -498,7 +498,8 @@ flatly listing all retrieved interactions can be obtained.

- The API is realized through URL request parameters, i.e. the GePI Web address followed by a + The API currently works through HTTP GET requests (POST is not yet supported) and + is realized through URL request parameters, i.e. the GePI Web address followed by a single question mark (?) and parameter-value pairs. The parameter and the value of a pair are separated by an equals sign (=), and consecutive pairs are separated by ampersand characters (&).