17
- [3.7.6,4.0.0)
+ [3.9.2,4.0.0)
qanary
qanary-component-qb-birthdata-wikidata
1.4.13
diff --git a/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/Application.java b/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/Application.java
index 65aa10bd5..e38526814 100644
--- a/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/Application.java
+++ b/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/Application.java
@@ -14,10 +14,10 @@
import org.springframework.context.annotation.ComponentScan;
@SpringBootApplication
-@ComponentScan(basePackages = {"eu.wdaqua.qanary"})
+@ComponentScan(basePackages = { "eu.wdaqua.qanary" })
/**
- * basic class for wrapping functionality to a Qanary component
- * note: there is no need to change something here
+ * basic class for wrapping functionality to a Qanary component; note: there is
+ * no need to change anything here
*/
public class Application {
@@ -29,8 +29,8 @@ public static void main(String[] args) {
}
/**
- * this method is needed to make the QanaryComponent in this project known
- * to the QanaryServiceController in the qanary_component-template
+ * this method is needed to make the QanaryComponent in this project known to
+ * the QanaryServiceController in the qanary_component-template
*
* @return
*/
@@ -38,9 +38,10 @@ public static void main(String[] args) {
public QanaryComponent qanaryComponent(@Value("${spring.application.name}") final String applicationName) {
return new BirthDataQueryBuilder(applicationName);
}
-
- @Bean
- public BirthDataQueryBuilderController getBirthDataQueryBuilderController(BirthDataQueryBuilder myBirthDataQueryBuilder) {
+
+ @Bean
+ public BirthDataQueryBuilderController getBirthDataQueryBuilderController(
+ BirthDataQueryBuilder myBirthDataQueryBuilder) {
return new BirthDataQueryBuilderController(myBirthDataQueryBuilder);
}
diff --git a/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/BirthDataQueryBuilder.java b/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/BirthDataQueryBuilder.java
index 6a7aaf06f..5cbcd75fe 100644
--- a/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/BirthDataQueryBuilder.java
+++ b/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/BirthDataQueryBuilder.java
@@ -1,13 +1,13 @@
package eu.wdaqua.component.qb.birthdata.wikidata;
-import eu.wdaqua.qanary.commons.QanaryExceptionNoOrMultipleQuestions;
-import eu.wdaqua.qanary.commons.QanaryMessage;
-import eu.wdaqua.qanary.commons.QanaryQuestion;
-import eu.wdaqua.qanary.commons.QanaryUtils;
-import eu.wdaqua.qanary.commons.triplestoreconnectors.QanaryTripleStoreConnector;
-import eu.wdaqua.qanary.component.QanaryComponent;
-import eu.wdaqua.qanary.exceptions.SparqlQueryFailed;
-import io.swagger.v3.oas.annotations.Operation;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.QuerySolutionMap;
@@ -19,307 +19,459 @@
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
-import java.io.IOException;
-import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
+import eu.wdaqua.qanary.commons.QanaryExceptionNoOrMultipleQuestions;
+import eu.wdaqua.qanary.commons.QanaryMessage;
+import eu.wdaqua.qanary.commons.QanaryQuestion;
+import eu.wdaqua.qanary.commons.QanaryUtils;
+import eu.wdaqua.qanary.commons.triplestoreconnectors.QanaryTripleStoreConnector;
+import eu.wdaqua.qanary.component.QanaryComponent;
+import eu.wdaqua.qanary.exceptions.SparqlQueryFailed;
+import io.swagger.v3.oas.annotations.Operation;
/**
- * represents a query builder to answer questions regarding birthplace and date using Wikidata
+ * represents a query builder to answer questions regarding birthplace and date
+ * using Wikidata
*
- * requirements: expects a textual question to be stored in the Qanary triplestore,
- * written in English language, as well as previously annotated named entities
+ * requirements: expects a textual question to be stored in the Qanary
+ * triplestore, written in English language, as well as previously annotated
+ * named entities
*
- * outcome: if the question structure is supported and a previous component (NED/NER) has found
- * named entities then this component constructs a Wikidata query that might be used to compute
- * the answer to the question
+ * outcome: if the question structure is supported and a previous component
+ * (NED/NER) has found named entities then this component constructs a Wikidata
+ * query that might be used to compute the answer to the question
*/
@Component
public class BirthDataQueryBuilder extends QanaryComponent {
- private static final Logger logger = LoggerFactory.getLogger(BirthDataQueryBuilder.class);
-
- private static final String FILENAME_ANNOTATIONS = "/queries/getAnnotation.rq";
- private static final String FILENAME_ANNOTATIONS_FILTERED = "/queries/getAnnotationFilteredOnlyWikidata.rq";
-
- private static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON = "/queries/getQuestionAnswerFromWikidataByPerson.rq";
- private static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME = "/queries/getQuestionAnswerFromWikidataByFirstnameLastname.rq";
-
- private static final String FIRSTNAME_ANNOTATION = "FIRST_NAME";
- private static final String LASTNAME_ANNOTATION = "LAST_NAME";
-
- private static final String GRAPH = "graph";
- private static final String VALUE = "value";
-
- private final String applicationName;
-
- private QanaryUtils myQanaryUtils;
- private QanaryQuestion myQanaryQuestion;
- private String myQuestion;
-
- private final String[] supportedQuestionPatterns = {
- "([Ww]here and when was )(.*)( born)",
- "([Ww]here was )(.*)( born)",
- "([Ww]hen was )(.*)( born)"
- };
-
- private int patternIndex;
-
- public BirthDataQueryBuilder(@Value("$P{spring.application.name}") final String applicationName) {
- this.applicationName = applicationName;
- // check if files exists and are not empty
- QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_ANNOTATIONS);
- QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_ANNOTATIONS_FILTERED);
- QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON);
- QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME);
- }
-
- /**
- * compare the question against regular expression(s) representing the supported format
- * and if a match is found, store the matched pattern index
- *
- * @param questionString the textual question
- */
- @Operation(
- summary = "Check if the question is supported and store the matched pattern index",
- operationId = "isQuestionSupported",
- description = "Compare the question against regular expression(s) representing the supported format and if a match is found, store the matched pattern index"
- )
- private boolean isQuestionSupported(String questionString) {
- for (int i = 0; i < this.supportedQuestionPatterns.length; i++) {
- String pattern = this.supportedQuestionPatterns[i];
-
- Pattern p = Pattern.compile(pattern);
- Matcher m = p.matcher(questionString);
- logger.info("checking pattern \"{}\"", pattern);
- if (m.find()) {
- this.patternIndex = i;
- return true;
- }
- }
-
- return false;
- }
-
- /**
- * Find the position of a name in the textual question.
- *
- * @param questionString the textual question
- * @param pattern a regular expression (from supportedQuestionPatterns)
- */
- @Operation(
- summary = "Find the index of the entity in the question",
- operationId = "getNamePosition",
- description = "Find the position of a name in the textual question." //
- + "The name is represented as a matched group within supportedQuestionPatterns."
- )
- private int getNamePosition(String questionString, String pattern) {
- Matcher m = Pattern.compile(pattern).matcher(questionString);
- m.find();
- int index = m.start(2);
- return index;
- }
-
- private String loadQueryFromFile(String filenameWithRelativePath, QuerySolutionMap bindings) throws IOException {
- return QanaryTripleStoreConnector.readFileFromResourcesWithMap(filenameWithRelativePath, bindings);
- }
-
- /**
- * standard method for processing a message from the central Qanary component
- *
- * @param myQanaryMessage
- * @throws Exception
- */
- @Operation(
- summary = "Process a Qanary question with BirthDataQueryBuilder", //
- operationId = "process", //
- description = "Encapsulates the main functionality of this component. " //
- + "Construct a Wikidata query to find birth date and place for named entities."
- )
- @Override
- public QanaryMessage process(QanaryMessage myQanaryMessage) throws Exception {
- logger.info("process: {}", myQanaryMessage);
-
- // STEP 1: Get the required Data
- //
- // This example component requires the textual representation of the Question
- // as well as annotations of Wikidata entities made by the OpenTapioca NED.
-
- // get the question as String
- this.myQanaryUtils = this.getUtils(myQanaryMessage);
- this.myQanaryQuestion = new QanaryQuestion<>(myQanaryMessage, myQanaryUtils.getQanaryTripleStoreConnector());
- this.myQuestion = myQanaryQuestion.getTextualRepresentation();
-
- // This component is only supposed to answer a specific type of question.
- // Therefore, we only need to continue if the question asks for birthplace and date or if there is an
- // annotation of the first and lastname.
-
-
- // Get the firstname annotation if it's annotated
- QuerySolutionMap bindingsForFirstname = new QuerySolutionMap();
- bindingsForFirstname.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
- bindingsForFirstname.add(VALUE, ResourceFactory.createStringLiteral(FIRSTNAME_ANNOTATION));
-
- String sparqlCheckFirstname = this.loadQueryFromFile(FILENAME_ANNOTATIONS, bindingsForFirstname);
- ResultSet resultsetFirstname = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlCheckFirstname);
-
- // Get the lastname annotation, if it's annotated
- QuerySolutionMap bindingsForLastname = new QuerySolutionMap();
- // the currently used graph
- bindingsForLastname.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
- // annotated for the current question
- bindingsForLastname.add(VALUE, ResourceFactory.createStringLiteral(LASTNAME_ANNOTATION));
-
- String sparqlCheckLastname = this.loadQueryFromFile(FILENAME_ANNOTATIONS, bindingsForLastname);
- ResultSet resultsetLastname = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlCheckLastname);
-
-
- // STEP 2: Create queries for Wikidata if the question is supported or annotations are available
- ArrayList queriesForAnnotation = new ArrayList<>();
-
- if (resultsetFirstname.hasNext() && resultsetLastname.hasNext()) {
- // In this example, we are only interested in Entities that were found from another component and
- // annotated with the annotation "FIRST_NAME" and "LAST_NAME".
- queriesForAnnotation = createQueriesForAnnotation(resultsetFirstname, resultsetLastname);
- } else {
- logger.info("no annotation for {} and {} found", FIRSTNAME_ANNOTATION, LASTNAME_ANNOTATION);
- }
-
- if ((queriesForAnnotation.isEmpty() || queriesForAnnotation.get(0).isBlank()) && this.isQuestionSupported(myQuestion)) {
- // In this example we are only interested in Entities that were found at a specific point
- // in the question: e.g., 'when and where was born?'.
- // Because we do not require entities that might have been found anywhere else in the
- // question we can filter our results:
-
- int filterStart = this.getNamePosition(myQuestion, this.supportedQuestionPatterns[this.patternIndex]);
- // formulate a query to find existing information
- queriesForAnnotation = createQueriesForAnnotation(filterStart);
-
- }
-
- // If no query was created, we can stop here.
- if (queriesForAnnotation.isEmpty() || queriesForAnnotation.get(0).isBlank() ) {
- logger.warn("nothing to do here as question \"{}\" does not have the supported format", myQuestion);
- return myQanaryMessage;
- }
-
-
- for (int i = 0; i < queriesForAnnotation.size(); i++) {
- // store the created select query as an annotation for the current question
- // define here the parameters for the SPARQL INSERT query
- QuerySolutionMap bindings = new QuerySolutionMap();
- // use here the variable names defined in method insertAnnotationOfAnswerSPARQL
- bindings.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
- bindings.add("targetQuestion", ResourceFactory.createResource(myQanaryQuestion.getUri().toASCIIString()));
- bindings.add("selectQueryThatShouldComputeTheAnswer", ResourceFactory.createStringLiteral(queriesForAnnotation.get(i)));
- bindings.add("confidence", ResourceFactory.createTypedLiteral("1.0", XSDDatatype.XSDfloat)); // as it is rule based, a high confidence is expressed
- bindings.add("application", ResourceFactory.createResource("urn:qanary:" + this.applicationName));
-
- // get the template of the INSERT query
- String insertDataIntoQanaryTriplestoreQuery = QanaryTripleStoreConnector.insertAnnotationOfAnswerSPARQL(bindings);
- logger.info("SPARQL insert for adding data to Qanary triplestore: {}", insertDataIntoQanaryTriplestoreQuery);
-
- //STEP 4: Push the computed result to the Qanary triplestore
- logger.info("store data in graph {} of Qanary triplestore endpoint {}", //
- myQanaryMessage.getValues().get(myQanaryMessage.getOutGraph()), //
- myQanaryMessage.getValues().get(myQanaryMessage.getEndpoint()));
- myQanaryUtils.getQanaryTripleStoreConnector().update(insertDataIntoQanaryTriplestoreQuery);
- }
-
- return myQanaryMessage;
- }
-
- private ArrayList createQueriesForAnnotation(int filterStart) throws IOException, QanaryExceptionNoOrMultipleQuestions, URISyntaxException, SparqlQueryFailed {
- QuerySolutionMap bindingsForAnnotation = new QuerySolutionMap();
- // the currently used graph
- bindingsForAnnotation.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
- // annotated for the current question
- bindingsForAnnotation.add("source", ResourceFactory.createResource(myQanaryQuestion.getUri().toASCIIString()));
- // only for relevant annotations
- bindingsForAnnotation.add("filterStart", ResourceFactory.createTypedLiteral(String.valueOf(filterStart), XSDDatatype.XSDint));
-
- String sparqlGetAnnotation = this.loadQueryFromFile(FILENAME_ANNOTATIONS_FILTERED, bindingsForAnnotation);
-
- // STEP 3: Compute SPARQL select queries that should produce the result for every identified entity
- //
- // Rather than computing a (textual) result this component provides a
- // SPARQL query that might be used to answer the question.
- // This query can the used by other components.
-
- // there might be multiple entities identified for one name
- ResultSet resultset = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlGetAnnotation);
- ArrayList queries = new ArrayList<>();
- while (resultset.hasNext()) {
- QuerySolution tupel = resultset.next();
- RDFNode wikidataResource = tupel.get("wikidataResource");
- logger.info("creating query for resource: {}", wikidataResource);
- String createdWikiDataQuery = createWikidataSparqlQuery(wikidataResource);
- queries.add(createdWikiDataQuery);
- }
-
- return queries;
- }
-
- private ArrayList createQueriesForAnnotation(ResultSet resultsetFirstname, ResultSet resultsetLastname) throws IOException {
- ArrayList firstnameStartsEnds = new ArrayList<>();
- ArrayList lastnameStartsEnds = new ArrayList<>();
-
- while (resultsetFirstname.hasNext()) {
- Integer[] startEnd = new Integer[2];
- QuerySolution tupel = resultsetFirstname.next();
- startEnd[0] = tupel.getLiteral("start").getInt();
- startEnd[1] = tupel.getLiteral("end").getInt();
-
- firstnameStartsEnds.add(startEnd);
- }
-
- while (resultsetLastname.hasNext()) {
- Integer[] startEnd = new Integer[2];
- QuerySolution tupel = resultsetLastname.next();
- startEnd[0] = tupel.getLiteral("start").getInt();
- startEnd[1] = tupel.getLiteral("end").getInt();
-
- lastnameStartsEnds.add(startEnd);
- }
-
- ArrayList queries = new ArrayList<>();
- for (int i = 0; i < firstnameStartsEnds.size(); i++) {
- String firstanme = "";
- String lastname = "";
-
-
- try {
- firstanme = myQuestion.substring(firstnameStartsEnds.get(i)[0], firstnameStartsEnds.get(i)[1]);
- lastname = myQuestion.substring(lastnameStartsEnds.get(i)[0], lastnameStartsEnds.get(i)[1]);
- } catch (Exception e) {
- logger.error("error while get first or lastname: {}", e.getMessage());
- break;
- }
-
- logger.info("creating query for {} {}", firstanme, lastname);
-
- String createdWikiDataQuery = createWikidataSparqlQuery(firstanme, lastname);
- queries.add(createdWikiDataQuery);
- }
-
- return queries;
- }
-
- public String createWikidataSparqlQuery(RDFNode wikidataResource) throws IOException {
- // populate a generalized answer query with the specific entity (Wikidata ID)
- QuerySolutionMap bindingsForWikidataResultQuery = new QuerySolutionMap();
- // set expected person as parameter for Wikidata query
- bindingsForWikidataResultQuery.add("person", wikidataResource);
- return this.loadQueryFromFile(FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON, bindingsForWikidataResultQuery);
- }
-
- public String createWikidataSparqlQuery(String firstname, String lastname) throws IOException {
- // populate a generalized answer query with the specific entity (Wikidata ID)
- QuerySolutionMap bindingsForWikidataResultQuery = new QuerySolutionMap();
- // set expected last and firstname as parameter for Wikidata query
- bindingsForWikidataResultQuery.add("firstnameValue", ResourceFactory.createLangLiteral(firstname, "en"));
- bindingsForWikidataResultQuery.add("lastnameValue", ResourceFactory.createLangLiteral(lastname, "en"));
- return this.loadQueryFromFile(FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME, bindingsForWikidataResultQuery);
- }
+ private static final Logger logger = LoggerFactory.getLogger(BirthDataQueryBuilder.class);
+
+ // resource paths of the SPARQL templates used to read annotations from the
+ // Qanary triplestore; the constructor guards that each file is non-empty
+ public static final String FILENAME_ANNOTATIONS = "/queries/getAnnotation.rq";
+ public static final String FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA = "/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq";
+
+ // resource paths of the Wikidata query templates (by person resource / by
+ // first name and last name); also guarded by the constructor
+ public static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON = "/queries/getQuestionAnswerFromWikidataByPerson.rq";
+ public static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME = "/queries/getQuestionAnswerFromWikidataByFirstnameLastname.rq";
+
+ // literals bound to the "value" variable when looking up name annotations
+ private static final String FIRSTNAME_ANNOTATION = "FIRST_NAME";
+ private static final String LASTNAME_ANNOTATION = "LAST_NAME";
+
+ // names of the variables bound in the SPARQL templates
+ private static final String GRAPH = "graph";
+ private static final String VALUE = "value";
+
+ // appended to "urn:qanary:" to identify this component in created annotations
+ private final String applicationName;
+
+ // state of the current request, assigned at the beginning of process()
+ // NOTE(review): instance fields shared between calls; confirm that one
+ // instance never handles concurrent requests
+ private QanaryUtils myQanaryUtils;
+ private QanaryQuestion myQanaryQuestion;
+ private String myQuestion;
+
+ // supported question shapes; capture group 2 is read as the name of the person
+ private final String[] supportedQuestionPatterns = { "([Ww]here and when was )(.*)( born)",
+ "([Ww]here was )(.*)( born)", "([Ww]hen was )(.*)( born)" };
+
+ // index into supportedQuestionPatterns, set by isQuestionSupported() on a match
+ private int patternIndex;
+
+    /**
+     * Stores the application name and verifies that all SPARQL template
+     * resources used by this component are available.
+     *
+     * @param applicationName name of the application, resolved from the property
+     *                        spring.application.name when injected by Spring
+     */
+    public BirthDataQueryBuilder(@Value("${spring.application.name}") final String applicationName) {
+        this.applicationName = applicationName;
+        // check if files exists and are not empty
+        QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_ANNOTATIONS);
+        QanaryTripleStoreConnector
+                .guardNonEmptyFileFromResources(FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA);
+        QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON);
+        QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME);
+    }
+
+    /**
+     * Tests the question against each entry of supportedQuestionPatterns in order
+     * and remembers the index of the first pattern found in patternIndex.
+     *
+     * @param questionString the textual question
+     * @return true if a supported pattern occurs in the question, false otherwise
+     */
+    @Operation(summary = "Check if the question is supported and store the matched pattern index", operationId = "isQuestionSupported", description = "Compare the question against regular expression(s) representing the supported format and if a match is found, store the matched pattern index")
+    private boolean isQuestionSupported(String questionString) {
+        int candidate = 0;
+        while (candidate < this.supportedQuestionPatterns.length) {
+            final String regex = this.supportedQuestionPatterns[candidate];
+            final Matcher hit = Pattern.compile(regex).matcher(questionString);
+            logger.info("checking pattern \"{}\"", regex);
+            if (hit.find()) {
+                // remember which pattern matched for later lookups
+                this.patternIndex = candidate;
+                return true;
+            }
+            candidate++;
+        }
+        return false;
+    }
+
+    /**
+     * Find the position of a name in the textual question.
+     *
+     * @param questionString the textual question
+     * @param pattern        a regular expression (from supportedQuestionPatterns)
+     * @return start index of capture group 2 (the name) within the question
+     * @throws IllegalStateException if the pattern does not occur in the question
+     */
+    @Operation(summary = "Find the index of the entity in the question", operationId = "getNamePosition", description = "Find the position of a name in the textual question." //
+            + "The name is represented as a matched group within supportedQuestionPatterns.")
+    private int getNamePosition(String questionString, String pattern) {
+        Matcher m = Pattern.compile(pattern).matcher(questionString);
+        // Matcher.start(int) is only valid after a successful match; fail with a
+        // message naming the pattern instead of the generic "No match found"
+        if (!m.find()) {
+            throw new IllegalStateException("pattern \"" + pattern + "\" was not found in the question");
+        }
+        return m.start(2);
+    }
+
+ /**
+ * Delegates to QanaryTripleStoreConnector.readFileFromResourcesWithMap with the
+ * given resource path and bindings and returns its result (presumably the query
+ * text with the bound variables applied; see that method to confirm).
+ *
+ * @param filenameWithRelativePath resource path of the query template
+ * @param bindings                 values for the variables of the template
+ * @return the query as returned by the connector
+ * @throws IOException declared by the delegate
+ */
+ private String loadQueryFromFile(String filenameWithRelativePath, QuerySolutionMap bindings) throws IOException {
+ return QanaryTripleStoreConnector.readFileFromResourcesWithMap(filenameWithRelativePath, bindings);
+ }
+
+    /**
+     * standard method for processing a message from the central Qanary component
+     *
+     * @param myQanaryMessage the message describing the current process
+     * @return the given message, also if no annotation could be created
+     * @throws Exception if reading the question or accessing the triplestore fails
+     */
+    @Operation(summary = "Process a Qanary question with BirthDataQueryBuilder", //
+            operationId = "process", //
+            description = "Encapsulates the main functionality of this component. " //
+                    + "Construct a Wikidata query to find birth date and place for named entities." //
+                    + "The process can use the provided firstname and lastname or a named entity annotation.")
+    @Override
+    public QanaryMessage process(QanaryMessage myQanaryMessage) throws Exception {
+        logger.info("process: {}", myQanaryMessage);
+
+        // This example component requires the textual representation of the Question
+        // as well as annotations of Wikidata entities made by the OpenTapioca NED.
+
+        this.myQanaryUtils = this.getUtils(myQanaryMessage);
+        this.myQanaryQuestion = new QanaryQuestion<>(myQanaryMessage, myQanaryUtils.getQanaryTripleStoreConnector());
+        this.myQuestion = myQanaryQuestion.getTextualRepresentation(); // get the question as String
+
+        // STEP 1-3 have two options
+
+        // first, try to use a named entity annotation because it is more precise if it
+        // works, then stop
+        // the helper signals "nothing done" with null, hence its result is kept in a
+        // separate variable and the incoming message is never overwritten
+        QanaryMessage result = this.processForExistingNamedEntity(myQanaryMessage);
+        if (result != null) {
+            logger.info("Found a named entity annotation. Processing finished.");
+            return result;
+        }
+
+//        // second, let's try to find a firstname and lastname, if that works we stop
+//        result = this.processForFirstNameAndLastName(myQanaryMessage);
+//        if (result != null) {
+//            logger.info("Found firstname and lastname. Processing finished.");
+//            return result;
+//        }
+
+        logger.warn("Nothing could be done here.");
+        return myQanaryMessage;
+    }
+
+    /**
+     * Tries to create answer queries from FIRST_NAME / LAST_NAME annotations and,
+     * if none could be created, from the annotation at the name position of a
+     * supported question pattern.
+     *
+     * Every created Wikidata query is stored in the Qanary triplestore as an
+     * answer annotation with confidence 1.0.
+     *
+     * @param myQanaryMessage the message describing the current process
+     * @return the given message if at least one query was stored, otherwise null
+     * @throws Exception if loading a query template or accessing the triplestore
+     *                   fails
+     */
+    private QanaryMessage processForFirstNameAndLastName(QanaryMessage myQanaryMessage) throws Exception {
+
+        // STEP 1: Get the required Data
+        // Get the firstname annotation if it's annotated
+        QuerySolutionMap bindingsForFirstname = new QuerySolutionMap();
+        bindingsForFirstname.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
+        bindingsForFirstname.add(VALUE, ResourceFactory.createStringLiteral(FIRSTNAME_ANNOTATION));
+
+        String sparqlCheckFirstname = this.loadQueryFromFile(FILENAME_ANNOTATIONS, bindingsForFirstname);
+        ResultSet resultsetFirstname = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlCheckFirstname);
+
+        // Get the lastname annotation if it's annotated
+        QuerySolutionMap bindingsForLastname = new QuerySolutionMap();
+        // the currently used graph
+        bindingsForLastname.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
+        // annotated for the current question
+        bindingsForLastname.add(VALUE, ResourceFactory.createStringLiteral(LASTNAME_ANNOTATION));
+
+        String sparqlCheckLastname = this.loadQueryFromFile(FILENAME_ANNOTATIONS, bindingsForLastname);
+        ResultSet resultsetLastname = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlCheckLastname);
+
+        // STEP 2: Create queries for Wikidata if the question is supported or
+        // annotations are available
+        ArrayList<String> queriesForAnnotation = new ArrayList<>();
+
+        if (resultsetFirstname.hasNext() && resultsetLastname.hasNext()) {
+            // In this example, we are only interested in Entities that were found from
+            // another component and annotated with the annotation "FIRST_NAME" and
+            // "LAST_NAME".
+            queriesForAnnotation = createQueriesForAnnotation(resultsetFirstname, resultsetLastname);
+        } else {
+            logger.info("no annotation for {} and {} found", FIRSTNAME_ANNOTATION, LASTNAME_ANNOTATION);
+        }
+
+        if ((queriesForAnnotation.isEmpty() || queriesForAnnotation.get(0).isBlank())
+                && this.isQuestionSupported(myQuestion)) {
+            // In this example we are only interested in Entities that were found at a
+            // specific point in the question: e.g., 'where and when was PERSON born?'.
+            // Because we do not require entities that might have been found anywhere
+            // else in the question we can filter our results:
+
+            int filterStart = this.getNamePosition(myQuestion, this.supportedQuestionPatterns[this.patternIndex]);
+            // formulate a query to find existing information
+            queriesForAnnotation = createQueriesForAnnotation(filterStart);
+        }
+
+        // If no query was created, we can stop here.
+        if (queriesForAnnotation.isEmpty() || queriesForAnnotation.get(0).isBlank()) {
+            logger.warn("nothing to do here as question \"{}\" does not have the supported format", myQuestion);
+            return null;
+        }
+
+        for (String queryForAnnotation : queriesForAnnotation) {
+            // store the created select query as an annotation for the current question
+            // define here the parameters for the SPARQL INSERT query
+            QuerySolutionMap bindings = new QuerySolutionMap();
+            // use here the variable names defined in method insertAnnotationOfAnswerSPARQL
+            bindings.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
+            bindings.add("targetQuestion",
+                    ResourceFactory.createResource(myQanaryQuestion.getUri().toASCIIString()));
+            bindings.add("selectQueryThatShouldComputeTheAnswer",
+                    ResourceFactory.createStringLiteral(queryForAnnotation));
+            // as it is rule based, a high confidence is expressed
+            bindings.add("confidence", ResourceFactory.createTypedLiteral("1.0", XSDDatatype.XSDfloat));
+            bindings.add("application", ResourceFactory.createResource("urn:qanary:" + this.applicationName));
+
+            // get the template of the INSERT query
+            String insertDataIntoQanaryTriplestoreQuery = QanaryTripleStoreConnector
+                    .insertAnnotationOfAnswerSPARQL(bindings);
+            logger.info("SPARQL insert for adding data to Qanary triplestore: {}",
+                    insertDataIntoQanaryTriplestoreQuery);
+
+            // STEP 3: Push the computed result to the Qanary triplestore
+            logger.info("store data in graph {} of Qanary triplestore endpoint {}", //
+                    myQanaryMessage.getValues().get(myQanaryMessage.getOutGraph()), //
+                    myQanaryMessage.getValues().get(myQanaryMessage.getEndpoint()));
+            myQanaryUtils.getQanaryTripleStoreConnector().update(insertDataIntoQanaryTriplestoreQuery);
+        }
+
+        return myQanaryMessage;
+    }
+
+    /**
+     * Creates answer queries from named entities that are annotated with a
+     * Wikidata resource in the input graph.
+     *
+     * The question has to match one of the supportedQuestionPatterns; the start
+     * position of the name is passed to the annotation query as filterStart. For
+     * every distinct resource the highest annotation score is kept and reused as
+     * confidence of the stored answer annotation.
+     *
+     * @param myQanaryMessage the message describing the current process
+     * @return the given message if at least one annotation was stored, or null if
+     *         the question is not supported or no Wikidata resource was found
+     * @throws IOException                          if a query template cannot be
+     *                                              loaded
+     * @throws SparqlQueryFailed                    declared for the triplestore
+     *                                              access
+     * @throws QanaryExceptionNoOrMultipleQuestions declared for the access to the
+     *                                              question
+     * @throws URISyntaxException                   declared for the access to the
+     *                                              question
+     */
+    private QanaryMessage processForExistingNamedEntity(QanaryMessage myQanaryMessage)
+            throws IOException, SparqlQueryFailed, QanaryExceptionNoOrMultipleQuestions, URISyntaxException {
+
+        logger.info("Executing processForExistingNamedEntity.");
+
+        String inGraph = myQanaryQuestion.getInGraph().toASCIIString();
+        String outGraph = myQanaryQuestion.getOutGraph().toASCIIString();
+        String myQuestionURI = myQanaryQuestion.getUri().toASCIIString();
+        String endpoint = myQanaryMessage.getEndpoint().toASCIIString();
+
+        // STEP 1: Get Named Entity from the Qanary triplestore
+        if (!this.isQuestionSupported(myQuestion)) {
+            // stop the processing
+            logger.warn("processForExistingNamedEntity: Stop here as the question pattern was not found in '{}'.",
+                    this.myQuestion);
+            return null;
+        }
+        // In this example we are only interested in Entities that were found at a
+        // specific point in the question: e.g., 'where and when was PERSON born?'.
+        int filterStart = this.getNamePosition(myQuestion, this.supportedQuestionPatterns[this.patternIndex]);
+
+        QuerySolutionMap bindingsForAnnotationWithWikidataResource = new QuerySolutionMap();
+        bindingsForAnnotationWithWikidataResource.add(GRAPH, ResourceFactory.createResource(inGraph));
+        bindingsForAnnotationWithWikidataResource.add("regexForResourceFilter",
+                ResourceFactory.createPlainLiteral("^http://www.wikidata.org/entity/"));
+        bindingsForAnnotationWithWikidataResource.add("filterStart",
+                ResourceFactory.createTypedLiteral(String.valueOf(filterStart), XSDDatatype.XSDint));
+        String sparqlNamedEntityAnnotation = this.loadQueryFromFile(
+                FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA, bindingsForAnnotationWithWikidataResource);
+
+        // find the resources that are annotated in the given question; as there are
+        // possibly multiple resources, we store them in a map with their best score
+        ResultSet resultset = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlNamedEntityAnnotation);
+        Map<String, Float> wikidataResources = new HashMap<>();
+        while (resultset.hasNext()) {
+            QuerySolution tupel = resultset.next();
+            String wikidataResource = tupel.get("wikidataResource").asResource().getURI();
+            float score = tupel.get("annotationScore").asLiteral().getFloat();
+
+            // keep the score if the resource is new or the known score is lower
+            Float knownScore = wikidataResources.get(wikidataResource);
+            if (knownScore == null || knownScore < score) {
+                wikidataResources.put(wikidataResource, score);
+            }
+        }
+        logger.info("found entities: {}", wikidataResources);
+        if (wikidataResources.isEmpty()) {
+            // stop the processing
+            logger.warn("processForExistingNamedEntity: Stop here as no Wikidata resources were found in the graph {}.",
+                    inGraph);
+            return null;
+        }
+
+        // STEP 2: compute SPARQL queries that can be used to retrieve the actual
+        // answer; the list holds the INSERT queries that add the
+        // AnnotationOfAnswerSPARQL to the Qanary triplestore
+        ArrayList<String> queriesCapableOfRetrievingTheAnswer = new ArrayList<>();
+        for (Map.Entry<String, Float> namedEntity : wikidataResources.entrySet()) {
+            float score = namedEntity.getValue();
+
+            String answerRepresentedAsSparqlQuery = createWikidataSparqlQuery(namedEntity.getKey());
+
+            QuerySolutionMap bindingsForInserting = new QuerySolutionMap();
+            bindingsForInserting.add(GRAPH, ResourceFactory.createResource(outGraph));
+            bindingsForInserting.add("targetQuestion", ResourceFactory.createResource(myQuestionURI));
+            bindingsForInserting.add("selectQueryThatShouldComputeTheAnswer",
+                    ResourceFactory.createStringLiteral(answerRepresentedAsSparqlQuery));
+            // we take over the score of the named entity recognizer (NER+NED)
+            bindingsForInserting.add("confidence",
+                    ResourceFactory.createTypedLiteral(String.valueOf(score), XSDDatatype.XSDfloat));
+            bindingsForInserting.add("application",
+                    ResourceFactory.createResource("urn:qanary:" + this.applicationName));
+
+            // get the template of the INSERT query to insert the new annotation into the
+            // Qanary triplestore
+            String insertDataIntoQanaryTriplestoreQuery = QanaryTripleStoreConnector
+                    .insertAnnotationOfAnswerSPARQL(bindingsForInserting);
+            logger.info("created SPARQL INSERT query for adding data to Qanary triplestore: {}",
+                    insertDataIntoQanaryTriplestoreQuery);
+            queriesCapableOfRetrievingTheAnswer.add(insertDataIntoQanaryTriplestoreQuery);
+        }
+        // the map is not empty here and every entry adds one query, so the list
+        // cannot be empty at this point
+        logger.info(
+                "Created {} SPARQL queries that should be capable of retrieving the correct answer over Wikidata.",
+                queriesCapableOfRetrievingTheAnswer.size());
+
+        // STEP 3: store the created information in the Qanary triplestore as
+        // AnnotationOfAnswerSPARQL
+        for (String query : queriesCapableOfRetrievingTheAnswer) {
+            logger.info("store data in graph {} of Qanary triplestore endpoint {}", outGraph, endpoint);
+            myQanaryUtils.getQanaryTripleStoreConnector().update(query);
+        }
+
+        return myQanaryMessage; // everything done
+    }
+
+ /**
+  * Builds one Wikidata SPARQL query for every Wikidata resource that is
+  * annotated in the current question at the given start position.
+  *
+  * The annotations are read from the out-graph of the current question. Rather
+  * than computing a (textual) result, this component only provides SPARQL
+  * queries that might be used by other components to answer the question.
+  *
+  * @param filterStart start index a text selection needs to have for its
+  *                    annotation to be taken into account
+  * @return the created SPARQL queries, one per row returned by the annotation
+  *         query (empty if no annotation matched)
+  * @throws IOException                          if a query cannot be created
+  *                                              from its template file
+  *                                              (presumably raised by
+  *                                              loadQueryFromFile - confirm)
+  * @throws QanaryExceptionNoOrMultipleQuestions propagated unchanged from the
+  *                                              Qanary accessors used below
+  * @throws URISyntaxException                   propagated unchanged from the
+  *                                              Qanary accessors used below
+  * @throws SparqlQueryFailed                    propagated unchanged from the
+  *                                              Qanary accessors used below
+  */
+ private ArrayList<String> createQueriesForAnnotation(int filterStart)
+         throws IOException, QanaryExceptionNoOrMultipleQuestions, URISyntaxException, SparqlQueryFailed {
+     // STEP 1: prepare the SELECT query that retrieves the relevant annotations
+     QuerySolutionMap bindingsForAnnotation = new QuerySolutionMap();
+     // the currently used graph
+     bindingsForAnnotation.add(GRAPH,
+             ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
+     // annotated for the current question
+     bindingsForAnnotation.add("source", ResourceFactory.createResource(myQanaryQuestion.getUri().toASCIIString()));
+     // only for relevant annotations filter by starting point
+     bindingsForAnnotation.add("filterStart",
+             ResourceFactory.createTypedLiteral(String.valueOf(filterStart), XSDDatatype.XSDint));
+     // filter resources to get only the ones that are pointing to the Wikidata
+     // knowledge graph
+     bindingsForAnnotation.add("regexForResourceFilter",
+             ResourceFactory.createPlainLiteral("^http://www.wikidata.org/entity/"));
+
+     String sparqlGetAnnotation = this.loadQueryFromFile(FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA,
+             bindingsForAnnotation);
+
+     // STEP 2: compute a SPARQL SELECT query that should produce the result for
+     // every identified entity; these queries can then be used by other
+     // components
+     ResultSet resultset = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlGetAnnotation);
+     ArrayList<String> queries = new ArrayList<>();
+     while (resultset.hasNext()) {
+         QuerySolution tupel = resultset.next();
+         RDFNode wikidataResource = tupel.get("wikidataResource");
+         logger.info("creating query for resource: {}", wikidataResource);
+         String createdWikiDataQuery = createWikidataSparqlQuery(wikidataResource);
+         queries.add(createdWikiDataQuery);
+     }
+
+     return queries;
+ }
+
+ /**
+  * Builds one Wikidata SPARQL query for every firstname/lastname pair found in
+  * the given annotation results.
+  *
+  * The i-th firstname annotation is combined with the i-th lastname annotation.
+  * The names are cut out of the question text using the annotated start/end
+  * offsets. Annotations that have no counterpart at the same position of the
+  * other result are ignored.
+  *
+  * @param resultsetFirstname rows providing the "start" and "end" offsets of
+  *                           firstname annotations
+  * @param resultsetLastname  rows providing the "start" and "end" offsets of
+  *                           lastname annotations
+  * @return the created SPARQL queries (possibly empty)
+  * @throws IOException if a query cannot be created from its template
+  */
+ private ArrayList<String> createQueriesForAnnotation(ResultSet resultsetFirstname, ResultSet resultsetLastname)
+         throws IOException {
+     ArrayList<int[]> firstnameStartsEnds = readStartEndOffsets(resultsetFirstname);
+     ArrayList<int[]> lastnameStartsEnds = readStartEndOffsets(resultsetLastname);
+
+     // only complete pairs can be processed; bounding the loop avoids relying
+     // on an IndexOutOfBoundsException to terminate it
+     int numberOfPairs = Math.min(firstnameStartsEnds.size(), lastnameStartsEnds.size());
+     if (firstnameStartsEnds.size() != lastnameStartsEnds.size()) {
+         logger.warn("found {} firstname but {} lastname annotations, only the first {} pairs are processed",
+                 firstnameStartsEnds.size(), lastnameStartsEnds.size(), numberOfPairs);
+     }
+
+     ArrayList<String> queries = new ArrayList<>();
+     for (int i = 0; i < numberOfPairs; i++) {
+         int[] firstnameStartEnd = firstnameStartsEnds.get(i);
+         int[] lastnameStartEnd = lastnameStartsEnds.get(i);
+         String firstname;
+         String lastname;
+
+         try {
+             firstname = myQuestion.substring(firstnameStartEnd[0], firstnameStartEnd[1]);
+             lastname = myQuestion.substring(lastnameStartEnd[0], lastnameStartEnd[1]);
+         } catch (RuntimeException e) {
+             // e.g., offsets that do not fit the question text; as before, the
+             // processing of the remaining pairs is stopped
+             logger.error("error while get first or lastname: {}", e.getMessage());
+             break;
+         }
+
+         logger.info("creating query for {} {}", firstname, lastname);
+
+         String createdWikiDataQuery = createWikidataSparqlQuery(firstname, lastname);
+         queries.add(createdWikiDataQuery);
+     }
+
+     return queries;
+ }
+
+ /**
+  * Reads the "start" and "end" values of all remaining rows of the given result.
+  *
+  * @param resultset rows providing the integer literals "start" and "end"
+  * @return one {start, end} array per row, in the order of the result
+  */
+ private ArrayList<int[]> readStartEndOffsets(ResultSet resultset) {
+     ArrayList<int[]> startsEnds = new ArrayList<>();
+     while (resultset.hasNext()) {
+         QuerySolution tupel = resultset.next();
+         int start = tupel.getLiteral("start").getInt();
+         int end = tupel.getLiteral("end").getInt();
+         startsEnds.add(new int[] { start, end });
+     }
+     return startsEnds;
+ }
+
+ /**
+  * Convenience variant accepting the URI of the Wikidata resource as text: the
+  * URI is wrapped into an RDF resource and handed over to the RDFNode-based
+  * variant of this method.
+  *
+  * @param wikidataResource URI of the Wikidata resource
+  * @return the SPARQL query created by the RDFNode-based variant
+  * @throws IOException if the RDFNode-based variant fails to create the query
+  */
+ public String createWikidataSparqlQuery(String wikidataResource) throws IOException {
+     RDFNode resourceNode = ResourceFactory.createResource(wikidataResource);
+     return this.createWikidataSparqlQuery(resourceNode);
+ }
+
+ /**
+  * Creates the SPARQL query for a specific person by loading the query template
+  * FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON with the given node bound to the
+  * variable "person".
+  *
+  * @param wikidataResource node representing the expected person
+  * @return the query text produced by loadQueryFromFile
+  * @throws IOException declared for the template loading
+  */
+ public String createWikidataSparqlQuery(RDFNode wikidataResource) throws IOException {
+     // the only parameter of the generalized answer query is the person itself
+     QuerySolutionMap personBinding = new QuerySolutionMap();
+     personBinding.add("person", wikidataResource);
+
+     String personQuery = this.loadQueryFromFile(FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON, personBinding);
+     return personQuery;
+ }
+
+ /**
+  * Creates the SPARQL query for a person given by name by loading the query
+  * template FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME. Both names are
+  * bound as literals with the language tag "en" to the variables
+  * "firstnameValue" and "lastnameValue".
+  *
+  * @param firstname first name of the expected person
+  * @param lastname  last name of the expected person
+  * @return the query text produced by loadQueryFromFile
+  * @throws IOException declared for the template loading
+  */
+ public String createWikidataSparqlQuery(String firstname, String lastname) throws IOException {
+     RDFNode firstnameLiteral = ResourceFactory.createLangLiteral(firstname, "en");
+     RDFNode lastnameLiteral = ResourceFactory.createLangLiteral(lastname, "en");
+
+     // parameters of the generalized answer query: first and last name
+     QuerySolutionMap nameBindings = new QuerySolutionMap();
+     nameBindings.add("firstnameValue", firstnameLiteral);
+     nameBindings.add("lastnameValue", lastnameLiteral);
+
+     String nameQuery = this.loadQueryFromFile(FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME,
+             nameBindings);
+     return nameQuery;
+ }
}
diff --git a/qanary-component-QB-BirthDataWikidata/src/main/resources/config/application.properties b/qanary-component-QB-BirthDataWikidata/src/main/resources/config/application.properties
index 2213cf8f4..71b22b508 100644
--- a/qanary-component-QB-BirthDataWikidata/src/main/resources/config/application.properties
+++ b/qanary-component-QB-BirthDataWikidata/src/main/resources/config/application.properties
@@ -1,6 +1,6 @@
# Update the port number
server.port=5555
-spring.application.name=BirthDataQueryBuilderWikidata
+spring.application.name=QB-BirthDataWikidata
spring.application.description=${spring.application.name} is a Qanary component for creating a Wikidata SPARQL query intended to find the birth place and date of people by firstname/lastname or Wikidata resource
# Update the URL of the Qanary pipeline
spring.boot.admin.url=http://localhost:8080
diff --git a/qanary-component-QB-BirthDataWikidata/src/main/resources/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq b/qanary-component-QB-BirthDataWikidata/src/main/resources/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq
index 25a95bee2..9a2f5703a 100644
--- a/qanary-component-QB-BirthDataWikidata/src/main/resources/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq
+++ b/qanary-component-QB-BirthDataWikidata/src/main/resources/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq
@@ -1,3 +1,4 @@
+ # retrieve an annotation pointing to a Wikidata entity from a text segment (TextSelector)
PREFIX dbr:
PREFIX oa:
PREFIX qa:
@@ -7,15 +8,18 @@ SELECT *
FROM ?graph
WHERE {
?annotation oa:hasBody ?wikidataResource .
- # accept only Wikidata resources as it is a Wikidata Query Builder component
- FILTER( REGEX(?wikidataResource, "^http://www.wikidata.org/entity/", "i"))
?annotation qa:score ?annotationScore .
?annotation oa:hasTarget ?target .
+ ?annotation oa:annotatedBy ?annotator .
?target oa:hasSource ?source .
?target oa:hasSelector ?textSelector .
?textSelector rdf:type oa:TextPositionSelector .
?textSelector oa:start ?start .
?textSelector oa:end ?end .
+
+ # filter named entities that have a particular start index in the given question
FILTER(?start = ?filterStart) .
+ # accept only Wikidata resources as it is a Wikidata Query Builder component
+ FILTER( REGEX(?wikidataResource, ?regexForResourceFilter, "i"))
}
ORDER BY DESC(?annotationScore)
\ No newline at end of file
diff --git a/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/QueryTest.java b/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/QueryTest.java
index a62a9d386..b1d91f8e9 100644
--- a/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/QueryTest.java
+++ b/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/QueryTest.java
@@ -48,7 +48,7 @@ void filenameAnnotationsFilteredQueryTest() throws IOException {
bindingsForAnnotation.add("filterStart", ResourceFactory.createTypedLiteral(String.valueOf(5), XSDDatatype.XSDint));
String sparqlGetAnnotation = QanaryTripleStoreConnector.readFileFromResourcesWithMap(
- TestConfiguration.FILENAME_ANNOTATIONS_FILTERED,
+ TestConfiguration.FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA,
bindingsForAnnotation
);
diff --git a/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/TestConfiguration.java b/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/TestConfiguration.java
index d01e7ef20..7d5c2c456 100644
--- a/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/TestConfiguration.java
+++ b/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/TestConfiguration.java
@@ -4,12 +4,14 @@
import java.nio.file.Files;
import java.nio.file.Paths;
+import eu.wdaqua.component.qb.birthdata.wikidata.BirthDataQueryBuilder;
+
@org.springframework.boot.test.context.TestConfiguration
public class TestConfiguration {
- protected static final String FILENAME_ANNOTATIONS = "/queries/getAnnotation.rq";
- protected static final String FILENAME_ANNOTATIONS_FILTERED = "/queries/getAnnotationFiltered.rq";
- protected static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON = "/queries/getQuestionAnswerFromWikidataByPerson.rq";
- protected static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME = "/queries/getQuestionAnswerFromWikidataByFirstnameLastname.rq";
+ protected static final String FILENAME_ANNOTATIONS = BirthDataQueryBuilder.FILENAME_ANNOTATIONS;
+ protected static final String FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA = BirthDataQueryBuilder.FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA;
+ protected static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON = BirthDataQueryBuilder.FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON;
+ protected static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME = BirthDataQueryBuilder.FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME;
/**
* get the defined SPARQL query and remove all control characters (like newline)
diff --git a/qanary-component-QB-BirthDataWikidata/src/test/resources/queries/getAnnotationFilteredTest.rq b/qanary-component-QB-BirthDataWikidata/src/test/resources/queries/getAnnotationFilteredTest.rq
index 8c93916eb..60e226c53 100644
--- a/qanary-component-QB-BirthDataWikidata/src/test/resources/queries/getAnnotationFilteredTest.rq
+++ b/qanary-component-QB-BirthDataWikidata/src/test/resources/queries/getAnnotationFilteredTest.rq
@@ -8,7 +8,8 @@ FROM
WHERE
{ ?annotation oa:hasBody ?wikidataResource ;
qa:score ?annotationScore ;
- oa:hasTarget ?target .
+ oa:hasTarget ?target ;
+ oa:annotatedBy ?annotator .
?target oa:hasSource ;
oa:hasSelector ?textSelector .
?textSelector
@@ -16,5 +17,6 @@ WHERE
oa:start ?start ;
oa:end ?end
FILTER ( ?start = "5"^^ )
+ FILTER regex(?wikidataResource, ?regexForResourceFilter, "i")
}
ORDER BY DESC(?annotationScore)