From e67a3ede4d5137ae6cfea309c52b1c166fec64fc Mon Sep 17 00:00:00 2001 From: renaud Date: Tue, 3 Mar 2015 21:44:46 +0100 Subject: [PATCH] initial client code --- README.md | 20 ++- pom.xml | 58 +++++++ .../org/sherlok/client/SherlokClient.java | 142 ++++++++++++++++++ .../client/SherlokClientException.java | 21 +++ .../java/org/sherlok/mappings/Annotation.java | 108 +++++++++++++ .../org/sherlok/mappings/SherlokResult.java | 64 ++++++++ .../org/sherlok/client/SherlokClientTest.java | 38 +++++ 7 files changed, 449 insertions(+), 2 deletions(-) create mode 100644 pom.xml create mode 100644 src/main/java/org/sherlok/client/SherlokClient.java create mode 100644 src/main/java/org/sherlok/client/SherlokClientException.java create mode 100644 src/main/java/org/sherlok/mappings/Annotation.java create mode 100644 src/main/java/org/sherlok/mappings/SherlokResult.java create mode 100644 src/test/java/org/sherlok/client/SherlokClientTest.java diff --git a/README.md b/README.md index d79b1ea..b52f6cd 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,18 @@ -# sherlok_javaclient -Java client for Sherlok +Java client for [Sherlok](http://sherlok.io/). + +Here is an example that finds people's names and places in text. It requires a Sherlok server running locally. 
+ + # create client; alternatively, you can configure host and port + SherlokClient client = new SherlokClient(); + + # have Sherlok annotate some text with a text mining pipeline + SherlokResult res = client.annotate("opennlp.ners.en", "Jack Burton " + + "(born April 29, 1954 in El Paso), also known as Jake Burton, is an " + + "American snowboarder and founder of Burton Snowboards."); + + # process the annotations (here: person's names and places) + for (Annotation a : res.get("NamedEntity")) { + System.out.println(a); + } + + diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..f873366 --- /dev/null +++ b/pom.xml @@ -0,0 +1,58 @@ + + 4.0.0 + + org.sherlok + sherlok_javaclient + 0.1-SNAPSHOT + Lightweight Java client to access Sherlok server results + + + Sherlok + https://github.com/renaud/sherlok_javaclient + + 2014 + + + Renaud Richardet + renaud.richardet@gmail.com + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 1.7 + 1.7 + UTF-8 + + + + + + + + org.apache.httpcomponents + httpclient + 4.3.6 + + + com.fasterxml.jackson.datatype + jackson-datatype-json-org + [2.4.2,] + + + + + junit + junit + 4.12 + test + + + \ No newline at end of file diff --git a/src/main/java/org/sherlok/client/SherlokClient.java b/src/main/java/org/sherlok/client/SherlokClient.java new file mode 100644 index 0000000..ba7301e --- /dev/null +++ b/src/main/java/org/sherlok/client/SherlokClient.java @@ -0,0 +1,142 @@ +package org.sherlok.client; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.NameValuePair; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.conn.HttpHostConnectException; +import 
org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.message.BasicNameValuePair; +import org.apache.http.util.EntityUtils; +import org.sherlok.mappings.SherlokResult; + +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * Client for Sherlok server. + * + * @author renaud@apache.org + */ +public class SherlokClient { + + public static final String DEFAULT_HOST = "http://localhost"; + public static final int DEFAULT_PORT = 9600; + + private final String postUrl; + private ObjectMapper mapper; + private CloseableHttpClient client; + + /** + * Creates a client with {@link #DEFAULT_PORT} 9600 and + * {@link SherlokClient#DEFAULT_HOST} localhost. + */ + public SherlokClient() { + this(DEFAULT_HOST, DEFAULT_PORT); + } + + /** + * @param host + * @param port + */ + public SherlokClient(String host, int port) { + this.postUrl = host + ":" + port + "/annotate/"; + this.mapper = new ObjectMapper(); + this.client = HttpClientBuilder.create().build(); + } + + /** + * @param pipeline + * the name of the pipeline (no version) + * @param text + * the text to annotate + * @return a {@link SherlokResult} containing the found annotations + */ + public SherlokResult annotate(String pipeline, String text) + throws SherlokClientException { + return annotate(pipeline, null, text); + } + + /** + * @param pipeline + * the name of the pipeline + * @param version + * the version of the pipeline + * @param text + * the text to annotate + * @return a {@link SherlokResult} containing the found annotations + */ + public SherlokResult annotate(String pipeline, String version, String text) + throws SherlokClientException { + String json = annotateRaw(pipeline, version, text); + try { + return mapper.readValue(json, SherlokResult.class); + } catch (Exception e) { + throw new SherlokClientException("could not read Sherlok JSON: " + + e.getMessage()); + } + } + + /** + * @param pipeline + * the name of the pipeline 
(no version) + * @param text + * the text to annotate + * @return the raw JSON from the Sherlok server + */ + public String annotateRaw(String pipeline, String text) + throws SherlokClientException { + return annotateRaw(pipeline, null, text); + } + + /** + * @param pipeline + * the name of the pipeline + * @param version + * the version of the pipeline + * @param text + * the text to annotate + * @return the raw JSON from the Sherlok server + */ + public String annotateRaw(String pipeline, String version, String text) + throws SherlokClientException { + + try { + HttpPost post = new HttpPost(postUrl + pipeline); + + List nvps = new ArrayList(); + nvps.add(new BasicNameValuePair("version", version == null ? "null" + : version)); + nvps.add(new BasicNameValuePair("text", text)); + post.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8")); + + HttpResponse response = client.execute(post); + HttpEntity entity = response.getEntity(); + + String responseString = EntityUtils.toString(entity, "UTF-8"); + if (response.getStatusLine().getStatusCode() > 250) { + throw new SherlokClientException( + "Could not annotate (error code " + + response.getStatusLine().getStatusCode() + + ") " + responseString); + } + return responseString; + } catch (HttpHostConnectException e) { + throw new SherlokClientException( + "could not connect to Sherlok server at '" + postUrl + "'"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e);// should not happend + } catch (ClientProtocolException e) { + throw new SherlokClientException(e); + } catch (IOException e) { + throw new RuntimeException(e);// should not happend + } + } +} diff --git a/src/main/java/org/sherlok/client/SherlokClientException.java b/src/main/java/org/sherlok/client/SherlokClientException.java new file mode 100644 index 0000000..e8e494b --- /dev/null +++ b/src/main/java/org/sherlok/client/SherlokClientException.java @@ -0,0 +1,21 @@ +package org.sherlok.client; + +import 
org.apache.http.client.ClientProtocolException; + +/** + * An exception thrown by {@link SherlokClient} + * + * @author renaud@apache.org + */ +@SuppressWarnings("serial") +public class SherlokClientException extends Exception { + + public SherlokClientException(String msg) { + super(msg); + } + + public SherlokClientException(ClientProtocolException e) { + super(e); + } + +} diff --git a/src/main/java/org/sherlok/mappings/Annotation.java b/src/main/java/org/sherlok/mappings/Annotation.java new file mode 100644 index 0000000..20d2d71 --- /dev/null +++ b/src/main/java/org/sherlok/mappings/Annotation.java @@ -0,0 +1,108 @@ +package org.sherlok.mappings; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.json.JSONException; + +import com.fasterxml.jackson.annotation.JsonAnyGetter; +import com.fasterxml.jackson.annotation.JsonAnySetter; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonPropertyOrder; + +/** + * Represents a (UIMA) annotation produced by Sherlok server + * + * @author renaud@apache.org + */ +@JsonPropertyOrder(value = { "begin", "end", "type" }, alphabetic = true) +public class Annotation { + + /** These keys are not considered properties */ + final public static Set NOT_PROPERTIES = new HashSet<>(); + static { + NOT_PROPERTIES.add("begin"); + NOT_PROPERTIES.add("end"); + NOT_PROPERTIES.add("@type"); + NOT_PROPERTIES.add("sofa"); + } + + private int begin = 0, end = 0; + @JsonProperty("@type") + private String type; + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + private Map properties = new HashMap<>(); + + // "any getter" needed for serialization + @JsonAnyGetter + public Map any() { + return properties; + } + + @JsonAnySetter + public Annotation addProperty(String name, Object value) + throws JSONException { + if (!NOT_PROPERTIES.contains(name)) + 
properties.put(name, value); + return this; + } + + public int getBegin() { + return begin; + } + + public Annotation setBegin(int begin) { + this.begin = begin; + return this; + } + + public int getEnd() { + return end; + } + + public Annotation setEnd(int end) { + this.end = end; + return this; + } + + public String getType() { + return type; + } + + public Annotation setType(String type) { + this.type = type; + return this; + } + + @Override + public boolean equals(Object o) { // FIXME test on properties, too + if (o instanceof Annotation) { + Annotation other = (Annotation) o; + if (this.begin == other.begin && // + this.end == other.end && // + this.type.equals(other.type)) { + return true; + } + } + return false; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(type + "[" + begin + ":" + end); + for (Entry p : properties.entrySet()) { + sb.append(", " + p.getKey() + "='" + p.getValue() + "'"); + } + sb.append("]"); + return sb.toString(); + } + + public Map getProperties() { + return properties; + } +} \ No newline at end of file diff --git a/src/main/java/org/sherlok/mappings/SherlokResult.java b/src/main/java/org/sherlok/mappings/SherlokResult.java new file mode 100644 index 0000000..9f39dc1 --- /dev/null +++ b/src/main/java/org/sherlok/mappings/SherlokResult.java @@ -0,0 +1,64 @@ +package org.sherlok.mappings; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * The JSON response from Sherlok server, mapped as a Java object. 
+ *
+ * The fields are bound to the JSON keys "@cas_views", "@context",
+ * "annotations" and "stats" through their JsonProperty annotations.
+ *
+ * NOTE(review): the generic type arguments of the Map, List and Entry
+ * declarations below look like they were lost in extraction (see the stray
+ * '>' on the views field) - restore them from the original file.
+ *
+ * @author renaud@apache.org
+ */
+public class SherlokResult {
+
+    private static final ObjectMapper MAPPER = new ObjectMapper(); // shared by parse()
+
+    @JsonProperty("@cas_views")
+    private Map> views;
+
+    @JsonProperty("@context")
+    private Map context;
+
+    @JsonProperty("annotations")
+    private Map annotations; // values are read as Annotation in get()
+
+    @JsonProperty("stats")
+    private Map stats;
+
+    private SherlokResult() { // hide, use parse()
+    }
+
+    /**
+     * Maps raw JSON onto a new instance using the shared static mapper.
+     *
+     * @param json
+     *            the raw JSON from Sherlok
+     * @return the mapped {@link SherlokResult}
+     */
+    @JsonIgnore
+    public static SherlokResult parse(String json) throws JsonParseException,
+            JsonMappingException, IOException {
+        return MAPPER.readValue(json, SherlokResult.class);
+    }
+    /**
+     * Collects, into a new list, every annotation value whose getType()
+     * equals the given type.
+     *
+     * As written this throws NullPointerException when the annotations map
+     * is null (presumably when the JSON had no "annotations" entry - verify)
+     * or when an annotation's getType() returns null.
+     *
+     * @param type
+     *            the annotation type to select
+     * @return the matching annotations, empty if none match
+     */
+    @JsonIgnore
+    public List get(String type) {
+        List ret = new ArrayList<>();
+        for (Entry a : annotations.entrySet()) {
+            if (a.getValue().getType().equals(type)) {
+                ret.add(a.getValue());
+            }
+        }
+        return ret;
+    }
+
+    public Map getAnnotations() { // returns the internal map itself, not a copy
+        return annotations;
+    }
+}
diff --git a/src/test/java/org/sherlok/client/SherlokClientTest.java b/src/test/java/org/sherlok/client/SherlokClientTest.java
new file mode 100644
index 0000000..07468ab
--- /dev/null
+++ b/src/test/java/org/sherlok/client/SherlokClientTest.java
@@ -0,0 +1,38 @@
+package org.sherlok.client;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.junit.Ignore;
+import org.junit.Test;
+import org.sherlok.mappings.Annotation;
+import org.sherlok.mappings.SherlokResult;
+
+/**
+ * Hum: cannot test this Sherlok client with Sherlok server, because of Maven
+ * circular dependencies. However, this Sherlok client code is thoroughly
+ * tested in Sherlok server (sherlok_core).
+ */ +@Ignore +public class SherlokClientTest { + + final String DEFAULT_PIPELINE = "opennlp.ners.en"; + final String TEST_TEXT = "Jack Burton (born April 29, 1954 in El Paso), also known as Jake Burton, is an American snowboarder and founder of Burton Snowboards."; + + @Test + public void test() throws Exception { + + SherlokClient client = new SherlokClient(); + + SherlokResult res = client.annotate(DEFAULT_PIPELINE, TEST_TEXT); + + for (Entry a : res.getAnnotations().entrySet()) { + System.out.println(a.getValue()); + } + + List entities = res.get("NamedEntity"); + assertEquals(3, entities.size()); + } +}