-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
449 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,18 @@ | ||
# sherlok_javaclient | ||
Java client for Sherlok | ||
Java client for [Sherlok](http://sherlok.io/). | ||
|
||
Here is an example that finds people's names and places in text. It requires a Sherlok server running locally. | ||
|
||
// create client; alternatively, you can configure host and port | ||
SherlokClient client = new SherlokClient(); | ||
|
||
// have Sherlok annotate some text with a text mining pipeline | ||
SherlokResult res = client.annotate("opennlp.ners.en", "Jack Burton " | ||
+ "(born April 29, 1954 in El Paso), also known as Jake Burton, is an " | ||
+ "American snowboarder and founder of Burton Snowboards."); | ||
|
||
// process the annotations (here: people's names and places) | ||
for (Annotation a : res.get("NamedEntity")) { | ||
System.out.println(a); | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<!-- Maven build descriptor for the Sherlok Java client library. -->
<modelVersion>4.0.0</modelVersion> | ||
|
||
<!-- Artifact coordinates. -->
<groupId>org.sherlok</groupId> | ||
<artifactId>sherlok_javaclient</artifactId> | ||
<version>0.1-SNAPSHOT</version> | ||
<description>Lightweight Java client to access Sherlok server results</description> | ||
|
||
<!-- Project metadata (organization, inception year, developers). -->
<organization> | ||
<name>Sherlok</name> | ||
<url>https://github.com/renaud/sherlok_javaclient</url> | ||
</organization> | ||
<inceptionYear>2014</inceptionYear> | ||
<developers> | ||
<developer> | ||
<name>Renaud Richardet</name> | ||
<email>[email protected]</email> | ||
</developer> | ||
</developers> | ||
|
||
<build> | ||
<plugins> | ||
<!-- Compile with Java 1.7 source/target level; sources are read as UTF-8. --> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-compiler-plugin</artifactId> | ||
<version>3.1</version> | ||
<configuration> | ||
<source>1.7</source> | ||
<target>1.7</target> | ||
<encoding>UTF-8</encoding> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
|
||
<dependencies> | ||
<!-- Apache HttpClient: HTTP transport for requests to the Sherlok server. -->
<dependency> | ||
<groupId>org.apache.httpcomponents</groupId> | ||
<artifactId>httpclient</artifactId> | ||
<version>4.3.6</version> | ||
</dependency> | ||
<!-- Jackson JSON mapping (org.json datatype module). -->
<!-- NOTE(review): the version below is a range with no upper bound, so the resolved -->
<!-- release presumably depends on what is available at build time; consider pinning. -->
<dependency> | ||
<groupId>com.fasterxml.jackson.datatype</groupId> | ||
<artifactId>jackson-datatype-json-org</artifactId> | ||
<version>[2.4.2,]</version> | ||
</dependency> | ||
|
||
<!-- Test-only dependencies. --> | ||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<version>4.12</version> | ||
<scope>test</scope> | ||
</dependency> | ||
</dependencies> | ||
</project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
package org.sherlok.client; | ||
|
||
import java.io.IOException; | ||
import java.io.UnsupportedEncodingException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.apache.http.HttpEntity; | ||
import org.apache.http.HttpResponse; | ||
import org.apache.http.NameValuePair; | ||
import org.apache.http.client.ClientProtocolException; | ||
import org.apache.http.client.entity.UrlEncodedFormEntity; | ||
import org.apache.http.client.methods.HttpPost; | ||
import org.apache.http.conn.HttpHostConnectException; | ||
import org.apache.http.impl.client.CloseableHttpClient; | ||
import org.apache.http.impl.client.HttpClientBuilder; | ||
import org.apache.http.message.BasicNameValuePair; | ||
import org.apache.http.util.EntityUtils; | ||
import org.sherlok.mappings.SherlokResult; | ||
|
||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
|
||
/** | ||
* Client for Sherlok server. | ||
* | ||
* @author [email protected] | ||
*/ | ||
public class SherlokClient { | ||
|
||
public static final String DEFAULT_HOST = "http://localhost"; | ||
public static final int DEFAULT_PORT = 9600; | ||
|
||
private final String postUrl; | ||
private ObjectMapper mapper; | ||
private CloseableHttpClient client; | ||
|
||
/** | ||
* Creates a client with {@link #DEFAULT_PORT} 9600 and | ||
* {@link SherlokClient#DEFAULT_HOST} localhost. | ||
*/ | ||
public SherlokClient() { | ||
this(DEFAULT_HOST, DEFAULT_PORT); | ||
} | ||
|
||
/** | ||
* @param host | ||
* @param port | ||
*/ | ||
public SherlokClient(String host, int port) { | ||
this.postUrl = host + ":" + port + "/annotate/"; | ||
this.mapper = new ObjectMapper(); | ||
this.client = HttpClientBuilder.create().build(); | ||
} | ||
|
||
/** | ||
* @param pipeline | ||
* the name of the pipeline (no version) | ||
* @param text | ||
* the text to annotate | ||
* @return a {@link SherlokResult} containing the found annotations | ||
*/ | ||
public SherlokResult annotate(String pipeline, String text) | ||
throws SherlokClientException { | ||
return annotate(pipeline, null, text); | ||
} | ||
|
||
/** | ||
* @param pipeline | ||
* the name of the pipeline | ||
* @param version | ||
* the version of the pipeline | ||
* @param text | ||
* the text to annotate | ||
* @return a {@link SherlokResult} containing the found annotations | ||
*/ | ||
public SherlokResult annotate(String pipeline, String version, String text) | ||
throws SherlokClientException { | ||
String json = annotateRaw(pipeline, version, text); | ||
try { | ||
return mapper.readValue(json, SherlokResult.class); | ||
} catch (Exception e) { | ||
throw new SherlokClientException("could not read Sherlok JSON: " | ||
+ e.getMessage()); | ||
} | ||
} | ||
|
||
/** | ||
* @param pipeline | ||
* the name of the pipeline (no version) | ||
* @param text | ||
* the text to annotate | ||
* @return the raw JSON from the Sherlok server | ||
*/ | ||
public String annotateRaw(String pipeline, String text) | ||
throws SherlokClientException { | ||
return annotateRaw(pipeline, null, text); | ||
} | ||
|
||
/** | ||
* @param pipeline | ||
* the name of the pipeline | ||
* @param version | ||
* the version of the pipeline | ||
* @param text | ||
* the text to annotate | ||
* @return the raw JSON from the Sherlok server | ||
*/ | ||
public String annotateRaw(String pipeline, String version, String text) | ||
throws SherlokClientException { | ||
|
||
try { | ||
HttpPost post = new HttpPost(postUrl + pipeline); | ||
|
||
List<NameValuePair> nvps = new ArrayList<NameValuePair>(); | ||
nvps.add(new BasicNameValuePair("version", version == null ? "null" | ||
: version)); | ||
nvps.add(new BasicNameValuePair("text", text)); | ||
post.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8")); | ||
|
||
HttpResponse response = client.execute(post); | ||
HttpEntity entity = response.getEntity(); | ||
|
||
String responseString = EntityUtils.toString(entity, "UTF-8"); | ||
if (response.getStatusLine().getStatusCode() > 250) { | ||
throw new SherlokClientException( | ||
"Could not annotate (error code " | ||
+ response.getStatusLine().getStatusCode() | ||
+ ") " + responseString); | ||
} | ||
return responseString; | ||
} catch (HttpHostConnectException e) { | ||
throw new SherlokClientException( | ||
"could not connect to Sherlok server at '" + postUrl + "'"); | ||
} catch (UnsupportedEncodingException e) { | ||
throw new RuntimeException(e);// should not happend | ||
} catch (ClientProtocolException e) { | ||
throw new SherlokClientException(e); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e);// should not happend | ||
} | ||
} | ||
} |
21 changes: 21 additions & 0 deletions
21
src/main/java/org/sherlok/client/SherlokClientException.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package org.sherlok.client; | ||
|
||
import org.apache.http.client.ClientProtocolException; | ||
|
||
/** | ||
* An exception thrown by {@link SherlokClient} | ||
* | ||
* @author [email protected] | ||
*/ | ||
@SuppressWarnings("serial") | ||
public class SherlokClientException extends Exception { | ||
|
||
public SherlokClientException(String msg) { | ||
super(msg); | ||
} | ||
|
||
public SherlokClientException(ClientProtocolException e) { | ||
super(e); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
package org.sherlok.mappings; | ||
|
||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Map.Entry; | ||
import java.util.Set; | ||
|
||
import org.json.JSONException; | ||
|
||
import com.fasterxml.jackson.annotation.JsonAnyGetter; | ||
import com.fasterxml.jackson.annotation.JsonAnySetter; | ||
import com.fasterxml.jackson.annotation.JsonInclude; | ||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import com.fasterxml.jackson.annotation.JsonPropertyOrder; | ||
|
||
/** | ||
* Represents a (UIMA) annotation produced by Sherlok server | ||
* | ||
* @author [email protected] | ||
*/ | ||
@JsonPropertyOrder(value = { "begin", "end", "type" }, alphabetic = true) | ||
public class Annotation { | ||
|
||
/** These keys are not considered properties */ | ||
final public static Set<String> NOT_PROPERTIES = new HashSet<>(); | ||
static { | ||
NOT_PROPERTIES.add("begin"); | ||
NOT_PROPERTIES.add("end"); | ||
NOT_PROPERTIES.add("@type"); | ||
NOT_PROPERTIES.add("sofa"); | ||
} | ||
|
||
private int begin = 0, end = 0; | ||
@JsonProperty("@type") | ||
private String type; | ||
@JsonInclude(JsonInclude.Include.NON_DEFAULT) | ||
private Map<String, Object> properties = new HashMap<>(); | ||
|
||
// "any getter" needed for serialization | ||
@JsonAnyGetter | ||
public Map<String, Object> any() { | ||
return properties; | ||
} | ||
|
||
@JsonAnySetter | ||
public Annotation addProperty(String name, Object value) | ||
throws JSONException { | ||
if (!NOT_PROPERTIES.contains(name)) | ||
properties.put(name, value); | ||
return this; | ||
} | ||
|
||
public int getBegin() { | ||
return begin; | ||
} | ||
|
||
public Annotation setBegin(int begin) { | ||
this.begin = begin; | ||
return this; | ||
} | ||
|
||
public int getEnd() { | ||
return end; | ||
} | ||
|
||
public Annotation setEnd(int end) { | ||
this.end = end; | ||
return this; | ||
} | ||
|
||
public String getType() { | ||
return type; | ||
} | ||
|
||
public Annotation setType(String type) { | ||
this.type = type; | ||
return this; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { // FIXME test on properties, too | ||
if (o instanceof Annotation) { | ||
Annotation other = (Annotation) o; | ||
if (this.begin == other.begin && // | ||
this.end == other.end && // | ||
this.type.equals(other.type)) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
StringBuilder sb = new StringBuilder(); | ||
sb.append(type + "[" + begin + ":" + end); | ||
for (Entry<String, Object> p : properties.entrySet()) { | ||
sb.append(", " + p.getKey() + "='" + p.getValue() + "'"); | ||
} | ||
sb.append("]"); | ||
return sb.toString(); | ||
} | ||
|
||
public Map<String, Object> getProperties() { | ||
return properties; | ||
} | ||
} |
Oops, something went wrong.